In [3]:
# Install Gensim only (do not downgrade numpy or scipy)
!pip install gensim --quiet

# Imports
import pandas as pd
import numpy as np
import re
import gensim.downloader as api
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

# Tweet preprocessing (simple, no nltk)
def preprocess_tweet(text):
    """Tokenise a tweet into lowercase words with stop words removed.

    Links, @mentions and #hashtags are dropped entirely, then every
    character other than a-z and whitespace is deleted before splitting on
    whitespace.

    Args:
        text: The raw tweet. Non-string values (e.g. the float NaN pandas
            uses for a missing cell, or None) are treated as an empty tweet.

    Returns:
        A list of the remaining words, in order. May be empty.
    """
    if not isinstance(text, str):
        # A missing value has no .lower(); return "no words" instead of
        # raising AttributeError in the middle of a DataFrame.apply.
        return []
    text = text.lower()
    text = re.sub(r"http\S+|@\w+|#\w+", "", text)   # Remove links, mentions, hashtags
    text = re.sub(r"[^a-z\s]", "", text)            # Remove punctuation/numbers
    return [w for w in text.split() if w not in ENGLISH_STOP_WORDS]

# Load Word2Vec (Google News, ~1.5GB, will download once)
# The loaded object is kept at module level: vectorize_tweet tests words
# against it with `in` and fetches their vectors by indexing.
w2v_model = api.load("word2vec-google-news-300")

# Convert tweet to average vector
def vectorize_tweet(tweet):
    """Return the mean word embedding of a tweet.

    The tweet is tokenised with ``preprocess_tweet``; words that are not in
    ``w2v_model`` are skipped and the remaining vectors are averaged
    element-wise.

    Args:
        tweet: The raw tweet text.

    Returns:
        A 1-D array with one entry per embedding dimension. When no word of
        the tweet is in the model, an all-zero vector of that length.
    """
    words = preprocess_tweet(tweet)
    vectors = [w2v_model[w] for w in words if w in w2v_model]
    if not vectors:
        # Take the width from the model instead of hard-coding 300, so the
        # fallback always matches the averaged vectors when they are stacked.
        return np.zeros(w2v_model.vector_size)
    return np.mean(vectors, axis=0)

# Load Tweets.csv (uploaded manually via Colab)
# Only the tweet text and its sentiment label are kept.
df = pd.read_csv("Tweets.csv")[['text', 'airline_sentiment']]

# Convert all tweets to vectors
df['vector'] = df['text'].apply(vectorize_tweet)

# Prepare training data
# Stack the per-tweet vectors into one (n_tweets, n_dims) feature matrix and
# encode the three sentiment labels as integers 0/1/2.
X = np.vstack(df['vector'].values)
y = df['airline_sentiment'].map({'negative': 0, 'neutral': 1, 'positive': 2})
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
# multi_class is deliberately not passed: the argument is deprecated in
# recent scikit-learn releases, and with the default lbfgs solver a
# three-class target is already fitted with the multinomial objective.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Evaluate
# Accuracy on the held-out rows, printed as a percentage.
acc = accuracy_score(y_test, clf.predict(X_test))
print("Tweet Sentiment Accuracy:", round(acc * 100, 2), "%")

# Prediction function
def predict_sentiment(tweet):
    """Classify one tweet as 'negative', 'neutral' or 'positive'.

    The tweet is embedded with ``vectorize_tweet``, reshaped into a
    single-row matrix and passed to the fitted ``clf``; the predicted
    integer class is then translated back to its name.
    """
    label_names = {0: 'negative', 1: 'neutral', 2: 'positive'}
    features = vectorize_tweet(tweet).reshape(1, -1)
    predicted = clf.predict(features)
    return label_names[predicted[0]]

# Example predictions: one clearly negative, one clearly positive and one
# ambiguous tweet, each printed next to the label the model assigns.
for caption, sample in (
    ("Example 1:", "Terrible flight, rude staff and delay."),
    ("Example 2:", "Very smooth check-in, friendly staff."),
    ("Example 3:", "It was fine, not too bad."),
):
    print(caption, predict_sentiment(sample))




Tweet Sentiment Accuracy: 77.7 %
Example 1: negative
Example 2: positive
Example 3: negative
