In [8]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler

# Load data
df = pd.read_csv(r"fb_sentiment.csv")

# Map labels for clarity
label_map = {'P': 'positive', 'N': 'negative', 'O': 'neutral'}
df['Sentiment'] = df['Label'].map(label_map)

# Features and labels
X = df['FBPost']
y = df['Sentiment']

# Vectorize text
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
X_vec = vectorizer.fit_transform(X)

# Oversample minority classes
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_vec, y)

# Split oversampled data
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

    negative       0.99      1.00      1.00       128
     neutral       0.89      1.00      0.94       128
    positive       1.00      0.87      0.93       129

    accuracy                           0.96       385
   macro avg       0.96      0.96      0.96       385
weighted avg       0.96      0.96      0.96       385

