In [None]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Train a TF-IDF + logistic-regression sentiment classifier on the tweets in
# Twitter_Data.csv, report hold-out metrics, and pickle the fitted model and
# vectorizer for later use.
df = pd.read_csv('Twitter_Data.csv')

# Drop rows missing either the label or the text. Filtering on 'category'
# alone is not enough: a missing 'clean_text' would be turned into the literal
# string "nan" by astype(str) below and then be trained on as a real tweet.
df = df.dropna(subset=['clean_text', 'category'])

# Cast labels to int (the classification report lists classes -1, 0 and 1).
df['category'] = df['category'].astype(int)
# astype(str) is kept so any non-string cell still reaches the vectorizer as text.
texts = df['clean_text'].astype(str)
labels = df['category']

# 80/20 hold-out split; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# Fit the vocabulary on the training split only, then reuse it for the test
# split, so no information from the test texts leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

model = LogisticRegression(max_iter=500)
model.fit(X_train, train_labels)

# Evaluate on the held-out split.
predictions = model.predict(X_test)
accuracy = accuracy_score(test_labels, predictions)
print(f"Model Accuracy: {accuracy:.4f}")
print(classification_report(test_labels, predictions))

# Persist both artifacts: the vectorizer is needed at inference time to turn
# new text into the same feature space the model was trained on.
# NOTE: pickle files must only ever be loaded from a trusted source.
with open("sentiment_model.pkl", "wb") as f:
    pickle.dump(model, f)
with open("tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

print("Model and vectorizer saved successfully!")

Model Accuracy: 0.9258
              precision    recall  f1-score   support

          -1       0.91      0.83      0.87      7230
           0       0.92      0.98      0.95     10961
           1       0.94      0.94      0.94     14404

    accuracy                           0.93     32595
   macro avg       0.92      0.91      0.92     32595
weighted avg       0.93      0.93      0.92     32595

Model and vectorizer saved successfully!
