In [None]:
import pandas as pd

# Load the dataset and strip padding from the header names so the column
# lookups below match.
df = pd.read_csv('sentiment-analysis.csv')
df.columns = df.columns.str.strip()

# Keep only Text and Sentiment. Rows missing either value are dropped here:
# the downstream CountVectorizer rejects NaN documents, and a missing label
# cannot be trained on.
df_cleaned = df[['Text', 'Sentiment']].dropna().copy()

# Trim padding from string labels so that e.g. ' Positive' and 'Positive' are
# treated as one class; non-string labels are passed through untouched.
df_cleaned['Sentiment'] = df_cleaned['Sentiment'].map(
    lambda label: label.strip() if isinstance(label, str) else label
)

# Normalise the text: drop wrapping quotes and outer whitespace, lowercase,
# then remove every character that is neither a word character nor whitespace.
df_cleaned['Text'] = (
    df_cleaned['Text']
    .str.strip('"')
    .str.strip()
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)
)

print(df_cleaned.head())


In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Split the raw text *before* vectorizing so the vocabulary is learned from
# the training rows only; fitting the vectorizer on the full corpus leaks
# test-set tokens into the feature space. test_size and random_state are
# unchanged from the previous matrix-level split.
texts = df_cleaned['Text']
y = df_cleaned['Sentiment']
text_train, text_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.2, random_state=42
)

# Vectorize: fit on the training text, then reuse that vocabulary for the
# held-out text (unseen test tokens are simply ignored).
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-corpus matrix under the training vocabulary, kept so that `X` remains
# defined for any later cell that refers to it.
X = vectorizer.transform(texts)

# Train the model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate the model on the held-out rows
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


In [None]:
import joblib

# Persist the fitted classifier and its vectorizer, one file per object,
# in the order listed (model first, then vectorizer).
artifacts = {
    'sentiment_model.pkl': model,
    'vectorizer.pkl': vectorizer,
}
for filename, fitted_obj in artifacts.items():
    joblib.dump(fitted_obj, filename)
print("Model and Vectorizer saved successfully.")
