In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
import pickle

# Load the IMDb dataset from a local file
file_path = 'IMDB Dataset.csv'  # Path to your CSV file (resolved against the current working directory)
df = pd.read_csv(file_path)

# Encode the text labels as integers: 'positive' -> 1, 'negative' -> 0.
# Only the label column is transformed in this step; 'review' is left as loaded.
# Series.map() yields NaN for every value that is not a key of the dict
# (different casing, stray whitespace, empty cells), so check the result and
# fail loudly with the offending values instead of passing NaN targets on.
label_map = {'positive': 1, 'negative': 0}
encoded = df['sentiment'].map(label_map)
unmapped = encoded.isna()
if unmapped.any():
    bad_values = df.loc[unmapped, 'sentiment'].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'sentiment' column: {bad_values}; "
        f"expected one of {sorted(label_map)}"
    )
df['sentiment'] = encoded.astype('int64')  # Binary integer labels

# Hold out 20% of the rows as a test set. The fixed random_state makes the
# shuffle, and therefore the split, reproducible from run to run.
# No `stratify=` is passed, so class proportions in the two parts are not
# forced to match.
X_train, X_test, y_train, y_test = train_test_split(
    df['review'],     # inputs: the 'review' column
    df['sentiment'],  # targets: the 'sentiment' column
    random_state=42,
    test_size=0.2,
)

# Two-step model: TF-IDF text features feeding a logistic-regression classifier.
# The step names ('tfidf', 'clf') are the prefixes used by `<step>__<param>`
# keys when tuning hyperparameters, so they must stay stable.
pipeline = Pipeline(steps=[
    ('tfidf', TfidfVectorizer()),                # vectorizer with default settings
    ('clf', LogisticRegression(max_iter=1000)),  # solver iteration cap set to 1000
])

# Exhaustive hyperparameter search: 2 n-gram ranges x 3 values of C gives
# 6 candidates, each scored by accuracy using 3-fold cross-validation.
param_grid = {
    'tfidf__ngram_range': [(1, 1), (1, 2)],  # unigrams only vs. unigrams + bigrams
    'clf__C': [0.1, 1, 10],                  # inverse regularization strength
}

grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,  # run candidate fits in parallel on all available processors
)
grid_search.fit(X_train, y_train)

# Score the winning estimator on the held-out test split.
# `best_score_` is the cross-validation score recorded by the grid search;
# the accuracy and classification report are computed from X_test / y_test.
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)

cv_accuracy = grid_search.best_score_
test_accuracy = accuracy_score(y_test, predictions)
test_report = classification_report(y_test, predictions)

print("Best Grid Search Score:", cv_accuracy)
# The selected hyperparameters are available as grid_search.best_params_ (not printed here).
print("Accuracy:", test_accuracy)
print(test_report)

# Write the best model to disk as a pickle file in the working directory.
# NOTE: unpickling can execute arbitrary code, so only load this file back
# from a location you trust.
with open('enhanced_sentiment_model.pkl', mode='wb') as model_file:
    pickle.dump(best_model, model_file)

print("Model training complete and saved.")


Best Grid Search Score: 0.9070999761344716
Accuracy: 0.9173
              precision    recall  f1-score   support

           0       0.93      0.91      0.92      4961
           1       0.91      0.93      0.92      5039

    accuracy                           0.92     10000
   macro avg       0.92      0.92      0.92     10000
weighted avg       0.92      0.92      0.92     10000

Model training complete and saved.
