In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
df = pd.read_csv(r"final_reviews.csv")

reviews = df['Review'].tolist()
labels = df['Actual Sentiment'].tolist()

vectorizer = TfidfVectorizer(ngram_range=(1, 2))
X_tfidf = vectorizer.fit_transform(reviews)


X_train, X_test, y_train, y_test = train_test_split(X_tfidf, labels, test_size=0.2, random_state=42)


print("Shape of TF-IDF matrix:", X_tfidf.shape)

Shape of TF-IDF matrix: (4000, 55366)


In [10]:
from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X_tfidf, labels, test_size=0.2, random_state=42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", len(y_train))
print("Shape of y_test:", len(y_test))

Shape of X_train: (3200, 55366)
Shape of X_test: (800, 55366)
Shape of y_train: 3200
Shape of y_test: 800


In [38]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.naive_bayes import MultinomialNB


param_grid = {
    'alpha': [0.1, 0.5, 1.0],
    'class_prior': [[0.8, 0.8, 0.2], [0.3, 0.3, 0.4], [0.5, 0.3, 0.2]]
}


nb_classifier = MultinomialNB()


smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

grid_search = GridSearchCV(estimator=nb_classifier, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)

grid_search.fit(X_train_resampled, y_train_resampled)


print("Best Parameters:")
print(grid_search.best_params_)


best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)


print("\nModel Evaluation:")
print(f"Accuracy: {accuracy:.4f}")


Fitting 5 folds for each of 9 candidates, totalling 45 fits
Best Parameters:
{'alpha': 0.1, 'class_prior': [0.3, 0.3, 0.4]}

Model Evaluation:
Accuracy: 0.9767


In [40]:

y_pred = best_model.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)


print("\nModel Evaluation:")
print(f"Accuracy: {accuracy:.4f}")


Model Evaluation:
Accuracy: 0.9767


In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score


df = pd.read_csv(r"final_reviews.csv")

X = df['Review']
y = df['Actual Sentiment'].map({'positive': 1, 'neutral': 0, 'negative': -1}).values


tfidf = TfidfVectorizer(max_features=1500)
X = tfidf.fit_transform(X).toarray()


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)


regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train)


y_pred = regressor.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R² Score (Accuracy): {r2}")


Mean Squared Error: 0.17048748392349733
R² Score (Accuracy): 0.7772788348104154


In [37]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


df = pd.read_csv(r"final_reviews.csv")


X = df['Review']
y = df['Actual Sentiment'].map({'positive': 1, 'neutral': 0, 'negative': -1}).values


vectorizer = TfidfVectorizer(max_features=500)
X_tfidf = vectorizer.fit_transform(X).toarray()


X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.3, random_state=42)


k =30
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)


y_pred = knn.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.74
