In [29]:
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [30]:
model = joblib.load("../Models/logistic_model.joblib")
tfidf = joblib.load("../Models/tfidf_vectorizer.joblib")
lb = joblib.load("../Models/label_encoder.joblib")

In [31]:
df = pd.read_csv("../Data/processed_data/cleaned_reviews.csv")

In [32]:
df.head()

Unnamed: 0,cleaned_text,sentiment
0,one reviewer mentioned watching oz episode you...,positive
1,wonderful little production filming technique ...,positive
2,thought wonderful way spend time hot summer we...,positive
3,basically there family little boy jake think t...,negative
4,petter matteis love time money visually stunni...,positive


In [33]:
x = df['cleaned_text']
y = lb.transform(df['sentiment'])

In [34]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=42, test_size=0.2, stratify=y)

In [35]:
x_test_tfidf = tfidf.transform(xtest)

In [36]:
y_pred = model.predict(x_test_tfidf)

In [37]:
print(f'Accuracy Score: {accuracy_score(ytest, y_pred)}')

Accuracy Score: 0.9086


In [38]:
print(f'Confussion Matrix:\n{confusion_matrix(ytest, y_pred)}')

Confussion Matrix:
[[4494  506]
 [ 408 4592]]


In [39]:
print(f'Classification Report:\n{classification_report(ytest, y_pred, target_names=lb.classes_)}')

Classification Report:
              precision    recall  f1-score   support

    negative       0.92      0.90      0.91      5000
    positive       0.90      0.92      0.91      5000

    accuracy                           0.91     10000
   macro avg       0.91      0.91      0.91     10000
weighted avg       0.91      0.91      0.91     10000



In [43]:
false_pos_idx = (y_pred  == 1) & (ytest == 0)
print(f'Sample False Positive Index:\n{xtest[false_pos_idx].head(5)}')

Sample False Positive Index:
39791    story bride fair amusing engaging one filmmake...
40714    little quentin seems mastered art cake eating ...
48388    order enjoy fur imaginary portrait diane arbus...
20169    quite fan novelistscreenwriter michael chabon ...
47706    recommendation gloria grahame run kind orphana...
Name: cleaned_text, dtype: object


In [45]:
false_neg_idx = (y_pred  == 0) & (ytest == 1)
print(f'Sample False Negative Index:\n{xtest[false_neg_idx].head(5)}')

Sample False Negative Index:
7266     farrah fawcett give award nominated performanc...
46536    cant believe comment show show genius sure doe...
39806    ive seen branaghs hamlet branagh old speaks fr...
45621    lady gentleman weve really got winner actually...
1396     citizen kanethe godfather part iidurville mart...
Name: cleaned_text, dtype: object
