In [67]:
import sys
import os

# Add the project root (the parent of the current working directory) to
# sys.path so that the `src` package can be imported below.
project_root = os.path.abspath("..")
# Guard the append: re-running this cell would otherwise push the same
# directory onto sys.path again on every execution.
if project_root not in sys.path:
    sys.path.append(project_root)
from src.preprocess_data import preprocess_data

In [None]:
import pandas as pd
import re, string, joblib
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import hstack


# Single source of truth for the data location. The default is the directory
# this notebook was originally run against; set the SENTIMENT_DATA_DIR
# environment variable to run on another machine without editing the cell.
DATA_DIR = os.environ.get(
    "SENTIMENT_DATA_DIR",
    "D:/DataScience/Case Study/Elevate labs/Final_project/data",
)
MODEL_DIR = f"{DATA_DIR}/models"

# Load Saved Artifacts
# NOTE: joblib.load unpickles arbitrary Python objects, so DATA_DIR must only
# ever point at artifacts you trust.
clf = joblib.load(f"{MODEL_DIR}/sentiment_model.pkl")
tfidf = joblib.load(f"{MODEL_DIR}/tfidf_vectorizer.pkl")
le = joblib.load(f"{MODEL_DIR}/label_encoder.pkl")
dummy_columns = joblib.load(f"{MODEL_DIR}/dummy_columns.pkl")


# Load Test Data
# header=None: the first CSV row is read as data, so the column names are
# supplied explicitly.
validation_raw = pd.read_csv(
    f"{DATA_DIR}/twitter_validation.csv",
    header=None,
    names=["id", "account_name", "sentiment_type", "message"],
)

# Keep the raw frame under its own name; test_df is the preprocessed frame
# that the rest of the notebook works with.
test_df = preprocess_data(validation_raw)

# Encode target
# Use the saved encoder instead of fitting a new one on the test labels.
y_test = le.transform(test_df["sentiment_type"])

# One-hot encode account_name
# Encode the account column on its own. Splitting the encoded frame by
# position (iloc[:, :4] / iloc[:, 4:]) only works while preprocess_data
# returns exactly one extra column; selecting by name has no such coupling.
acc_dummies = pd.get_dummies(test_df["account_name"], prefix="acc").astype(int)
other_cols = test_df.drop(columns=["account_name", "sentiment_type", "message"])
test_df_dummies = pd.concat([other_cols, acc_dummies], axis=1)

# Align dummy columns with training
# Columns listed in dummy_columns but absent from the test frame are added as
# all-zero columns in a single concat rather than one insert per column.
missing_cols = [col for col in dummy_columns if col not in test_df_dummies.columns]
zero_fill = pd.DataFrame(0, index=test_df_dummies.index, columns=missing_cols)
test_df_dummies = pd.concat([test_df_dummies, zero_fill], axis=1)
# Selecting by dummy_columns fixes the column order and leaves out any test
# column that is not in that list.
X_acc_test = test_df_dummies[dummy_columns].values


# TF-IDF + account dummies
# Text features come from the already-fitted vectorizer (transform only); the
# account indicator columns are appended to the right of them.
X_test_text = tfidf.transform(test_df["message_clean"])
X_test_final = hstack([X_test_text, X_acc_test])


# Predict & Evaluate
from sklearn.metrics import accuracy_score, classification_report

# Run the loaded classifier on the stacked TF-IDF + account feature matrix.
y_pred = clf.predict(X_test_final)

# Compute both metrics first, then print them; the report rows are labelled
# with the encoder's class names.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=le.classes_)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)


Accuracy: 0.794

Classification Report:
               precision    recall  f1-score   support

  Irrelevant       0.80      0.67      0.73       172
    Negative       0.75      0.85      0.80       266
     Neutral       0.82      0.77      0.79       285
    Positive       0.81      0.84      0.82       277

    accuracy                           0.79      1000
   macro avg       0.80      0.78      0.79      1000
weighted avg       0.80      0.79      0.79      1000



In [69]:
# Inspect the frame X_acc_test was taken from: id, message_clean and the acc_* indicator columns.
test_df_dummies

Unnamed: 0,id,message_clean,acc_Amazon,acc_ApexLegends,acc_AssassinsCreed,acc_Battlefield,acc_Borderlands,acc_CS-GO,acc_CallOfDuty,acc_CallOfDutyBlackopsColdWar,...,acc_Overwatch,acc_PlayStation5(PS5),acc_PlayerUnknownsBattlegrounds(PUBG),acc_RedDeadRedemption(RDR),acc_TomClancysGhostRecon,acc_TomClancysRainbowSix,acc_Verizon,acc_WorldOfCraft,acc_Xbox(Xseries),acc_johnson&johnson
0,3364,i mentioned on facebook that i was struggling ...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,352,bbc news amazon boss jeff bezos rejects claims...,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,8312,why do i pay for word when it functions so poo...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4371,csgo matchmaking is so full of closet hacking ...,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4433,now the president is slapping americans in the...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,4891,⭐️ toronto is the arts and culture capital of ...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
996,4359,this is actually a good move tot bring more vi...,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
997,2652,today sucked so it’s time to drink wine n play...,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
998,8069,bought a fraction of microsoft today small wins,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
# Inspect the account indicator array (columns selected from test_df_dummies in dummy_columns order).
X_acc_test

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]])

In [71]:
# Inspect the validation frame as returned by preprocess_data.
test_df

Unnamed: 0,id,account_name,sentiment_type,message,message_clean
0,3364,Facebook,Irrelevant,I mentioned on Facebook that I was struggling ...,i mentioned on facebook that i was struggling ...
1,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...,bbc news amazon boss jeff bezos rejects claims...
2,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...,why do i pay for word when it functions so poo...
3,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,...",csgo matchmaking is so full of closet hacking ...
4,4433,Google,Neutral,Now the President is slapping Americans in the...,now the president is slapping americans in the...
...,...,...,...,...,...
995,4891,GrandTheftAuto(GTA),Irrelevant,⭐️ Toronto is the arts and culture capital of ...,⭐️ toronto is the arts and culture capital of ...
996,4359,CS-GO,Irrelevant,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...,this is actually a good move tot bring more vi...
997,2652,Borderlands,Positive,Today sucked so it’s time to drink wine n play...,today sucked so it’s time to drink wine n play...
998,8069,Microsoft,Positive,Bought a fraction of Microsoft today. Small wins.,bought a fraction of microsoft today small wins
