In [63]:
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import warnings
warnings.simplefilter("ignore")
df = pd.read_csv("test.csv",encoding='ISO-8859-1')
df.head(10)
messages = df[['text','sentiment']]
messages.isna().sum()
messages.dropna(inplace=True)
lm = WordNetLemmatizer()
tf = TfidfVectorizer(max_features=1000)
corpus = []
for i in range(len(messages)):
    review = re.sub('[^a-zA-Z]',' ', messages['text'][i])
    review = review.lower()
    review = review.split()
    review = [lm.lemmatize(word) for word in review if word not in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)
X = tf.fit_transform(corpus)

In [64]:
messages['clean_text'] = corpus

In [85]:
from sklearn.preprocessing import LabelEncoder
import torch 
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [69]:
le = LabelEncoder()
y = le.fit_transform(messages['sentiment'])

In [73]:
X_train,X_test,y_train,y_test = train_test_split(X.toarray(),y,test_size=0.3,random_state=42)

In [75]:
from torch.utils.data import Dataset,DataLoader

In [79]:
class SentimentDataset(Dataset):
    def __init__(self,X,y):
        self.X = torch.tensor(X,dtype =torch.float32)
        self.y = torch.tensor(y,dtype = torch.float32)
    def __len__(self):
        return len(self.X)
    def __getitem__(self,idx):
        return self.X[idx],self.y[idx]
train_data = SentimentDataset(X_train,y_train)
test_data = SentimentDataset(X_test,y_test)

In [81]:
train_loader = DataLoader(dataset = train_data,batch_size=64,shuffle=True)
test_loader = DataLoader(dataset = test_data,batch_size =64)

In [249]:
class SentimentNet(nn.Module):
    def __init__(self,input_dim):
        super(SentimentNet,self).__init__()
        self.fc1 = nn.Linear(input_dim,128)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128,64)
        self.fc3 = nn.Linear(64,3)
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        return self.fc3(x)
model = SentimentNet(input_dim = X.shape[1])

In [251]:
import torch.optim as optim 
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.1)

In [253]:
epochs =25
for epoch in range(epochs):
    epoch_loss =0
    model.train()
    for inputs, labels in train_loader:
        labels = labels.long()
        outputs = model(inputs)
        loss = criterion(outputs,labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

Epoch 1/25, Loss: 1.0397
Epoch 2/25, Loss: 0.8622
Epoch 3/25, Loss: 0.8300
Epoch 4/25, Loss: 0.7704
Epoch 5/25, Loss: 0.7466
Epoch 6/25, Loss: 0.6794
Epoch 7/25, Loss: 0.6106
Epoch 8/25, Loss: 0.5807
Epoch 9/25, Loss: 0.5225
Epoch 10/25, Loss: 0.5752
Epoch 11/25, Loss: 0.5493
Epoch 12/25, Loss: 0.5008
Epoch 13/25, Loss: 0.5840
Epoch 14/25, Loss: 0.5462
Epoch 15/25, Loss: 0.4707
Epoch 16/25, Loss: 0.4428
Epoch 17/25, Loss: 0.4460
Epoch 18/25, Loss: 0.4191
Epoch 19/25, Loss: 0.4652
Epoch 20/25, Loss: 0.4221
Epoch 21/25, Loss: 0.4430
Epoch 22/25, Loss: 0.3927
Epoch 23/25, Loss: 0.4000
Epoch 24/25, Loss: 0.4015
Epoch 25/25, Loss: 0.4353


In [255]:
correct =0
total =0
model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        labels = labels.long()
        outputs = model(inputs)
        _,predicted = torch.max(outputs,1)
        total+= labels.size(0)
        correct += (predicted==labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")   

Test Accuracy: 60.04%


In [197]:
def preprocess(text):
    review = re.sub('[^a-zA-Z]',' ',text) 
    review = review.lower()
    review = review.split()
    review = [lm.lemmatize(word) for word in review if word not in stopwords.words('english')]
    review = ' '.join(review)
    return review

In [225]:
def predict_sentiment(input_text):
    prediction ={ 0: 'Neutral',
                1: 'Negative',
                 2 :'Positive'
}
                 
    text = preprocess(input_text)
    vec = tf.transform([text]).toarray()
    tensor  = torch.tensor(vec,dtype = torch.float32)
    model.eval()
    with torch.no_grad():
        output = model(tensor)
        _, predicted = torch.max(output, 1)
        return prediction[predicted.item()]
    

In [265]:
user_input = input("Enter a sentence to check sentiment: ")
sentiment = predict_sentiment(user_input)
print(f"The sentiment of the input is: {sentiment}")

Enter a sentence to check sentiment:  im sad


The sentiment of the input is: Neutral


In [301]:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import  accuracy_score,classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV

In [None]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Support Vector Classifier (SVC)': SVC(),
    'Naive Bayes': GaussianNB(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
    'AdaBoost': AdaBoostClassifier(),
    'GradientBoost': GradientBoostingClassifier()
}

In [313]:
X_train,X_test,y_train,y_test = train_test_split(X.toarray(),y,test_size=0.3,random_state=42)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

    

In [None]:
def performace_check(y_pred,y_test):
    for name, model in models.items():
    print(f"Training the {name} model")
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test,y_pred)
    report = classification_report(y_test,y_pred)
    print(f"{name} model's accuracy is {acc:.2f}")
    print(f"Classification report is \n{report}")
    cm = confusion_matrix(y_test,y_pred)
    plt.figure(figsize = (6,4))
    sns.heatmap(cm,annot=True, fmt="d", cmap="Blues", xticklabels=['Negative','Neutral','Positive'], yticklabels=['Negative', 'Neutral','Positive'])
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion matrix - {name}")
    plt.tight_layout()
    plt.show()
    

In [319]:
param_grid={
     'Logistic Regression': {
        'C': [0.1, 1, 10],
        'solver': ['liblinear', 'lbfgs']
    },
    'Random Forest': {
        'n_estimators': [100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'Support Vector Classifier (SVC)': {
        'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf']
    },
    'Naive Bayes': {},  # GaussianNB doesn't have many hyperparameters to tune
    'K-Nearest Neighbors': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance']
    },
    'Decision Tree': {
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'XGBoost': {
        'n_estimators': [100, 200],
        'max_depth': [3, 6],
        'learning_rate': [0.01, 0.1]
    },
    'AdaBoost': {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.1]
    },
    'GradientBoost': {
        'n_estimators': [100, 200],
        'max_depth': [3, 6],
        'learning_rate': [0.01, 0.1]
    }
}

best_models={}

In [324]:
for name, model in models.items():
    print(f"\nTuning hyperparameters for {name}...")
    params = param_grid.get(name,{})
    if params:
        grid = GridSearchCV(model,params,cv=3,scoring='accuracy',n_jobs=-1,verbose=2)
        grid.fit(X_train,y_train)
        best_models[name] = grid.best_estimator_
        
    


Tuning hyperparameters for Logistic Regression...
Fitting 3 folds for each of 6 candidates, totalling 18 fits

Tuning hyperparameters for Random Forest...
Fitting 3 folds for each of 12 candidates, totalling 36 fits

Tuning hyperparameters for Support Vector Classifier (SVC)...
Fitting 3 folds for each of 6 candidates, totalling 18 fits

Tuning hyperparameters for Naive Bayes...

Tuning hyperparameters for K-Nearest Neighbors...
Fitting 3 folds for each of 6 candidates, totalling 18 fits

Tuning hyperparameters for Decision Tree...
Fitting 3 folds for each of 6 candidates, totalling 18 fits

Tuning hyperparameters for XGBoost...
Fitting 3 folds for each of 8 candidates, totalling 24 fits

Tuning hyperparameters for AdaBoost...
Fitting 3 folds for each of 8 candidates, totalling 24 fits


ValueError: Invalid parameter 'max_depth' for estimator AdaBoostClassifier(learning_rate=0.01). Valid parameters are: ['algorithm', 'estimator', 'learning_rate', 'n_estimators', 'random_state'].

In [None]:
user_input = input("Enter a sentence to check sentiment: ")
sentiment = predict_sentiment(user_input)
print(f"The sentiment of the input is: {sentiment}")