In [3]:
import pandas as pd

# Read each split from disk and discard every row that contains a missing value.
train = pd.read_csv('train.csv').dropna()
test = pd.read_csv('test.csv').dropna()
dev = pd.read_csv('dev.csv').dropna()

# Separate the input column ('text') from the label column ('sentiment') per split.
x_train = train['text']
y_train = train['sentiment']

x_test = test['text']
y_test = test['sentiment']

x_dev = dev['text']
y_dev = dev['sentiment']



In [19]:
#+++++++++++++++++++++++++++++++++++++++++++++++++
# Baseline model 1: TF-IDF - Logistic Regression +
#+++++++++++++++++++++++++++++++++++++++++++++++++

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Character n-gram TF-IDF features (3 to 5 characters, "char_wb" analyzer).
# The vocabulary is fitted on the training split only and then reused for dev.
tfidf = TfidfVectorizer(
    max_features=50000,
    min_df=3,
    ngram_range=(3, 5),
    analyzer="char_wb",
)
x_train_tfidf = tfidf.fit_transform(x_train)
x_dev_tfidf = tfidf.transform(x_dev)

# Logistic regression with balanced class weights; fit() returns the estimator,
# so construction and training are chained.
model1 = LogisticRegression(C=0.5, max_iter=2000, class_weight="balanced").fit(
    x_train_tfidf, y_train
)

# Score the dev split and print the per-class precision/recall/F1 report.
y_dev_pred = model1.predict(x_dev_tfidf)
metrics = classification_report(y_dev, y_dev_pred)
print(metrics)

                precision    recall  f1-score   support

Mixed_feelings       0.30      0.47      0.37       508
      Negative       0.41      0.61      0.49       521
      Positive       0.89      0.70      0.78      2444

      accuracy                           0.65      3473
     macro avg       0.53      0.59      0.55      3473
  weighted avg       0.73      0.65      0.68      3473



In [20]:
# A few hand-written samples to sanity-check model1's predictions.
texts = [
    "Vera level padam da maplla",
    "kevalamana song da chai..",
    "nalla padam but only song nalla illa",
]

# Vectorise each sample on its own with the fitted TF-IDF and print the
# text (left-aligned, padded to 40 characters) next to the predicted label array.
for text in texts:
    sentiment = model1.predict(
        tfidf.transform([text])
    )
    print(f"{text:<40}:{sentiment}")

Vera level padam da maplla              :['Positive']
kevalamana song da chai..               :['Negative']
nalla padam but only song nalla illa    :['Mixed_feelings']


In [17]:
#+++++++++++++++++++++++++++++++++++++++++++++++++
#  Baseline model 2: TF-IDF - Linear SVM         +
#+++++++++++++++++++++++++++++++++++++++++++++++++

from sklearn.svm import LinearSVC

# Linear SVM baseline trained on the same TF-IDF matrices as baseline model 1
# (x_train_tfidf / x_dev_tfidf), so the TF-IDF cell must have been run first.
#
# random_state is pinned: LinearSVC's solver shuffles the data pseudo-randomly
# when solving the dual problem, so without a fixed seed the fitted model (and
# the report below) can differ from run to run.
model2 = LinearSVC(
    class_weight="balanced",
    max_iter=5000,
    C=0.5,
    random_state=42,  # arbitrary fixed seed, only for reproducibility
)
model2.fit(x_train_tfidf, y_train)

# NOTE: y_dev_pred is reused here and overwrites the predictions from model1.
y_dev_pred = model2.predict(x_dev_tfidf)
metrics_svm = classification_report(y_dev, y_dev_pred)
print(metrics_svm)

                precision    recall  f1-score   support

Mixed_feelings       0.36      0.33      0.34       508
      Negative       0.48      0.51      0.49       521
      Positive       0.84      0.84      0.84      2444

      accuracy                           0.72      3473
     macro avg       0.56      0.56      0.56      3473
  weighted avg       0.71      0.72      0.72      3473



In [18]:
# A few hand-written samples to sanity-check model2's predictions.
texts = [
    "Vera level padam da maplla",
    "kevalamana song da chai..",
    "nalla padam but only song nalla illa",
]

# Vectorise each sample on its own with the fitted TF-IDF and print the
# text (left-aligned, padded to 40 characters) next to the predicted label array.
for text in texts:
    sentiment = model2.predict(
        tfidf.transform([text])
    )
    print(f"{text:<40}:{sentiment}")

Vera level padam da maplla              :['Positive']
kevalamana song da chai..               :['Negative']
nalla padam but only song nalla illa    :['Mixed_feelings']
