## Import necessary libraries 

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import StratifiedKFold

## Prepare the dataset

In [49]:
data={
    'text':["I love this product",
        "This is the worst thing ever",
        "I am so happy with the service",
        "I hate this",
        "It is okay, not bad",
        "Absolutely fantastic!",
        "Terrible experience",
        "I feel great",
        "Not good at all",
        "I like it",
        "The food was amazing",
        "I will never come back here",
        "The movie was okay",
        "What a wonderful day",
        "I do not like this at all",
        "The service was excellent",
        "I'm disappointed with the quality",
        "It's just average",
        "Great value for money",
        "Awful customer support",
        "Happy with my purchase",
        "Not impressed by the product",
        "Could be better",
        "Totally satisfied",
        "This is unacceptable",
        "It's fine, nothing special",
        "Excellent experience",
        "Worst purchase ever",
        "I feel so lucky to find this",
        "This is mediocre",
        "Highly recommend it",
        "Really bad",
        "Love it so much",
        "Not worth the price",
        "I enjoyed it",
        "Very disappointing",
        "Pretty good overall",
        "I do not recommend",
        "Fantastic quality",
        "It's not bad",
        "Very poor performance"],
    'label':['positive', 'negative', 'positive', 'negative', 'neutral',
        'positive', 'negative', 'positive', 'negative', 'positive',
        'positive', 'negative', 'neutral', 'positive', 'negative',
        'positive', 'negative', 'neutral', 'positive', 'positive',
        'negative', 'neutral', 'positive', 'negative', 'neutral',
        'positive', 'negative', 'positive', 'neutral', 'positive',
        'negative', 'positive', 'negative', 'positive', 'negative',
        'neutral', 'negative', 'positive', 'neutral', 'negative','negative']
}

In [21]:
print(len(data['text']))
print(len(data['label']))

41
41


In [23]:
df=pd.DataFrame(data)
df

Unnamed: 0,text,label
0,I love this product,positive
1,This is the worst thing ever,negative
2,I am so happy with the service,positive
3,I hate this,negative
4,"It is okay, not bad",neutral
5,Absolutely fantastic!,positive
6,Terrible experience,negative
7,I feel great,positive
8,Not good at all,negative
9,I like it,positive


## Preprocess the data

In [26]:
label_mapping={'positive':1,'neutral':0,'negative':-1}
df['label']=df['label'].map(label_mapping)
df['text']=df['text'].str.lower()
print(df)

                                 text  label
0                 i love this product      1
1        this is the worst thing ever     -1
2      i am so happy with the service      1
3                         i hate this     -1
4                 it is okay, not bad      0
5               absolutely fantastic!      1
6                 terrible experience     -1
7                        i feel great      1
8                     not good at all     -1
9                           i like it      1
10               the food was amazing      1
11        i will never come back here     -1
12                 the movie was okay      0
13               what a wonderful day      1
14          i do not like this at all     -1
15          the service was excellent      1
16  i'm disappointed with the quality     -1
17                  it's just average      0
18              great value for money      1
19             awful customer support      1
20             happy with my purchase     -1
21       n

## Stratified k Fold validation

In [59]:
x=df['text']
y=df['label']
skf=StratifiedKFold()
for fold, (train_idx, val_idx) in enumerate(skf.split(x, y), 1):
    X_train, X_val = x.iloc[train_idx], x.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
    print(f"Fold {fold}")
    print(y_train.value_counts(normalize=True))
    print(y_val.value_counts(normalize=True))


Fold 1
label
-1    0.40625
 1    0.40625
 0    0.18750
Name: proportion, dtype: float64
label
 1    0.444444
-1    0.333333
 0    0.222222
Name: proportion, dtype: float64
Fold 2
label
 1    0.393939
-1    0.393939
 0    0.212121
Name: proportion, dtype: float64
label
 1    0.500
-1    0.375
 0    0.125
Name: proportion, dtype: float64
Fold 3
label
 1    0.424242
-1    0.363636
 0    0.212121
Name: proportion, dtype: float64
label
-1    0.500
 1    0.375
 0    0.125
Name: proportion, dtype: float64
Fold 4
label
 1    0.424242
-1    0.393939
 0    0.181818
Name: proportion, dtype: float64
label
 1    0.375
-1    0.375
 0    0.250
Name: proportion, dtype: float64
Fold 5
label
 1    0.424242
-1    0.393939
 0    0.181818
Name: proportion, dtype: float64
label
 1    0.375
-1    0.375
 0    0.250
Name: proportion, dtype: float64


##  Vectorize text (convert text to number)

In [62]:
vectorizer=TfidfVectorizer()
x_train_vec=vectorizer.fit_transform(X_train)
x_test_vec=vectorizer.transform(X_test)

##  Train a classifier (Logistic Regression)


In [65]:
model=LogisticRegression(max_iter=1000, class_weight='balanced')
model.fit(x_train_vec,y_train)

## Evaluate the model

In [68]:
y_pred=model.predict(x_test_vec)
print("accuracy: ",accuracy_score(y_test,y_pred))
print("classification report: ",classification_report(y_test,y_pred))

accuracy:  0.7777777777777778
classification report:                precision    recall  f1-score   support

          -1       1.00      0.33      0.50         3
           0       0.67      1.00      0.80         2
           1       0.80      1.00      0.89         4

    accuracy                           0.78         9
   macro avg       0.82      0.78      0.73         9
weighted avg       0.84      0.78      0.74         9



## Test with custom input

In [77]:
def predict_sentiment(text):
    text_vec = vectorizer.transform([text.lower()])
    pred = model.predict(text_vec)[0]
    inv_map = {1:'positive', 0:'neutral', -1:'negative'}
    return inv_map[pred]


print(predict_sentiment("very greate"))
print(predict_sentiment("very greate"))

positive
