In [1]:
import re
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [2]:
positive_texts = [
    "Sir aaj ki class bahut achhi thi",
    "Lecture bahut interesting tha",
    "Concept clearly samajh aa gaya",
    "Sir ka padhane ka style achha hai",
    "Class me maza aaya",
    "Sir ne examples ke sath samjhaya",
    "Aaj ka lecture helpful tha",
    "Sir bahut clearly padhate hai",
    "Class engaging thi",
    "Sir ke explanation se doubt clear ho gaye",

    "Aaj ki class productive thi",
    "Sir ka explanation easy tha",
    "Concept simple language me samjhaya",
    "Sir ne time par syllabus complete kiya",
    "Lecture enjoyable tha",
    "Sir ne students ke questions liye",
    "Class ka environment achha tha",
    "Padhai interesting lag rahi hai",
    "Sir ke notes helpful hai",
    "Aaj ka topic achha laga",

    "Sir motivate karte hai",
    "Class me focus bana raha",
    "Sir ka approach achha hai",
    "Lecture achhi pace me tha",
    "Sir friendly hai",
    "Concept practical examples se samjha",
    "Class interactive thi",
    "Sir clearly bolte hai",
    "Aaj padhai samajh aayi",
    "Class ka experience achha raha",

    "Sir ki teaching style superb hai",
    "Har topic detail mein cover kiya gaya",
    "Learning experience bahut zabardast tha",
    "Mujhe laga ki main bohot kuch seekh gaya",
    "Questions poochne me koi jhijhak nahi hui",
    "Pure lecture me energy bani rahi",
    "Sir ka sense of humor bhi achha hai",
    "Complex topics bhi aasan lage",
    "Notes aur slides dono kaam ke the",
    "Ekdum perfect class thi aaj ki",

    "Aaj ka session mind-blowing tha",
    "Har cheez crystal clear ho gayi",
    "Itna confidence mila ki kya bataun",
    "Sir jaisa teacher milna mushkil hai",
    "Class mein time kaise nikal gaya pata hi nahi chala",
    "Sabse best part tha interactive sessions",
    "Maza aa gaya padhke aaj",
    "Definitely recommend karunga is class ko",
    "Padhai ko interesting bana dete hai sir",
    "Awesome experience, thank you sir",

    "Really enjoyed the lecture today",
    "Concepts became so much clearer",
    "Sir has a great way of explaining things",
    "Feeling very positive about my learning",
    "The examples were spot on",
    "Highly engaging and informative class",
    "My doubts were completely resolved",
    "The content was very relevant and useful",
    "Looking forward to the next class",
    "A very enriching learning session"
]

In [3]:
negative_texts = [
    "Aaj ki class boring thi",
    "Lecture samajh nahi aaya",
    "Sir bahut fast padhate hai",
    "Class me interest nahi bana",
    "Aaj ka topic confusing tha",
    "Sir ka explanation clear nahi tha",
    "Lecture dull lag raha tha",
    "Class me focus nahi ho paaya",
    "Sir examples nahi dete",
    "Aaj padhai mushkil lagi",

    "Class ka pace bahut fast tha",
    "Topic properly explain nahi hua",
    "Sir repeat nahi karte",
    "Class me clarity nahi thi",
    "Lecture boring ho gaya",
    "Aaj class me maza nahi aaya",
    "Sir ke notes samajh nahi aaye",
    "Class me confusion raha",
    "Concept tough laga",
    "Sir thoda slow padhaye",

    "Lecture exhausting tha",
    "Class ka flow achha nahi tha",
    "Sir ne doubts clear nahi kiye",
    "Topic boring laga",
    "Class me attention nahi bana",
    "Sir jaldi jaldi padha rahe the",
    "Lecture heavy laga",
    "Class me interest kam tha",
    "Topic achha explain nahi hua",
    "Samajhne me dikkat hui",

    "Sir ne aaj bilkul interact nahi kiya",
    "Class bahut lambi lagi",
    "Mera time waste ho gaya",
    "Is topic se dimaag kharab ho gaya",
    "Pura session frustrating tha",
    "Homework bahut zyada de diya",
    "Mujhe kuch bhi yaad nahi raha",
    "Sir ka mood achha nahi tha lagta hai",
    "Next class attend nahi karunga",
    "Ye sab mere sir ke upar se gaya",

    "Lecture me bahut disturbance thi",
    "Aaj ki class pointless thi",
    "Sir ne humein bore kar diya",
    "Online class offline jaisi feel nahi hui",
    "Expected se bahut kam tha aaj ka lecture",
    "Lagta hai sir ko bhi interest nahi tha",
    "Sab kuch bahut confusing tha",
    "Mera man nahi laga padhai me",
    "Is topic ko revise karna padega alag se",
    "Sir ne details me nahi bataya",

    "This class was a waste of time",
    "Couldn't understand anything today",
    "The teacher was too fast",
    "Lost all my interest in the subject",
    "The topic was extremely confusing",
    "Explanation was very unclear",
    "Felt sleepy throughout the lecture",
    "Couldn't focus at all",
    "No proper examples were given",
    "Found today's study very difficult"
]

In [4]:
neutral_texts = [
    "Aaj class normal thi",
    "Lecture theek tha",
    "Topic average laga",
    "Class routine jaisi thi",
    "Aaj kuch khas nahi tha",
    "Lecture okay tha",
    "Class usual thi",
    "Topic manageable tha",
    "Sir ne padhaya",
    "Class chal rahi thi",

    "Lecture expected jaisa tha",
    "Aaj ka topic average tha",
    "Class me kuch naya nahi tha",
    "Lecture standard tha",
    "Class me koi problem nahi thi",
    "Topic theek se cover hua",
    "Lecture time par khatam hua",
    "Class me normal discussion hui",
    "Sir ne syllabus follow kiya",
    "Class smooth thi",

    "Lecture informative tha",
    "Topic understandable tha",
    "Class me balance tha",
    "Sir ka style same tha",
    "Lecture me flow tha",
    "Topic predictable tha",
    "Class regular thi",
    "Lecture decent tha",
    "Topic acceptable laga",
    "Class satisfactory thi",

    "Class me na kuch achha tha na bura",
    "Aaj ka session bas ho gaya",
    "Pura lecture suna par impact kuch nahi hua",
    "Jo sikhaya gaya woh theek tha",
    "Sir ne apna kaam kiya",
    "Class ka experience bas average tha",
    "Aisa hi hota hai roj",
    "Attendance ke liye aaya tha",
    "Kuch naya nahi tha aaj",
    "Bas sunte rahe sab",

    "Class me zyada excitement nahi thi",
    "Topic was neither good nor bad",
    "Sir did his job well",
    "It was just another class",
    "Nothing special happened today",
    "The lecture was okay, not great, not bad",
    "I learned something, but it wasn't groundbreaking",
    "The pace was normal, not too fast, not too slow",
    "The environment was fine, no issues",
    "It was a standard learning experience",

    "Class khatam ho gayi",
    "Sab theek tha",
    "Koi issue nahi tha",
    "Bas ho gaya kaam",
    "Roz jaisa hi tha",
    "Syllabus cover ho gaya",
    "Sir ne padha diya",
    "It was what I expected",
    "No complaints",
    "Content was delivered"
]

In [5]:
import pandas as pd

In [6]:
texts = positive_texts + negative_texts + neutral_texts

labels = (
    ["Positive"] * len(positive_texts) +
    ["Negative"] * len(negative_texts) +
    ["Neutral"] * len(neutral_texts)
)

df = pd.DataFrame({
    "text": texts,
    "label": labels
})

label2id = {"Positive":0, "Negative":1, "Neutral":2}
id2label = {v:k for k,v in label2id.items()}
df["label_id"] = df["label"].map(label2id)

df.head()

Unnamed: 0,text,label,label_id
0,Sir aaj ki class bahut achhi thi,Positive,0
1,Lecture bahut interesting tha,Positive,0
2,Concept clearly samajh aa gaya,Positive,0
3,Sir ka padhane ka style achha hai,Positive,0
4,Class me maza aaya,Positive,0


In [None]:
df.to_csv("feedback.csv", index=False)

In [9]:
import regex as re

In [8]:
df = pd.read_csv("feedback.csv")

In [9]:
def clean_text(text):
    text = text.lower()
    # Keep Hindi characters (U+0900-U+097F), English letters (a-zA-Z), and spaces
    text = re.sub(r'[^\u0900-\u097F a-zA-Z]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()


In [10]:
df["cleaned"] = df["text"].apply(clean_text)
df.head()

Unnamed: 0,text,label,label_id,cleaned
0,Sir aaj ki class bahut achhi thi,Positive,0,sir aaj ki class bahut achhi thi
1,Lecture bahut interesting tha,Positive,0,lecture bahut interesting tha
2,Concept clearly samajh aa gaya,Positive,0,concept clearly samajh aa gaya
3,Sir ka padhane ka style achha hai,Positive,0,sir ka padhane ka style achha hai
4,Class me maza aaya,Positive,0,class me maza aaya


In [11]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["cleaned"])
y = df["label_id"]

print("TF-IDF vectorization complete. X and y variables are created with expanded data.")

TF-IDF vectorization complete. X and y variables are created with expanded data.


In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

print("Data split into training and testing sets. Logistic Regression model trained with expanded data.")

Data split into training and testing sets. Logistic Regression model trained with expanded data.


In [13]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy on Test Data: {accuracy:.4f}")

Model Accuracy on Test Data: 0.7222


In [14]:
def predict_sentiment(sentence):
    # 1. Clean the input sentence
    cleaned_sentence = clean_text(sentence)

    # 2. Vectorize the cleaned sentence using the trained vectorizer
    # TfidfVectorizer.transform expects an iterable, so pass in a list
    vectorized_sentence = vectorizer.transform([cleaned_sentence])

    # 3. Predict the sentiment using the trained model
    numerical_label = model.predict(vectorized_sentence)[0]

    # 4. Convert the numerical label back to its corresponding sentiment string
    predicted_sentiment = id2label[numerical_label]

    return predicted_sentiment

print("predict_sentiment function defined successfully.")

predict_sentiment function defined successfully.


In [15]:
predicted_sentiment = predict_sentiment("Sir aaj ki class bahut achhi thi")
print(f"Sentence: 'Sir aaj ki class bahut achhi thi' -> Predicted Sentiment: {predicted_sentiment}")

Sentence: 'Sir aaj ki class bahut achhi thi' -> Predicted Sentiment: Positive


In [16]:
predicted_sentiment = predict_sentiment("Lecture boring tha aur kuch samajh nahi aaya")
print(f"Sentence: 'Lecture boring tha aur kuch samajh nahi aaya' -> Predicted Sentiment: {predicted_sentiment}")

Sentence: 'Lecture boring tha aur kuch samajh nahi aaya' -> Predicted Sentiment: Negative


In [17]:
predicted_sentiment = predict_sentiment("Class theek thi, kuch naya nahi sikha")
print(f"Sentence: 'Class theek thi, kuch naya nahi sikha' -> Predicted Sentiment: {predicted_sentiment}")

Sentence: 'Class theek thi, kuch naya nahi sikha' -> Predicted Sentiment: Neutral


In [18]:
new_hinglish_sentences = [
    "Sir ki class amazing thi, sab kuch clear ho gaya!", # Positive
    "Lecture bahut bekar tha, time waste ho gaya",    # Negative
    "Class theek thi, kuch naya nahi sikha",          # Neutral
    "Aaj ki padhai bahut productive thi",             # Positive
    "Mujhe kuch samajh nahi aaya, sir bahut fast the",# Negative
    "Overall experience average tha",                 # Neutral
    "Sir ne doubts bahut achhe se solve kiye",        # Positive
    "Aaj mood kharab ho gaya class me",               # Negative
    "Class normal chali",                             # Neutral
    "Best class ever!"
]

print("Predicting sentiments for new Hinglish sentences:")
for sentence in new_hinglish_sentences:
    predicted_sentiment = predict_sentiment(sentence)
    print(f"Sentence: '{sentence}' -> Predicted Sentiment: {predicted_sentiment}")

Predicting sentiments for new Hinglish sentences:
Sentence: 'Sir ki class amazing thi, sab kuch clear ho gaya!' -> Predicted Sentiment: Neutral
Sentence: 'Lecture bahut bekar tha, time waste ho gaya' -> Predicted Sentiment: Negative
Sentence: 'Class theek thi, kuch naya nahi sikha' -> Predicted Sentiment: Neutral
Sentence: 'Aaj ki padhai bahut productive thi' -> Predicted Sentiment: Negative
Sentence: 'Mujhe kuch samajh nahi aaya, sir bahut fast the' -> Predicted Sentiment: Negative
Sentence: 'Overall experience average tha' -> Predicted Sentiment: Neutral
Sentence: 'Sir ne doubts bahut achhe se solve kiye' -> Predicted Sentiment: Negative
Sentence: 'Aaj mood kharab ho gaya class me' -> Predicted Sentiment: Negative
Sentence: 'Class normal chali' -> Predicted Sentiment: Neutral
Sentence: 'Best class ever!' -> Predicted Sentiment: Positive


In [22]:
import joblib

# Save TF-IDF vectorizer
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

# Save trained model
joblib.dump(model, "sentiment_model.pkl")

print("Model and vectorizer saved successfully!")


Model and vectorizer saved successfully!
