<a href="https://colab.research.google.com/github/Anjana71/sentiment_analysis_project/blob/main/sentiment_analysis_proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import zipfile
import os

zip_path = "/content/archive (2).zip"
extract_dir = "/content/emotion_data"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print("‚úÖ Extracted contents:")
for root, dirs, files in os.walk(extract_dir):
    print(f"üìÅ {root}")
    for file in files:
        print(f"    üìÑ {file}")


‚úÖ Extracted contents:
üìÅ /content/emotion_data
    üìÑ val.txt
    üìÑ train.txt
    üìÑ test.txt


In [20]:
import pandas as pd

def load_emotion_data(folder_path):
    texts, labels = [], []
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".txt"):
                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                    for line in f:
                        if ";" in line:
                            parts = line.strip().split(";")
                            if len(parts) == 2:
                                text, label = parts
                                if text.strip() and label.strip():
                                    texts.append(text.strip())
                                    labels.append(label.strip())
    return pd.DataFrame({"text": texts, "label": labels})


In [23]:
import pandas as pd

def load_single_file(path):
    texts, labels = [], []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split(';')
            if len(parts) == 2:
                text, label = parts
                if text.strip() and label.strip():
                    texts.append(text.strip())
                    labels.append(label.strip())
    return pd.DataFrame({"text": texts, "label": labels})

# Load each file
train_df = load_single_file("/content/emotion_data/train.txt")
val_df = load_single_file("/content/emotion_data/val.txt")
test_df = load_single_file("/content/emotion_data/test.txt")

# Combine them
df = pd.concat([train_df, val_df, test_df], ignore_index=True)

# Preview
print(f"‚úÖ Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)} | Total: {len(df)}")
print(df.sample(5))


‚úÖ Train: 16000 | Val: 2000 | Test: 2000 | Total: 20000
                                                    text    label
10680  i am still numb i question everything about wh...     fear
19761  i write this i giggle and shake my head in hum...      joy
9229           i know but it still feels very unpleasant  sadness
16239  i feel strongly that those who finger point an...      joy
11367  i get the feeling that if the tabloids either ...  sadness


In [24]:
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def clean_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"[^a-z\s]", "", text)
    return text

df['cleaned'] = df['text'].apply(clean_text)

X = df['cleaned']
y = df['label']

vectorizer = TfidfVectorizer(max_features=3000)
X_vec = vectorizer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)

print("‚úÖ Model trained")
print(classification_report(y_test, model.predict(X_test)))


‚úÖ Model trained
              precision    recall  f1-score   support

       anger       0.95      0.52      0.67       540
        fear       0.86      0.47      0.61       442
         joy       0.68      0.98      0.80      1335
        love       0.97      0.17      0.30       332
     sadness       0.76      0.94      0.84      1195
    surprise       1.00      0.03      0.06       156

    accuracy                           0.74      4000
   macro avg       0.87      0.52      0.55      4000
weighted avg       0.80      0.74      0.70      4000



In [26]:
while True:
    user_input = input("üîç Enter a sentence (type 'exit' to stop): ")
    if user_input.lower() == "exit":
        print("üëã Exiting manual prediction...")
        break
    cleaned = clean_text(user_input)
    vec = vectorizer.transform([cleaned])
    pred = model.predict(vec)[0]
    print(f"üëâ Predicted Emotion: {pred}")


üîç Enter a sentence (type 'exit' to stop): i am happy, today is really a good day
üëâ Predicted Emotion: joy
üîç Enter a sentence (type 'exit' to stop): i didnt expect this, things are going in a wrong way 
üëâ Predicted Emotion: sadness
üîç Enter a sentence (type 'exit' to stop): exit
üëã Exiting manual prediction...


In [27]:
import gradio as gr
import matplotlib.pyplot as plt
import io
from PIL import Image

def predict_emotion(text):
    try:
        cleaned = clean_text(text)
        vec = vectorizer.transform([cleaned])
        pred = model.predict(vec)[0]
        probs = model.predict_proba(vec)[0]
        prob_dict = dict(zip(model.classes_, probs))

        plt.figure(figsize=(5, 3))
        plt.bar(prob_dict.keys(), prob_dict.values(), color='skyblue')
        plt.title("Emotion Confidence")
        plt.xticks(rotation=45)
        plt.tight_layout()
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        plt.close()
        buf.seek(0)
        image = Image.open(buf)

        return f"**Predicted Emotion:** {pred}", image
    except Exception as e:
        return f"‚ùå Error: {str(e)}", None

with gr.Blocks() as demo:
    gr.Markdown("## üß† Emotion Detection using Classical ML")
    inp = gr.Textbox(label="Enter text")
    out_text = gr.Markdown()
    out_plot = gr.Image(type="pil")
    btn = gr.Button("Predict")
    btn.click(fn=predict_emotion, inputs=inp, outputs=[out_text, out_plot])

demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1f81fdcf6a9617c17b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


