In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load dataset from train.txt
def load_dataset(file_path) -> pd.DataFrame:
    """Read a ``text;label`` file into a two-column DataFrame.

    Each line is stripped of surrounding whitespace and split on its LAST
    semicolon, so a sentence that itself contains ``;`` is kept intact
    instead of being discarded. This assumes labels never contain ``;``.

    Lines are skipped when they have no semicolon at all, or when either the
    text or the label part is empty (an empty label would otherwise become a
    spurious class during training).

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A DataFrame with string columns ``text`` and ``label``, one row per
        accepted line, in file order. Empty (but with both columns) when no
        line qualifies.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    texts = []
    labels = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # rpartition yields ('', '', line) when there is no ';' at all.
            text, sep, label = line.strip().rpartition(';')
            if not sep or not text or not label:
                continue
            texts.append(text)
            labels.append(label)
    return pd.DataFrame({'text': texts, 'label': labels})

df = load_dataset("train.txt")
print("Loaded dataset:")
print(df.head())

# 2. Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X, y = df['text'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3. TF-IDF features: fitted on the training split only, then reused unchanged
#    for the test split
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_features=5000,
)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# 4. Fit the classifier on the vectorized training split (fit returns the estimator)
model = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)

# 5. Predict on the held-out split and print the classification report
y_pred = model.predict(X_test_vec)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# 6. Confusion Matrix
# Rows are the actual labels and columns the predicted labels; both axes use
# the same label order (labels_sorted) so the tick labels line up with cells.
plt.figure(figsize=(8, 6))
labels_sorted = sorted(model.classes_)
sns.heatmap(confusion_matrix(y_test, y_pred, labels=labels_sorted),
            annot=True, fmt='d',
            xticklabels=labels_sorted,
            yticklabels=labels_sorted,
            cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.tight_layout()
# Display the figure explicitly: without this nothing is shown when the code
# runs as a plain script, and in a notebook the plot would only appear after
# the blocking input() prompt further down has been answered.
plt.show()


# 7. User Input Prediction
def predict_emotion(text):
    """Print the predicted emotion and its confidence for one sentence.

    Uses the module-level ``vectorizer`` and ``model``. The sentence is
    vectorized as a one-element batch; the reported confidence is the largest
    value returned by ``model.predict_proba`` for that sentence.

    Args:
        text: The sentence to classify.

    Returns:
        None. The result is written to standard output.
    """
    features = vectorizer.transform([text])
    predicted_label = model.predict(features)[0]
    confidence = model.predict_proba(features).max()
    message = f"\n🗣 Input: {text}\n🎯 Predicted Emotion: {predicted_label} (Confidence: {confidence:.2f})"
    print(message)

# Interactive demo: read one sentence from standard input and report the
# emotion the trained model predicts for it
prompt = "Enter a sentence to analyze emotion: "
a = input(prompt)
predict_emotion(a)