# Feedback Classifier – Training Notebook

# In [None]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score
import numpy as np

# Load the data
df = pd.read_csv("../data/dummy_feedback.csv")


def _parse_topics(raw):
    """Split a comma-separated topic string into a list of clean topic names.

    Non-string input (such as the missing-value marker pandas uses for an
    empty CSV cell) yields an empty list. Blank entries left by stray or
    trailing commas are dropped so they never become a spurious '' label.
    """
    if not isinstance(raw, str):
        return []
    return [name for name in (part.strip() for part in raw.split(',')) if name]


# Clean text
# Missing feedback becomes an empty document rather than a missing value,
# which the downstream text vectoriser cannot process.
df['feedback'] = df['feedback'].fillna('').str.lower().str.strip()
# Lower-case first, then split; each topic is stripped inside _parse_topics.
df['topics'] = df['topics'].str.lower().apply(_parse_topics)

# Multi-label binarisation: one 0/1 indicator column per distinct topic
mlb = MultiLabelBinarizer()
topic_labels = mlb.fit_transform(df['topics'])

# Prepare targets: column 0 is the sentiment label, the remaining columns are
# the topic indicators.
y_sentiment = df['sentiment']
sentiment_column = y_sentiment.to_numpy().reshape(-1, 1)
if sentiment_column.dtype.kind in 'biuf':
    # Numeric sentiment stacks cleanly with the integer topic indicators.
    y_combined = np.hstack([sentiment_column, topic_labels])
else:
    # Text sentiment: stacking it with integers would produce an object array,
    # and scikit-learn rejects object-dtype integer columns as an unknown
    # label type. Use one uniform string array instead; the topic columns are
    # converted back to integers before scoring below.
    y_combined = np.hstack([sentiment_column.astype(str), topic_labels.astype(str)])

# Train-test split on the raw text, so the vectoriser below is fitted on
# training documents only and no test-set vocabulary/IDF statistics leak into
# the features used for evaluation.
text_train, text_test, y_train, y_test = train_test_split(
    df['feedback'], y_combined, test_size=0.2, random_state=42
)

# TF-IDF vectorisation (fit on train, apply to test)
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus matrix, kept so any later cell that references `X` still works.
X = vectorizer.transform(df['feedback'])

# Train model: one independent logistic regression per target column
model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)
y_test_sentiment, y_pred_sentiment = y_test[:, 0], y_pred[:, 0]
# Cast the topic columns to int so the metrics see a 0/1 multilabel indicator
# matrix even when the combined target array holds strings.
y_test_topics = y_test[:, 1:].astype(int)
y_pred_topics = y_pred[:, 1:].astype(int)

# Evaluate
print("Sentiment Accuracy:", accuracy_score(y_test_sentiment, y_pred_sentiment))
print("Topic Precision:", precision_score(y_test_topics, y_pred_topics, average='micro'))
print("Topic Recall:", recall_score(y_test_topics, y_pred_topics, average='micro'))
print("Topic F1 Score:", f1_score(y_test_topics, y_pred_topics, average='micro'))
