In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
from transformers import DataCollatorWithPadding
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Toy training set: each entry pairs one free-text complaint with the category
# it should be classified as. Keeping text and label side by side makes it
# impossible for the two columns to drift out of alignment.
_labelled_complaints = [
    ("My professor hasn’t updated grades for over a month.", "Academic Issue"),
    ("The hostel bathrooms are not being cleaned regularly.", "Management/Hostel Issue"),
    ("I can’t register for my electives on the portal.", "Registration Problem"),
    ("There is harassment happening in my department.", "Conduct-related Issue"),
    ("The internet connection in my room is not working.", "Management/Hostel Issue"),
    ("The course syllabus was changed without notice.", "Academic Issue"),
    ("My account is blocked after multiple failed logins.", "Registration Problem"),
    ("Mess food quality has become very poor.", "Management/Hostel Issue"),
    ("I have been unfairly marked absent in one lecture.", "Academic Issue"),
    ("I lost my ID card and need help getting a duplicate.", "Registration Problem"),
]

# Column-oriented view of the pairs above, in the shape pd.DataFrame expects.
data = {
    "complaint_text": [complaint for complaint, _label in _labelled_complaints],
    "category": [label for _complaint, label in _labelled_complaints],
}

In [None]:
# Materialise the column dict as a DataFrame (one row per complaint) and show
# the first rows as a quick sanity check.
df = pd.DataFrame.from_dict(data)
print("Sample Data:\n", df.head(5))

In [None]:
# Turn the category strings into integer class ids for the loss function.
# The fitted encoder is kept so predictions can be mapped back to names later.
label_encoder = LabelEncoder()
encoded_categories = label_encoder.fit_transform(df["category"])
df["label"] = encoded_categories

# Number of distinct categories; used to size the classification head.
num_classes = len(label_encoder.classes_)

In [None]:
# Hold out 20% of the rows for evaluation. random_state pins the shuffle so the
# same rows land in each split on every run.
split_parts = train_test_split(
    df["complaint_text"],
    df["label"],
    test_size=0.2,
    random_state=42,
)
train_texts, test_texts, train_labels, test_labels = split_parts

In [None]:
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

# Both splits are tokenized with the same settings: truncate at 128 tokens and
# pad within each call.
tokenize_options = {"truncation": True, "padding": True, "max_length": 128}

train_encodings = tokenizer(train_texts.tolist(), **tokenize_options)
test_encodings = tokenizer(test_texts.tolist(), **tokenize_options)

In [None]:
# Pair each tokenized example (a dict of model inputs) with its integer label
# and wrap the pairs in tf.data pipelines, one element per complaint.
train_features, train_targets = dict(train_encodings), list(train_labels)
train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_targets))

test_features, test_targets = dict(test_encodings), list(test_labels)
test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_targets))

In [None]:
# Fix TensorFlow's global seed (same value as the train/test split's
# random_state) so the shuffle order below, and later unseeded TensorFlow
# random ops, are repeatable across kernel restarts.
tf.random.set_seed(42)

# NOTE(review): data_collator is created here but not used by any cell visible
# in this notebook; the encodings were already padded by the tokenizer call.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

# Shuffle only the training data, then batch both splits in groups of 4.
# The names are rebound to the batched pipelines, so this cell is not
# idempotent: re-run the dataset-construction cell before re-running this one.
train_dataset = train_dataset.shuffle(100, seed=42).batch(4).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(4).prefetch(tf.data.AUTOTUNE)

In [None]:
# Training configuration. The loss takes integer labels and raw logits
# (from_logits=True), so no softmax is applied before it.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = ["accuracy"]
optimizer = Adam(learning_rate=5e-5)

# Pretrained DistilBERT with a classification head sized to the number of
# complaint categories.
model = TFDistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=num_classes,
)
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# Fine-tune for 30 epochs; the held-out split doubles as the validation set, so
# the per-epoch validation metrics are computed on the same data that is
# evaluated afterwards.
model.fit(
    train_dataset,
    epochs=30,
    validation_data=test_dataset,
)

In [None]:
# Predict on the held-out split and take the highest-scoring class per example.
predictions = model.predict(test_dataset)
y_pred = np.argmax(predictions.logits, axis=1)

# The test split is tiny and may not contain every category. Without an
# explicit `labels` list, classification_report infers the classes from
# y_true/y_pred and raises ValueError when that count differs from the number
# of target_names. Passing every class id keeps the report aligned with
# label_encoder.classes_; zero_division=0 reports 0 for classes that have no
# true or predicted samples.
all_class_ids = np.arange(len(label_encoder.classes_))

print("\nClassification Report:\n")
print(
    classification_report(
        test_labels,
        y_pred,
        labels=all_class_ids,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
)

In [None]:
# Handler responsible for each complaint category. Built once at definition
# time instead of on every call.
_ROUTING_MAP = {
    "Academic Issue": "Faculty Mentor",
    "Registration Problem": "Academic Office",
    "Conduct-related Issue": "Student Welfare Dean",
    "Management/Hostel Issue": "Hostel/Admin Department",
}

# Used when a category has no dedicated handler or the text cannot be classified.
_DEFAULT_HANDLER = "General Support Team"


def route_complaint(text):
    """Predict the category of a complaint and pick the handler for it.

    Args:
        text: The complaint text to classify.

    Returns:
        A dict with two keys: "Category" (the predicted category name) and
        "Routed To" (the handler for that category). When `text` is None or
        contains only whitespace there is nothing to classify, so the result
        is {"Category": "Uncategorized", "Routed To": "General Support Team"}
        and the model is not called.
    """
    # A missing or blank complaint would otherwise crash the tokenizer (None)
    # or receive an arbitrary prediction (empty string); send it to the
    # fallback handler instead.
    if text is None or (isinstance(text, str) and not text.strip()):
        return {"Category": "Uncategorized", "Routed To": _DEFAULT_HANDLER}

    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
    outputs = model(inputs)

    # Highest-scoring class id for the first (only) example, mapped back to
    # its category name.
    pred_label = np.argmax(outputs.logits, axis=1)[0]
    category = label_encoder.inverse_transform([pred_label])[0]

    handler = _ROUTING_MAP.get(category, _DEFAULT_HANDLER)
    return {"Category": category, "Routed To": handler}

In [None]:
# Unseen complaints used to smoke-test the routing function end to end.
sample_complaints = [
    "My attendance has not been updated properly.",
    "The hostel rooms are too noisy and not maintained well.",
    "I was unfairly accused of plagiarism.",
]

# map() is lazy, so each complaint is classified immediately before its
# result is printed.
for text, result in zip(sample_complaints, map(route_complaint, sample_complaints)):
    print(f"\nComplaint: {text}")
    print(f" → Category: {result['Category']}")
    print(f" → Routed To: {result['Routed To']}")
