In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the railway complaints CSV into a DataFrame
data = pd.read_csv("railway_complaints.csv")

# X: the "Customer Complaint" column (vectorized as text further down)
# y: the "Complaint Category" column (used as the class label)
X, y = data["Customer Complaint"], data["Complaint Category"]

In [None]:
# Train-Test Split
# Split the raw complaint text BEFORE vectorizing, so the held-out complaints
# cannot influence the TF-IDF vocabulary (max_features selection) or the IDF
# weights. Fitting the vectorizer on the full corpus leaks test-set statistics
# into training and makes the reported metrics optimistic.
# test_size and random_state are the same values used previously.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Text Vectorization using TF-IDF
# Learn vocabulary and IDF weights from the training text only, then apply the
# already-fitted vectorizer to the test text.
vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Full-corpus matrix under its original name, in case other cells reference it.
# It is produced with the train-fitted vectorizer (transform only, no refit).
# Do not use it for evaluation: it contains the training rows.
X_tfidf = vectorizer.transform(X)

In [None]:
# Support Vector Machine with a linear kernel, fitted on the training split
# (fit() returns the estimator itself, so the chained form is equivalent)
svm_model = SVC(kernel="linear").fit(X_train, y_train)

# Evaluate on the held-out split: overall accuracy as a percentage,
# followed by the per-class precision / recall / F1 report
svm_predictions = svm_model.predict(X_test)
svm_accuracy_pct = 100 * accuracy_score(y_test, svm_predictions)
svm_report = classification_report(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy_pct)
print("SVM Classification Report:\n", svm_report)

SVM Accuracy: 85.09316770186336
SVM Classification Report:
                                      precision    recall  f1-score   support

        Coach Cleanliness & Hygiene       1.00      0.74      0.85        35
Customer Service & Staff Complaints       0.90      0.79      0.84        24
             Food & Catering Issues       0.87      0.81      0.84        16
        Luggage & Belongings Issues       0.91      0.91      0.91        11
                   Other Complaints       0.44      0.60      0.51        30
             Seat Allocation Issues       1.00      0.92      0.96        24
         Security & Safety Concerns       0.88      0.93      0.90        30
     Ticketing & Reservation Issues       0.98      0.93      0.95        45
     Train & Station Infrastructure       0.84      0.90      0.87        62
       Train Delays & Cancellations       0.91      0.89      0.90        45

                           accuracy                           0.85       322
              

In [None]:
# k-Nearest Neighbours classifier using the 3 closest training samples,
# fitted on the training split (fit() returns the estimator itself)
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Evaluate on the held-out split: overall accuracy as a percentage,
# followed by the per-class precision / recall / F1 report
knn_predictions = knn_model.predict(X_test)
knn_accuracy_pct = 100 * accuracy_score(y_test, knn_predictions)
knn_report = classification_report(y_test, knn_predictions)
print("KNN Accuracy:", knn_accuracy_pct)
print("KNN Classification Report:\n", knn_report)

KNN Accuracy: 76.3975155279503
KNN Classification Report:
                                      precision    recall  f1-score   support

        Coach Cleanliness & Hygiene       0.68      0.77      0.72        35
Customer Service & Staff Complaints       0.75      0.88      0.81        24
             Food & Catering Issues       0.83      0.94      0.88        16
        Luggage & Belongings Issues       0.53      0.82      0.64        11
                   Other Complaints       0.36      0.13      0.20        30
             Seat Allocation Issues       0.95      0.75      0.84        24
         Security & Safety Concerns       0.79      0.87      0.83        30
     Ticketing & Reservation Issues       0.87      0.89      0.88        45
     Train & Station Infrastructure       0.76      0.82      0.79        62
       Train Delays & Cancellations       0.81      0.78      0.80        45

                           accuracy                           0.76       322
               

In [None]:
# Random Forest with 200 trees; random_state is fixed so repeated runs
# build the same forest. fit() returns the estimator itself.
rf_model = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# Predicted categories for the held-out split
y_pred = rf_model.predict(X_test)

# Overall accuracy (kept as a fraction; scaled to a percentage when printed)
# and the per-class precision / recall / F1 report
accuracy = accuracy_score(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy*100}")
print("\nClassification Report:\n", rf_report)

Random Forest Accuracy: 86.95652173913044

Classification Report:
                                      precision    recall  f1-score   support

        Coach Cleanliness & Hygiene       1.00      0.77      0.87        35
Customer Service & Staff Complaints       0.95      0.79      0.86        24
             Food & Catering Issues       0.93      0.81      0.87        16
        Luggage & Belongings Issues       0.92      1.00      0.96        11
                   Other Complaints       0.53      0.70      0.60        30
             Seat Allocation Issues       1.00      0.96      0.98        24
         Security & Safety Concerns       0.97      0.93      0.95        30
     Ticketing & Reservation Issues       0.98      0.91      0.94        45
     Train & Station Infrastructure       0.84      0.92      0.88        62
       Train Delays & Cancellations       0.85      0.89      0.87        45

                           accuracy                           0.87       322
       