In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
df = pd.read_csv(r"E:\Projects\ProjectML\balanced_complaints_dataset.csv")

# Splitting features and target variable
X = df['Complaint']
y = df['Priority']

# Convert text data into numerical vectors using TF-IDF
vectorizer = TfidfVectorizer()
X_tfidf = vectorizer.fit_transform(X)

# Splitting into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42, stratify=y)

# Apply SMOTE to balance classes (if needed)
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Train the model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train_resampled, y_train_resampled)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the model and vectorizer
import joblib
joblib.dump(model, "complaint_priority_model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

print("✅ Model training complete. Files saved successfully!")


Accuracy: 0.425
Classification Report:
               precision    recall  f1-score   support

           1       0.38      0.46      0.41        13
           2       0.41      0.50      0.45        14
           3       0.57      0.31      0.40        13

    accuracy                           0.42        40
   macro avg       0.45      0.42      0.42        40
weighted avg       0.45      0.42      0.42        40

✅ Model training complete. Files saved successfully!


In [6]:
import joblib

# Load the trained model and vectorizer
model = joblib.load("complaint_priority_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

def predict_priority(complaint):
    # Transform the complaint using TF-IDF
    complaint_tfidf = vectorizer.transform([complaint])
    
    # Predict the priority
    priority = model.predict(complaint_tfidf)[0]
    
    return priority

# Example usage
sample_complaint = "There is no electricity in the hostel!"
predicted_priority = predict_priority(sample_complaint)
print(f"Predicted Priority: {predicted_priority}")



Predicted Priority: 3
