In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Location of the dataset, held in one named constant so the notebook can be
# pointed at a different copy without editing the loading code itself.
DATA_PATH = "/content/drive/MyDrive/AKAIKE/output.csv"

df = pd.read_csv(DATA_PATH)

# Column 0 is fed to the text vectorizer; column 1 is the label to predict.
X = df.iloc[:, 0]
y = df.iloc[:, 1]

# Split the raw documents BEFORE vectorizing. Fitting TF-IDF on the full
# corpus would let the held-out rows shape the vocabulary and IDF weights,
# leaking test information into training and inflating the reported scores.
# test_size and random_state are the same values used previously.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn vocabulary / IDF from the training documents only, then apply that
# fitted transform unchanged to the held-out documents.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Kept so any code that still refers to the full matrix keeps working. It is
# produced with the train-fitted vectorizer; do not use it for evaluation,
# because it contains both the training and the held-out rows.
X_vec = vectorizer.transform(X)

In [None]:
#NAIVE BAYES
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Multinomial Naive Bayes baseline with default settings.
# fit() returns the estimator itself, so construction and training are chained.
nb = MultinomialNB().fit(X_train, y_train)

# Predict the held-out split and print the per-class precision/recall/F1 table.
y_pred_nb = nb.predict(X_test)
nb_report = classification_report(y_test, y_pred_nb)
print("Naïve Bayes:\n", nb_report)

Naïve Bayes:
               precision    recall  f1-score   support

      Change       0.87      0.58      0.70       479
    Incident       0.65      0.92      0.76      1920
     Problem       0.45      0.14      0.22      1009
     Request       0.87      0.90      0.89      1392

    accuracy                           0.72      4800
   macro avg       0.71      0.64      0.64      4800
weighted avg       0.69      0.72      0.68      4800



In [None]:
#LINEAR SVM
from sklearn.svm import LinearSVC

# Linear SVM with default hyper-parameters.
# random_state is pinned to the same seed as the train/test split so repeated
# runs give the same model. NOTE(review): per the scikit-learn docs the seed
# only matters when the dual solver is used; it is harmless otherwise.
svm = LinearSVC(random_state=42)
svm.fit(X_train, y_train)

# Predict the held-out split and print the per-class report.
y_pred_svm = svm.predict(X_test)
print("SVM:\n", classification_report(y_test, y_pred_svm))

SVM:
               precision    recall  f1-score   support

      Change       0.90      0.82      0.86       479
    Incident       0.70      0.80      0.75      1920
     Problem       0.52      0.39      0.45      1009
     Request       0.92      0.93      0.92      1392

    accuracy                           0.75      4800
   macro avg       0.76      0.73      0.74      4800
weighted avg       0.75      0.75      0.75      4800



In [None]:
#DECISION TREE
from sklearn.tree import DecisionTreeClassifier

# Single decision tree with default hyper-parameters.
# Tree construction draws on a random number generator, so without a fixed
# seed the fitted tree (and the report below) can differ between runs.
# The seed matches the one used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Predict the held-out split and print the per-class report.
y_pred_dt = dt.predict(X_test)
print("Decision Tree:\n", classification_report(y_test, y_pred_dt))

Decision Tree:
               precision    recall  f1-score   support

      Change       0.61      0.61      0.61       479
    Incident       0.66      0.68      0.67      1920
     Problem       0.42      0.40      0.41      1009
     Request       0.82      0.82      0.82      1392

    accuracy                           0.65      4800
   macro avg       0.63      0.63      0.63      4800
weighted avg       0.65      0.65      0.65      4800



In [None]:
#RANDOM FOREST
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees.
# Bootstrap sampling and per-split feature sampling are random; pinning
# random_state (same seed as the train/test split) makes the metrics
# reproducible from one run of the notebook to the next.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict the held-out split and print the per-class report.
y_pred_rf = rf.predict(X_test)
print("Random Forest:\n", classification_report(y_test, y_pred_rf))

Random Forest:
               precision    recall  f1-score   support

      Change       0.95      0.63      0.76       479
    Incident       0.66      0.97      0.79      1920
     Problem       0.87      0.16      0.26      1009
     Request       0.87      0.92      0.89      1392

    accuracy                           0.75      4800
   macro avg       0.84      0.67      0.68      4800
weighted avg       0.79      0.75      0.71      4800



In [None]:
#GRID SEARCH
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Candidate smoothing values for MultinomialNB, compared by 5-fold
# cross-validated accuracy on the training split.
# NOTE(review): the recorded run selected alpha=0.1, the smallest candidate;
# consider probing lower values as well.
param_grid = {'alpha': [0.1, 0.5, 1.0]}
grid = GridSearchCV(
    estimator=MultinomialNB(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid.fit(X_train, y_train)
print("Best Naïve Bayes params:", grid.best_params_)

# Evaluate the winning configuration on the held-out split.
best_nb = grid.best_estimator_
y_pred = best_nb.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

Best Naïve Bayes params: {'alpha': 0.1}

Classification Report:

              precision    recall  f1-score   support

      Change       0.90      0.71      0.79       479
    Incident       0.67      0.91      0.77      1920
     Problem       0.52      0.23      0.32      1009
     Request       0.90      0.90      0.90      1392

    accuracy                           0.74      4800
   macro avg       0.75      0.69      0.70      4800
weighted avg       0.73      0.74      0.72      4800



In [None]:
#COMBINATION OF MULTIPLE MODELS
from sklearn.ensemble import VotingClassifier

# Hard-voting ensemble: each member predicts a label and the majority wins.
# The random forest and the linear SVM are given a fixed random_state (same
# seed as the train/test split) so the ensemble accuracy is reproducible;
# MultinomialNB takes no seed.
ensemble = VotingClassifier(estimators=[
    ('nb', MultinomialNB()),
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('svm', LinearSVC(random_state=42))
], voting='hard')

ensemble.fit(X_train, y_train)

# score() reports plain accuracy on the held-out split.
print("Ensemble Accuracy:", ensemble.score(X_test, y_test))


Ensemble Accuracy: 0.753125
