In [6]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the cleaned travel records from disk
df = pd.read_csv("cleaned_travel_dataset.csv")

# One LabelEncoder per categorical column, kept as separate objects so each
# integer code can be mapped back to its original label via `classes_`.
# NOTE(review): LabelEncoder assigns codes in sorted label order; if "Budget"
# has a natural ordering, confirm the resulting codes respect it.
budget_encoder, mode_encoder = LabelEncoder(), LabelEncoder()
df = df.assign(
    BudgetEncoded=budget_encoder.fit_transform(df["Budget"]),
    ModeEncoded=mode_encoder.fit_transform(df["Mode"]),
)

# Feature matrix: the "Traveller" column plus the encoded budget.
# Target vector: the encoded travel mode.
feature_columns = ["Traveller", "BudgetEncoded"]
X = df[feature_columns].to_numpy()
y = df["ModeEncoded"].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -------------------------
# 1️⃣ Decision Tree Classifier
# A shallow tree (max_depth=3) is fit on the training split and scored on the
# held-out split; random_state pins the tree construction for reproducibility.
print("\n🌳 Decision Tree Classifier:")
dt_model = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print(f"✅ Accuracy: {accuracy_dt * 100:.2f}%")
# Pass `labels` explicitly so every encoded class gets a row that lines up with
# `target_names`. Without it, classification_report raises ValueError whenever
# a class is absent from both y_test and the predictions.
print(
    "📊 Classification Report:\n",
    classification_report(
        y_test,
        y_pred_dt,
        labels=mode_encoder.transform(mode_encoder.classes_),
        target_names=mode_encoder.classes_,
    ),
)

# -------------------------
# 3️⃣ K-Nearest Neighbors (KNN)
# Each test row is classified by majority vote among its 3 nearest training rows.
# NOTE(review): KNN is distance-based and the features are used unscaled, with
# BudgetEncoded being an integer code rather than a measured quantity — confirm
# this is intended or add scaling / a different encoding.
print("\n👥 K-Nearest Neighbors (KNN):")
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"✅ Accuracy: {accuracy_knn * 100:.2f}%")
# Pass `labels` explicitly so every encoded class gets a row that lines up with
# `target_names`. Without it, classification_report raises ValueError whenever
# a class is absent from both y_test and the predictions.
print(
    "📊 Classification Report:\n",
    classification_report(
        y_test,
        y_pred_knn,
        labels=mode_encoder.transform(mode_encoder.classes_),
        target_names=mode_encoder.classes_,
    ),
)

# 4️⃣ Random Forest Classifier
# An ensemble of 100 trees is fit on the training split and scored on the
# held-out split; random_state makes the forest reproducible.
print("\n🌲 Random Forest Classifier:")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"✅ Accuracy: {accuracy_rf * 100:.2f}%")
# Pass `labels` explicitly so every encoded class gets a row that lines up with
# `target_names`. Without it, classification_report raises ValueError whenever
# a class is absent from both y_test and the predictions.
print(
    "📊 Classification Report:\n",
    classification_report(
        y_test,
        y_pred_rf,
        labels=mode_encoder.transform(mode_encoder.classes_),
        target_names=mode_encoder.classes_,
    ),
)



🌳 Decision Tree Classifier:
✅ Accuracy: 95.10%
📊 Classification Report:
               precision    recall  f1-score   support

         Bus       1.00      0.86      0.92        35
      Flight       0.95      1.00      0.97        35
       Train       0.91      1.00      0.96        32

    accuracy                           0.95       102
   macro avg       0.95      0.95      0.95       102
weighted avg       0.95      0.95      0.95       102


👥 K-Nearest Neighbors (KNN):
✅ Accuracy: 94.12%
📊 Classification Report:
               precision    recall  f1-score   support

         Bus       0.89      0.94      0.92        35
      Flight       0.95      1.00      0.97        35
       Train       1.00      0.88      0.93        32

    accuracy                           0.94       102
   macro avg       0.95      0.94      0.94       102
weighted avg       0.94      0.94      0.94       102


🌲 Random Forest Classifier:
✅ Accuracy: 93.14%
📊 Classification Report:
               p