# Task 5

In [17]:
from sklearn.cluster import AgglomerativeClustering
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb

# Supervised Learning

Before moving to supervised learning, we have to add a target class for the classifiers to predict, because the original dataset does not have such a feature. We will create it using clustering.

In [7]:
# Load the feature table from disk.
data = pd.read_csv('selected_features.csv')

# Only income and spending score are used to form the segments.
segment_features = ['Annual Income (k$)', 'Spending Score (1-100)']
X = data[segment_features]

# Ward-linkage agglomerative clustering into five groups; the cluster ids
# are stored as the label column that the classifiers will later predict.
agg_cluster = AgglomerativeClustering(
    n_clusters=5,
    metric='euclidean',
    linkage='ward',
)
segment_labels = agg_cluster.fit_predict(X)
data['CustomerSegment'] = segment_labels

print(data.head())


   Annual Income (k$)  Spending Score (1-100)  CustomerSegment
0           -1.738999               -0.434801                4
1           -1.738999                1.195704                3
2           -1.700830               -1.715913                4
3           -1.700830                1.040418                3
4           -1.662660               -0.395980                4


Now we have the `CustomerSegment` column as our target for classification.

In [11]:
# The cluster id is the label; every other column is a predictor.
target_column = 'CustomerSegment'
y = data[target_column]
X = data.drop(columns=[target_column])


In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): no `stratify` argument is passed, so class proportions are not
# guaranteed to match between the training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
)



Random Forest Classifier

In [16]:
# Train a random forest (seeded for repeatability) on the training split.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out rows.
y_pred = rf.predict(X_test)

# Score the predictions against the true segments.
rf_accuracy = accuracy_score(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)
rf_confusion = confusion_matrix(y_test, y_pred)

print("Accuracy:", rf_accuracy)
print("Classification Report:\n", rf_report)
print("Confusion Matrix:\n", rf_confusion)

Accuracy: 0.95
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.78      0.88         9
           1       1.00      1.00      1.00         7
           2       0.89      1.00      0.94        17
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         3

    accuracy                           0.95        40
   macro avg       0.98      0.96      0.96        40
weighted avg       0.96      0.95      0.95        40

Confusion Matrix:
 [[ 7  0  2  0  0]
 [ 0  7  0  0  0]
 [ 0  0 17  0  0]
 [ 0  0  0  4  0]
 [ 0  0  0  0  3]]


SVM

In [18]:
# Support vector classifier with default settings, trained on the training split.
svm = SVC(random_state=42).fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Score the predictions against the true segments.
svm_accuracy = accuracy_score(y_test, y_pred_svm)
svm_report = classification_report(y_test, y_pred_svm)
svm_confusion = confusion_matrix(y_test, y_pred_svm)

print("SVM - Accuracy:", svm_accuracy)
print("Classification Report:\n", svm_report)
print("Confusion Matrix:\n", svm_confusion)


SVM - Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        17
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         3

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

Confusion Matrix:
 [[ 9  0  0  0  0]
 [ 0  7  0  0  0]
 [ 0  0 17  0  0]
 [ 0  0  0  4  0]
 [ 0  0  0  0  3]]


KNN

In [19]:
# k-nearest-neighbours classifier with default settings, trained on the training split.
knn = KNeighborsClassifier().fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Score the predictions against the true segments.
knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)
knn_confusion = confusion_matrix(y_test, y_pred_knn)

print("KNN - Accuracy:", knn_accuracy)
print("Classification Report:\n", knn_report)
print("Confusion Matrix:\n", knn_confusion)

KNN - Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        17
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         3

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

Confusion Matrix:
 [[ 9  0  0  0  0]
 [ 0  7  0  0  0]
 [ 0  0 17  0  0]
 [ 0  0  0  4  0]
 [ 0  0  0  0  3]]


Gradient Boosting

In [20]:
# Gradient-boosted trees (seeded for repeatability), trained on the training split.
grad_boost = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred_grad_boost = grad_boost.predict(X_test)

# Score the predictions against the true segments.
grad_boost_accuracy = accuracy_score(y_test, y_pred_grad_boost)
grad_boost_report = classification_report(y_test, y_pred_grad_boost)
grad_boost_confusion = confusion_matrix(y_test, y_pred_grad_boost)

print("Gradient Boosting - Accuracy:", grad_boost_accuracy)
print("Classification Report:\n", grad_boost_report)
print("Confusion Matrix:\n", grad_boost_confusion)


Gradient Boosting - Accuracy: 0.925
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.67      0.80         9
           1       1.00      1.00      1.00         7
           2       0.85      1.00      0.92        17
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         3

    accuracy                           0.93        40
   macro avg       0.97      0.93      0.94        40
weighted avg       0.94      0.93      0.92        40

Confusion Matrix:
 [[ 6  0  3  0  0]
 [ 0  7  0  0  0]
 [ 0  0 17  0  0]
 [ 0  0  0  4  0]
 [ 0  0  0  0  3]]


XGBoost

In [21]:
# XGBoost classifier (seeded for repeatability), trained on the training split.
xgb_model = xgb.XGBClassifier(random_state=42).fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

# Score the predictions against the true segments.
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
xgb_report = classification_report(y_test, y_pred_xgb)
xgb_confusion = confusion_matrix(y_test, y_pred_xgb)

print("XGBoost - Accuracy:", xgb_accuracy)
print("Classification Report:\n", xgb_report)
print("Confusion Matrix:\n", xgb_confusion)

XGBoost - Accuracy: 0.925
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.67      0.80         9
           1       1.00      1.00      1.00         7
           2       0.85      1.00      0.92        17
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         3

    accuracy                           0.93        40
   macro avg       0.97      0.93      0.94        40
weighted avg       0.94      0.93      0.92        40

Confusion Matrix:
 [[ 6  0  3  0  0]
 [ 0  7  0  0  0]
 [ 0  0 17  0  0]
 [ 0  0  0  4  0]
 [ 0  0  0  0  3]]
