In [6]:
import numpy as np
import pandas as pd
from sklearn.cluster import MeanShift
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load features and labels
data = pd.read_csv("extracted_features.csv")
X = data.iloc[:, 1:].values  
y = data.iloc[:, 0].values  




In [13]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply Mean Shift clustering
mean_shift = MeanShift()
clusters = mean_shift.fit_predict(X_scaled)

# Append cluster labels as an additional feature
X_with_clusters = np.column_stack((X_scaled, clusters))

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_with_clusters, y, test_size=0.2, random_state=42, stratify=y)

# Train Decision Tree Classifier
dt = DecisionTreeClassifier(criterion="gini", max_depth=50, random_state=42)
dt.fit(X_train, y_train)

# Predict
y_pred_train = dt.predict(X_train)
y_pred_test = dt.predict(X_test)

# Evaluate
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

Train Accuracy: 0.9006
Test Accuracy: 0.1417


In [9]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

# Apply PCA to retain 95% variance
pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X_scaled)
y_encoded = pd.factorize(y)[0]  # Encode labels

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Train KNN
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn.fit(X_train, y_train)

# Evaluate
train_acc = knn.score(X_train, y_train)
test_acc = knn.score(X_test, y_test)

print(f"KNN with PCA - Train Accuracy: {train_acc:.4f}")
print(f"KNN with PCA - Test Accuracy: {test_acc:.4f}")


KNN with PCA - Train Accuracy: 0.6102
KNN with PCA - Test Accuracy: 0.4123


In [10]:
from sklearn.cluster import KMeans

# Apply KMeans clustering
kmeans = KMeans(n_clusters=250, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Append cluster labels as features
X_with_clusters = np.column_stack((X_scaled, clusters))
y_encoded = pd.factorize(y)[0]  # Encode labels

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_with_clusters, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Train KNN
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn.fit(X_train, y_train)

# Evaluate
train_acc = knn.score(X_train, y_train)
test_acc = knn.score(X_test, y_test)

print(f"KMeans + KNN - Train Accuracy: {train_acc:.4f}")
print(f"KMeans + KNN - Test Accuracy: {test_acc:.4f}")


KMeans + KNN - Train Accuracy: 0.5206
KMeans + KNN - Test Accuracy: 0.3217


In [11]:
from sklearn.cluster import MeanShift

# Apply Mean Shift clustering
meanshift = MeanShift()
clusters = meanshift.fit_predict(X_scaled)

# Append cluster labels
X_with_clusters = np.column_stack((X_scaled, clusters))
y_encoded = pd.factorize(y)[0]  # Encode labels

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_with_clusters, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Train KNN
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn.fit(X_train, y_train)

# Evaluate
train_acc = knn.score(X_train, y_train)
test_acc = knn.score(X_test, y_test)

print(f"Mean Shift + KNN - Train Accuracy: {train_acc:.4f}")
print(f"Mean Shift + KNN - Test Accuracy: {test_acc:.4f}")


Mean Shift + KNN - Train Accuracy: 0.6113
Mean Shift + KNN - Test Accuracy: 0.4105
