In [1]:
#6. Train a KNN Classifier on the Wine dataset with and without feature scaling. Compare model accuracy in both cases.
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the Wine data as a feature matrix (X) and a class-label vector (y).
data = load_wine()
X = data.data
y = data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features. The scaler learns its statistics from the
# training split only and reuses them to transform the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: 5-nearest-neighbours on the raw (unscaled) features.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)
acc_no_scaling = accuracy_score(y_test, y_pred)

# Same classifier settings, trained and evaluated on the standardized features.
knn_scaled = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_scaled = knn_scaled.predict(X_test_scaled)
acc_scaled = accuracy_score(y_test, y_pred_scaled)

# Report both test accuracies side by side.
print("Accuracy without scaling:", acc_no_scaling)
print("Accuracy with scaling:", acc_scaled)


Accuracy without scaling: 0.7407407407407407
Accuracy with scaling: 0.9629629629629629


In [4]:
#7. Train a PCA model on the Wine dataset and print the explained variance ratio of each principal component.
from sklearn.decomposition import PCA

# PCA() without n_components keeps every component.
# NOTE: X is the raw, unstandardized feature matrix here, so the ratios below
# are computed in the features' original units.
pca = PCA().fit(X)

# Header line followed by the per-component explained-variance ratios.
print("Variance ratio of each component:", pca.explained_variance_ratio_, sep="\n")

Variance ratio of each component:
[9.98091230e-01 1.73591562e-03 9.49589576e-05 5.02173562e-05
 1.23636847e-05 8.46213034e-06 2.80681456e-06 1.52308053e-06
 1.12783044e-06 7.21415811e-07 3.78060267e-07 2.12013755e-07
 8.25392788e-08]


In [9]:
#8. Train a KNN Classifier on the PCA-transformed dataset (retain top 2 components). Compare the accuracy with the original dataset.
# Import libraries
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load Wine dataset.
# NOTE: this cell rebinds data, X, y, the train/test split, scaler and pca;
# the distance-metric and pipeline cells read the split and the scaled arrays
# defined here.
data = load_wine()
X, y = data.data, data.target

# Split into train and test sets (30% held out, fixed seed -- the same
# parameters as the scaling-comparison cell).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale features. The scaler is fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply PCA (retain top 2 components). The projection is fitted on the scaled
# training data and reused for the scaled test data.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Train KNN (k=5) on the 2-component PCA-transformed data
knn_pca = KNeighborsClassifier(n_neighbors=5)
knn_pca.fit(X_train_pca, y_train)
y_pred_pca = knn_pca.predict(X_test_pca)

# Test accuracy using the PCA features
acc_pca = accuracy_score(y_test, y_pred_pca)

# Reference model: same KNN settings on all scaled features (no PCA), so the
# two accuracies differ only in the dimensionality reduction step.
knn_full = KNeighborsClassifier(n_neighbors=5)
knn_full.fit(X_train_scaled, y_train)
y_pred_full = knn_full.predict(X_test_scaled)
acc_full = accuracy_score(y_test, y_pred_full)

# Print both test accuracies for comparison
print("Accuracy using PCA (2 components):", acc_pca)
print("Accuracy using full scaled dataset:", acc_full)


Accuracy using PCA (2 components): 0.9814814814814815
Accuracy using full scaled dataset: 0.9629629629629629


In [10]:
#9. Train a KNN Classifier with different distance metrics (euclidean, manhattan) on the scaled Wine dataset and compare the results.
# Fit one 5-NN classifier per distance metric on the standardized training
# split and report its test accuracy. Accuracy is rounded to two decimals
# here, so small differences between metrics will not show in the output.
metrics = ['euclidean', 'manhattan']
for m in metrics:
    knn = KNeighborsClassifier(n_neighbors=5, metric=m).fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_test_scaled)
    print(f"Accuracy using {m} distance: {accuracy_score(y_test, y_pred):.2f}")

Accuracy using euclidean distance: 0.96
Accuracy using manhattan distance: 0.96


In [8]:
#10. Example pipeline: standardize the features, reduce them with PCA, then
# classify with KNN -- all chained in a single estimator.
from sklearn.pipeline import Pipeline

# A float n_components in (0, 1) asks PCA to keep the smallest number of
# components whose cumulative explained variance exceeds that fraction (95%).
model = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.95)),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
])

# The pipeline receives the raw (unscaled) split; scaling and PCA are fitted
# inside the pipeline on the training data and re-applied at predict time.
y_pred = model.fit(X_train, y_train).predict(X_test)
print("Accuracy after PCA + KNN pipeline:", accuracy_score(y_test, y_pred))

Accuracy after PCA + KNN pipeline: 0.9629629629629629
