### 21. Train a KNN Classifier on the Iris dataset and print model accuracy

In [None]:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the Iris feature matrix and label vector as plain arrays.
X, y = load_iris(return_X_y=True)

# Hold out a test split; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit a KNN classifier with default settings and score it on the held-out rows.
model = KNeighborsClassifier().fit(X_train, y_train)
accuracy = model.score(X_test, y_test)
print("Model accuracy:", accuracy)


### 22. Train a KNN Regressor on a synthetic dataset and evaluate using Mean Squared Error (MSE)

In [None]:

from sklearn.datasets import make_regression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Synthetic regression problem. random_state pins the generated data so the
# reported MSE is identical on every run.
# The arrays deliberately get *_reg names: rebinding X / y / X_train / y_train
# here would replace the Iris split with continuous targets, and the
# classifier cells further down fit on those shared names.
X_reg, y_reg = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, random_state=42
)

# Fit a default KNN regressor and report the test-set mean squared error.
model = KNeighborsRegressor()
model.fit(X_train_reg, y_train_reg)
y_pred = model.predict(X_test_reg)
mse = mean_squared_error(y_test_reg, y_pred)
print("Mean Squared Error:", mse)


### 23. Train a KNN Classifier using different distance metrics (Euclidean and Manhattan) and compare accuracy

In [None]:

# Same classifier, two distance functions; each one is scored on the shared
# held-out split so the two accuracies are directly comparable.
for metric_name in ('euclidean', 'manhattan'):
    model = KNeighborsClassifier(metric=metric_name)
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    print(f"{metric_name} accuracy:", test_accuracy)


### 24. Train a KNN Classifier with different values of K and visualize decision boundaries

In [None]:

import matplotlib.pyplot as plt
import numpy as np

# Part 1: test accuracy for K = 1..10 using every feature column.
accuracies = []
k_values = range(1, 11)
for k in k_values:
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)
    accuracies.append(model.score(X_test, y_test))

plt.plot(k_values, accuracies, marker='o')
plt.title('Accuracy vs K')
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

# Part 2: decision boundaries. A boundary can only be drawn in a plane, so
# separate classifiers are fitted on the first two feature columns only.
X_train_2d = X_train[:, :2]
pad = 0.5  # margin around the data so points are not drawn on the plot edge
xx, yy = np.meshgrid(
    np.linspace(X_train_2d[:, 0].min() - pad, X_train_2d[:, 0].max() + pad, 200),
    np.linspace(X_train_2d[:, 1].min() - pad, X_train_2d[:, 1].max() + pad, 200),
)
grid_points = np.c_[xx.ravel(), yy.ravel()]

boundary_k_values = (1, 5, 10)
fig, axes = plt.subplots(
    1, len(boundary_k_values), figsize=(15, 4), sharex=True, sharey=True
)
for ax, k in zip(axes, boundary_k_values):
    boundary_model = KNeighborsClassifier(n_neighbors=k).fit(X_train_2d, y_train)
    # Predict a class for every grid point and fold back to the grid shape.
    regions = boundary_model.predict(grid_points).reshape(xx.shape)
    ax.contourf(xx, yy, regions, alpha=0.3, cmap='viridis')
    ax.scatter(X_train_2d[:, 0], X_train_2d[:, 1], c=y_train,
               cmap='viridis', edgecolor='k', s=20)
    ax.set_title(f'Decision boundary (K={k})')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
plt.tight_layout()
plt.show()


### 25. Apply Feature Scaling before training a KNN model and compare results with unscaled data

In [None]:

from sklearn.preprocessing import StandardScaler

# Standardised copy of the whole feature matrix. Later PCA cells read
# X_scaled, so it is built first and independently of the model comparison.
X_scaled = StandardScaler().fit_transform(X)

# One split shared by both models so they are scored on the same test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Baseline: KNN on the raw, unscaled features.
model_unscaled = KNeighborsClassifier()
model_unscaled.fit(X_train, y_train)
print("Accuracy without scaling:", model_unscaled.score(X_test, y_test))

# Scaled model: the scaler learns mean/variance from the training rows only
# and is then applied to the test rows, so no test statistics leak in.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
model = KNeighborsClassifier()
model.fit(X_train_s, y_train)
print("Accuracy with scaling:", model.score(X_test_s, y_test))


### 26. Train a PCA model on synthetic data and print the explained variance ratio for each component

In [None]:

from sklearn.decomposition import PCA

# Seeded synthetic features so the printed ratios are reproducible.
# Stored as X_synth rather than X: rebinding X to this 100-row matrix would
# leave it out of step with y, which later cells pair with X.
X_synth, _ = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)

# Keep every component and report the share of variance each one explains.
pca = PCA()
pca.fit(X_synth)
print("Explained Variance Ratio:", pca.explained_variance_ratio_)


### 27. Apply PCA before training a KNN Classifier and compare accuracy with and without PCA

In [None]:

# Split the standardised features once; both models are scored on the same rows.
X_train_sc, X_test_sc, y_train, y_test = train_test_split(
    X_scaled, y, random_state=42
)

# Baseline: KNN on all standardised features (no PCA).
model_no_pca = KNeighborsClassifier()
model_no_pca.fit(X_train_sc, y_train)
print("Accuracy without PCA:", model_no_pca.score(X_test_sc, y_test))

# PCA model: the projection is learned from the training rows only and then
# applied to the test rows.
pca = PCA(n_components=2)
X_train_p = pca.fit_transform(X_train_sc)
X_test_p = pca.transform(X_test_sc)
X_pca = pca.transform(X_scaled)  # projection of every row, kept under its original name

model = KNeighborsClassifier()
model.fit(X_train_p, y_train)
print("Accuracy with PCA:", model.score(X_test_p, y_test))


### 28. Perform Hyperparameter Tuning on a KNN Classifier using GridSearchCV

In [None]:

# GridSearchCV was used without being imported anywhere in the notebook.
from sklearn.model_selection import GridSearchCV

# Candidate neighbour counts 1..10, each evaluated with 5-fold cross-validation
# on the training split.
params = {'n_neighbors': list(range(1, 11))}
grid = GridSearchCV(KNeighborsClassifier(), params, cv=5)
grid.fit(X_train, y_train)
print("Best K:", grid.best_params_)
print("Best Score:", grid.best_score_)


### 29. Train a KNN Classifier and check the number of misclassified samples

In [None]:

# Train a fresh classifier here rather than reusing whatever `model` an
# earlier cell left behind: the previous one was fitted on 2 PCA columns and
# cannot predict on the full-width X_test.
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Count test rows whose predicted label differs from the true label.
y_pred = model.predict(X_test)
misclassified = (y_pred != y_test).sum()
print("Misclassified samples:", misclassified)


### 30. Train a PCA model and visualize the cumulative explained variance

In [None]:

pca = PCA().fit(X_scaled)

# Entry i of the cumulative sum covers the first i + 1 components, so the
# x-axis must start at 1 to match the "Number of Components" label.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
component_counts = list(range(1, len(cumulative_variance) + 1))

plt.plot(component_counts, cumulative_variance, marker='o')
plt.xticks(component_counts)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Cumulative Explained Variance')
plt.grid(True)
plt.show()


### 31. Train a KNN Classifier using different values of the weights parameter (uniform vs. distance) and compare accuracy

In [None]:

# Compare the two neighbour-weighting options on the shared train/test split.
for weighting in ('uniform', 'distance'):
    model = KNeighborsClassifier(weights=weighting)
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    print(f"{weighting} weighting accuracy:", test_accuracy)


### 32. Train a KNN Regressor and analyze the effect of different K values on performance

In [None]:

# Build the regression data inside this cell instead of relying on whatever
# X_train / y_train currently hold: several cells in between rebind those
# names. The seed makes the printed MSE values reproducible.
X_reg, y_reg = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, random_state=42
)

# Test-set MSE for K = 1..10.
for k in range(1, 11):
    model = KNeighborsRegressor(n_neighbors=k)
    model.fit(X_train_reg, y_train_reg)
    y_pred = model.predict(X_test_reg)
    print(f"K={k} MSE:", mean_squared_error(y_test_reg, y_pred))


### 33. Implement KNN Imputation for handling missing values in a dataset

In [None]:

# pandas was used as `pd` without being imported anywhere in the notebook.
import pandas as pd
from sklearn.impute import KNNImputer

# copy=True gives the frame its own buffer. Without it the frame can share
# memory with X, and the NaN written below would also land in X.
df = pd.DataFrame(X, copy=True)
df.iloc[0, 0] = np.nan  # knock out one value to give the imputer work to do

# Fill the gap from the 2 nearest rows and show the repaired first row.
imputer = KNNImputer(n_neighbors=2)
X_imputed = imputer.fit_transform(df)
print("Imputed data:", X_imputed[0])


### 34. Train a PCA model and visualize the data projection onto the first two principal components

In [None]:

# Project the standardised features onto two principal components.
pca = PCA(n_components=2)
X_proj = pca.fit_transform(X_scaled)

# Scatter the two component columns against each other, coloured by y.
fig, ax = plt.subplots()
ax.scatter(X_proj[:, 0], X_proj[:, 1], c=y, cmap='viridis')
ax.set_title('Data Projection on First 2 PCA Components')
plt.show()


### 35. Train a KNN Classifier using the KD Tree and Ball Tree algorithms and compare performance

In [None]:

# Fit one classifier per neighbour-search algorithm on the same training data.
model_kd = KNeighborsClassifier(algorithm='kd_tree').fit(X_train, y_train)
model_bt = KNeighborsClassifier(algorithm='ball_tree').fit(X_train, y_train)

# Report test accuracy for each, KD tree first.
for label, fitted_model in (("KD Tree", model_kd), ("Ball Tree", model_bt)):
    print(f"{label} accuracy:", fitted_model.score(X_test, y_test))


### 36. Train a PCA model on a high-dimensional dataset and visualize the Scree plot

In [None]:

# Fit PCA with every component kept and plot each component's variance.
pca = PCA().fit(X_scaled)
component_variances = pca.explained_variance_
component_index = range(1, len(component_variances) + 1)  # 1-based component number

fig, ax = plt.subplots()
ax.plot(component_index, component_variances, marker='o')
ax.set_title('Scree Plot')
ax.set_xlabel('Principal Component')
ax.set_ylabel('Variance')
plt.show()


### 37. Train a KNN Classifier and evaluate performance using Precision, Recall, and F1-Score

In [None]:

# These metric functions were used without being imported anywhere.
from sklearn.metrics import precision_score, recall_score, f1_score

# Train the classifier in this cell: the `model` left behind by earlier cells
# is a KNN regressor, whose continuous predictions these metrics reject.
model = KNeighborsClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# average='macro' is passed to each metric; the data has more than two classes.
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))
print("F1 Score:", f1_score(y_test, y_pred, average='macro'))


### 38. Train a PCA model and analyze the effect of different numbers of components on accuracy

In [None]:

accuracies = []
components = range(1, X_scaled.shape[1]+1)

# The split does not depend on the component count, so it is done once,
# outside the loop.
X_train_sc, X_test_sc, y_train, y_test = train_test_split(
    X_scaled, y, random_state=42
)

for c in components:
    # Learn the projection from the training rows only, then apply it to the
    # test rows, so the test data never shapes the components.
    reducer = PCA(n_components=c)
    X_train_p = reducer.fit_transform(X_train_sc)
    X_test_p = reducer.transform(X_test_sc)
    model = KNeighborsClassifier()
    model.fit(X_train_p, y_train)
    accuracies.append(model.score(X_test_p, y_test))

plt.plot(components, accuracies, marker='o')
plt.title('Accuracy vs PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Accuracy')
plt.show()


### 39. Train a KNN Classifier with different leaf_size values and compare accuracy

In [None]:

# Refit the classifier with each leaf_size setting and print its test accuracy.
for leaf_size in range(10, 51, 10):
    model = KNeighborsClassifier(leaf_size=leaf_size)
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    print(f"Leaf Size={leaf_size} Accuracy:", test_accuracy)


### 40. Train a PCA model and visualize how data points are transformed before and after PCA

In [None]:

pca = PCA(n_components=2)
X_transformed = pca.fit_transform(X_scaled)
print("Before PCA shape:", X_scaled.shape)
print("After PCA shape:", X_transformed.shape)

# Side-by-side view: the first two standardised feature columns on the left,
# the two principal-component columns on the right.
# Colouring by y assumes y still lines up row-for-row with X_scaled, as the
# earlier projection cell also assumes.
fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(12, 5))

ax_before.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap='viridis')
ax_before.set_title('Before PCA (first two scaled features)')
ax_before.set_xlabel('Feature 1')
ax_before.set_ylabel('Feature 2')

ax_after.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y, cmap='viridis')
ax_after.set_title('After PCA (first two components)')
ax_after.set_xlabel('Principal Component 1')
ax_after.set_ylabel('Principal Component 2')

plt.tight_layout()
plt.show()


### 41. Train a KNN Classifier on a real-world dataset (Wine dataset) and print classification report

In [None]:

# load_wine and classification_report were used without being imported.
from sklearn.datasets import load_wine
from sklearn.metrics import classification_report

# Load the Wine data, hold out a test split, and fit a default KNN classifier.
X, y = load_wine(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Print the classification report for the held-out rows.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


### 42. Train a KNN Regressor and analyze the effect of different distance metrics on prediction error

In [None]:

# Generate regression data inside this cell. X_train / y_train at this point
# hold a classification split, so fitting a regressor on them would measure
# error against class labels rather than a continuous target.
X_reg, y_reg = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, random_state=42
)

# Test-set MSE for each distance metric.
for metric in ['euclidean', 'manhattan']:
    model = KNeighborsRegressor(metric=metric)
    model.fit(X_train_reg, y_train_reg)
    y_pred = model.predict(X_test_reg)
    print(f"{metric} MSE:", mean_squared_error(y_test_reg, y_pred))


### 43. Train a KNN Classifier and evaluate using ROC-AUC score

In [None]:

# roc_auc_score was used without being imported anywhere.
from sklearn.metrics import roc_auc_score

# Train the classifier in this cell: the `model` left by the previous cell is
# a KNN regressor, which has no predict_proba.
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Class-probability columns are scored one-vs-rest (multi_class='ovr').
y_prob = model.predict_proba(X_test)
print("ROC-AUC Score:", roc_auc_score(y_test, y_prob, multi_class='ovr'))


### 44. Train a PCA model and visualize the variance captured by each principal component

In [None]:

# Fit PCA with every component kept and chart each component's variance share.
pca = PCA()
pca.fit(X_scaled)
variance_ratios = pca.explained_variance_ratio_
component_index = range(1, len(variance_ratios) + 1)  # 1-based component number

fig, ax = plt.subplots()
ax.bar(component_index, variance_ratios)
ax.set_title('Variance Captured by Each Component')
ax.set_xlabel('Principal Component')
ax.set_ylabel('Variance Ratio')
plt.show()


### 45. Train a KNN Classifier and perform feature selection before training

In [None]:

from sklearn.feature_selection import SelectKBest, f_classif

# Split first: the selector scores features against the labels, so fitting it
# on every row would let the test labels influence which features are kept.
X_train_full, X_test_full, y_train, y_test = train_test_split(X, y, random_state=42)

# Keep the 2 best-scoring features, chosen from the training rows only.
selector = SelectKBest(score_func=f_classif, k=2)
X_train = selector.fit_transform(X_train_full, y_train)
X_test = selector.transform(X_test_full)
X_new = selector.transform(X)  # every row reduced to the same 2 columns

model = KNeighborsClassifier()
model.fit(X_train, y_train)
print("Accuracy after feature selection:", model.score(X_test, y_test))


### 46. Train a PCA model and visualize the data reconstruction error after reducing dimensions

In [None]:

# Reduce to 2 components, map back to the original space, and measure the
# mean squared difference from the input.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
X_reconstructed = pca.inverse_transform(X_pca)
reconstruction_error = np.mean((X_scaled - X_reconstructed) ** 2)
print("Reconstruction Error:", reconstruction_error)

# Visualise the same error for every possible component count.
component_counts = list(range(1, X_scaled.shape[1] + 1))
errors_by_count = []
for n_kept in component_counts:
    reducer = PCA(n_components=n_kept)
    restored = reducer.inverse_transform(reducer.fit_transform(X_scaled))
    errors_by_count.append(np.mean((X_scaled - restored) ** 2))

plt.plot(component_counts, errors_by_count, marker='o')
plt.xticks(component_counts)
plt.title('Reconstruction Error vs Number of Components')
plt.xlabel('Number of Components')
plt.ylabel('Mean Squared Reconstruction Error')
plt.grid(True)
plt.show()


### 47. Train a KNN Classifier and visualize the decision boundary

In [None]:

from mlxtend.plotting import plot_decision_regions

# Only the first two feature columns are used for both fitting and plotting.
first_two_features = X_train[:, :2]

model = KNeighborsClassifier()
model.fit(first_two_features, y_train)

# Draw the fitted classifier's regions over the training points.
plot_decision_regions(first_two_features, y_train, clf=model, legend=2)
plt.title('Decision Boundary')
plt.show()


### 48. Train a PCA model and analyze the effect of different numbers of components on data variance

In [None]:

pca = PCA().fit(X_scaled)

# Entry i of the cumulative sum covers the first i + 1 components, so the
# x-axis must start at 1 to match the "Number of Components" label.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
component_counts = list(range(1, len(cumulative_variance) + 1))

plt.plot(component_counts, cumulative_variance, marker='o')
plt.xticks(component_counts)
plt.title("Cumulative Explained Variance")
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Variance")
plt.grid(True)
plt.show()
