# KNN Basic

In [None]:
import numpy as np

In [None]:
# Toy training set: four 2-feature samples and their binary class labels.
X_data = [
    [1.4, 0.2],
    [1.3, 0.4],
    [4.0, 1.0],
    [4.7, 1.4],
]
y_data = [0, 0, 1, 1]

In [None]:
# Convert the plain Python lists into NumPy arrays.
X_data, y_data = np.array(X_data), np.array(y_data)

In [None]:
# Display the feature array.
X_data

In [None]:
# Display the label array.
y_data

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 1-nearest-neighbour classifier fitted on the toy data.
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(X_data, y_data)

In [None]:
# A single query sample, shaped (1, n_features) as scikit-learn expects.
x_test = np.array([2.4, 0.8]).reshape(1, -1)

In [None]:
# Look up the nearest training neighbour(s) of x_test in the fitted classifier.
classifier.kneighbors(x_test)

In [None]:
# Predicted class for x_test.
classifier.predict(x_test)

# KNN for Regression

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# Load the diabetes dataset
# return_X_y=True makes the loader return the (features, target) pair, unpacked here.
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    diabetes_X,
    diabetes_y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the training features and targets.
X_train.shape, y_train.shape

In [None]:
# Shapes of the test features and targets.
X_test.shape, y_test.shape

In [None]:
# Standardise the features: the scaler is fitted on the training split only,
# then the same fitted transform is applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# KNN regressor using 5 neighbours, fitted on the scaled training split.
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train, y_train)

In [None]:
# Predict targets for the held-out test split.
y_pred = knn_regressor.predict(X_test)

In [None]:
# Mean squared error of the test predictions.
mean_squared_error(y_test, y_pred)

In [None]:
# R2 score of the test predictions.
r2_score(y_test, y_pred)

In [None]:
# Sweep k = 1..30 and record the test-set R2 score of a regressor fitted with each k.
# NOTE: the scores are computed on the test split, so a k picked from this curve
# is tuned to that split.
k_values = list(range(1, 31))
scores = []

for k in k_values:
    # Sweep-local names keep the k=5 model and its y_pred from the earlier cells intact,
    # so re-running the metric cells after this loop still reports the k=5 results.
    sweep_model = KNeighborsRegressor(n_neighbors=k)
    sweep_model.fit(X_train, y_train)
    sweep_pred = sweep_model.predict(X_test)
    # r2_score already yields a single number here, so no averaging is needed.
    scores.append(r2_score(y_test, sweep_pred))

In [None]:
# Test-set R2 score as a function of k.
ax = sns.lineplot(x=k_values, y=scores, marker='o')
ax.set_xlabel("K Values")
ax.set_ylabel("R2 Score")

# KNN for Classification

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the iris dataset
# return_X_y=True makes the loader return the (features, target) pair, unpacked here.
iris_X, iris_y = datasets.load_iris(return_X_y=True)

In [None]:
# Hold out 20% of the iris samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris_X,
    iris_y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the training features and labels.
X_train.shape, y_train.shape

In [None]:
# Shapes of the test features and labels.
X_test.shape, y_test.shape

In [None]:
# Standardise the iris features. The scaler learns its statistics from the
# training split and that fitted scaler is reused for the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# KNN classifier using 3 neighbours, fitted on the scaled training split.
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test split.
y_pred = knn_classifier.predict(X_test)

In [None]:
# Accuracy of the test predictions.
accuracy_score(y_test, y_pred)

In [None]:
# Sweep k = 1..30 and record the test-set accuracy of a classifier fitted with each k.
# NOTE: the scores are computed on the test split, so a k picked from this curve
# is tuned to that split.
k_values = list(range(1, 31))
scores = []

for k in k_values:
    # Sweep-local names keep the k=3 model and its y_pred from the earlier cells intact,
    # so re-running the accuracy cell after this loop still reports the k=3 result.
    sweep_model = KNeighborsClassifier(n_neighbors=k)
    sweep_model.fit(X_train, y_train)
    sweep_pred = sweep_model.predict(X_test)
    # accuracy_score already yields a single number, so no averaging is needed.
    scores.append(accuracy_score(y_test, sweep_pred))

In [None]:
# Test-set accuracy as a function of k.
ax = sns.lineplot(x=k_values, y=scores, marker='o')
ax.set_xlabel("K Values")
ax.set_ylabel("Accuracy Score")

# KNN for Text Classification

In [None]:
!pip install datasets==4.0.0

## Load data

In [None]:
import numpy as np
from datasets import load_dataset
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics import accuracy_score, f1_score, classification_report

In [None]:
# Load the "imdb" dataset through the `datasets` library.
imdb = load_dataset("imdb")

In [None]:
# Pull the 'train' and 'test' splits out of the loaded dataset.
imdb_train = imdb['train']
imdb_test = imdb['test']

In [None]:
# Number of text entries in the train and test splits.
len(imdb_train['text']), len(imdb_test['text'])

In [None]:
# First training example: its text and its label.
imdb_train['text'][0], imdb_train['label'][0]

## Convert text to features (BoW)

In [None]:
# Convert text to vector using BoW
# The vocabulary is fitted on the training texts only and limited to 1000 features;
# the test texts are then encoded with that same fitted vocabulary.
# .toarray() converts the vectorizer output into dense arrays.
vectorizer = CountVectorizer(max_features=1000)
X_train = vectorizer.fit_transform(imdb_train['text']).toarray()
X_test = vectorizer.transform(imdb_test['text']).toarray()

In [None]:
# Collect the label column of each split as a NumPy array.
y_train, y_test = (
    np.array(split['label']) for split in (imdb_train, imdb_test)
)

## Normalize Features

In [None]:
# Standardise the BoW features: fit the scaler on the training matrix,
# then transform the training and test matrices one after the other.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## KNN Classifier

In [None]:
# KNN classifier using 3 neighbours, fitted on the scaled BoW training features.
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train, y_train)

## Evaluate

In [None]:
# Predict a label for every row of the test feature matrix.
y_pred = knn_classifier.predict(X_test)

In [None]:
# Accuracy of the test predictions.
accuracy_score(y_test, y_pred)

In [None]:
# F1 score of the test predictions.
f1_score(y_test, y_pred)

In [None]:
# Build the text report first, then print it; classes are named by their raw label ids.
report = classification_report(y_test, y_pred, target_names=['0', '1'])
print(report)