<a href="https://colab.research.google.com/github/HarshitaBadiyasar/Machine-Learning-Models-for-Connect-4-Game-Prediction/blob/main/q1_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/connect-4/connect-4.data.Z
!uncompress connect-4.data.Z

import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_val_score
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from scipy.stats import randint as sp_randint
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Integer codes for the board-cell symbols 'b', 'o' and 'x'.
mapping = {'b': 0, 'o': 1, 'x': 2}

# Integer codes for the outcome labels 'win', 'loss' and 'draw'.
outcome_mapping = {'win': 1, 'loss': 0, 'draw': 2}

# Load the header-less CSV, then substitute the cell symbols and the outcome
# labels in two successive passes over the whole frame.
df = pd.read_csv('connect-4.data', header=None)
df = df.replace(mapping)
df = df.replace(outcome_mapping)


#df = pd.read_csv('connect-4.data', header=None)


# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features as float32. The scaler learns its statistics from
# the training split only (fit_transform runs first), and the test split is
# then transformed with those same statistics.
scaler = StandardScaler()
X_train, X_test = (
    scaler.fit_transform(X_train.astype(np.float32)),
    scaler.transform(X_test.astype(np.float32)),
)

# Baseline: k-nearest-neighbours with scikit-learn's default settings.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Report held-out performance; the per-class metrics are macro-averaged.
print("Accuracy_knn:", accuracy_score(y_test, y_pred))
for label, metric in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score:", f1_score),
):
    print(label, metric(y_test, y_pred, average='macro'))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

# 5-fold cross-validation of the same estimator on the training split.
# NOTE(review): X_train was standardised before this call, so each fold sees
# scaling statistics computed from all training rows.
cv_scores = cross_val_score(knn, X_train, y_train, cv=5)
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Exhaustive grid search over the neighbour count and the vote weighting,
# evaluated with 5-fold cross-validation on the training split.
params = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance']
}
grid_search = GridSearchCV(knn, params, cv=5)
grid_search.fit(X_train, y_train)
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# refitted on the whole training split with the winning parameters; fitting
# it a second time would only repeat that work.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Held-out performance of the tuned model (macro-averaged per-class metrics).
print("Accuracy_grid:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))
print("F1-score:", f1_score(y_test, y_pred, average='macro'))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

# Use randomized search to tune hyperparameters: 10 candidates are sampled
# from the space below and each is scored by 5-fold cross-validated accuracy.
# NOTE(review): scikit-learn documents leaf_size as affecting tree
# construction/query speed and memory, so it is unlikely to change accuracy;
# confirm whether it belongs in this search space.
params = {
    'n_neighbors': range(1, 31),
    'weights': ['uniform', 'distance'],
    'leaf_size': range(10, 101, 10)
}
# Seed the sampler (same seed as the train/test split) so the sampled
# candidates, and therefore the reported best parameters and scores, are
# reproducible between runs.
random_search = RandomizedSearchCV(
    knn,
    params,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_train, y_train)
print("Best parameters:", random_search.best_params_)
print("Best score:", random_search.best_score_)

# RandomizedSearchCV defaults to refit=True, so best_estimator_ is already
# trained on the whole training split with the best hyperparameters; no
# additional fit call is needed.
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the performance of the model using different evaluation metrics
print("Accuracy_randon_search:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))
print("F1-score:", f1_score(y_test, y_pred, average='macro'))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))


--2023-04-15 18:53:47--  https://archive.ics.uci.edu/ml/machine-learning-databases/connect-4/connect-4.data.Z
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 403464 (394K) [application/x-httpd-php]
Saving to: ‘connect-4.data.Z’


2023-04-15 18:53:47 (8.70 MB/s) - ‘connect-4.data.Z’ saved [403464/403464]

Accuracy_knn: 0.7363824748371818
Precision: 0.5899268345194582
Recall: 0.5187691194313345
F1-score: 0.5252964023288328
Confusion matrix:
 [[1887 1366   88]
 [ 821 7937  118]
 [ 443  726  126]]
Cross-validation scores: [0.73993894 0.73984642 0.74021649 0.73956888 0.74095661]
Mean cross-validation score: 0.7401054676658341
Best parameters: {'n_neighbors': 7, 'weights': 'uniform'}
Best score: 0.7483393468405959
Accuracy_grid: 0.7505180580224985
Precision: 0.6184551704178872
Recall: 0.5255151009394955
F1-score: 0.539882022638