In [23]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [6]:
## Load the data
#
# The CSV location defaults to the original author's local download path, so
# existing behaviour is unchanged. On any other machine, set the GLASS_CSV
# environment variable to point at the file instead of editing this cell.

glass_csv_path = os.environ.get("GLASS_CSV", r"C:\Users\user\Downloads\glass.csv")
data = pd.read_csv(glass_csv_path)

In [7]:
# Show the loaded frame as the cell output (the rendered table elides the middle rows).
data

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [8]:

# Print column names, non-null counts and dtypes as a quick check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RI      214 non-null    float64
 1   Na      214 non-null    float64
 2   Mg      214 non-null    float64
 3   Al      214 non-null    float64
 4   Si      214 non-null    float64
 5   K       214 non-null    float64
 6   Ca      214 non-null    float64
 7   Ba      214 non-null    float64
 8   Fe      214 non-null    float64
 9   Type    214 non-null    int64  
dtypes: float64(9), int64(1)
memory usage: 16.8 KB


In [None]:
## Split the data into independent (feature) and dependent (target) variables

In [9]:
# Target: the 'Type' class label. Features: every other column of `data`.
y = data['Type']
x = data.drop(columns=['Type'])

In [10]:
# Preview the first rows of the feature matrix to confirm 'Type' was removed.
x.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0


In [11]:
# Preview the first values of the target series.
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Type, dtype: int64

In [12]:
## Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
## NOTE(review): no `stratify` argument is passed, so class proportions in the
## two parts are whatever the random shuffle produces.

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

In [13]:
## Baseline model: a k-nearest-neighbours classifier that votes over the 5 closest samples.
## NOTE(review): the features are passed in unscaled; KNN is distance-based, so
## consider standardising them before fitting.

knn_classifier = KNeighborsClassifier(
    n_neighbors=5,
)

In [14]:
## Fit the baseline classifier on the training split only

knn_classifier.fit(X=x_train, y=y_train)

In [16]:
## Predict class labels for the held-out test rows with the baseline model

knn_pred = knn_classifier.predict(X=x_test)

In [17]:
# Show the predicted labels for the test rows.
knn_pred

array([1, 7, 1, 7, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 6, 5, 6, 1, 1, 7, 2, 7,
       7, 7, 1, 1, 1, 1, 2, 1, 2, 2, 3, 2, 1, 7, 7, 1, 1, 2, 1, 7, 3],
      dtype=int64)

In [18]:
### Evaluate the baseline: fraction of test rows whose predicted label matches the true label

knn_acc = accuracy_score(y_true=y_test, y_pred=knn_pred)

In [19]:
# Show the baseline test accuracy.
knn_acc

0.6511627906976745

In [22]:
# Per-class precision, recall and F1 for the baseline predictions.
baseline_report = classification_report(y_true=y_test, y_pred=knn_pred)
print(baseline_report)

              precision    recall  f1-score   support

           1       0.56      0.82      0.67        11
           2       0.69      0.64      0.67        14
           3       0.50      0.33      0.40         3
           5       1.00      0.25      0.40         4
           6       0.50      0.33      0.40         3
           7       0.78      0.88      0.82         8

    accuracy                           0.65        43
   macro avg       0.67      0.54      0.56        43
weighted avg       0.68      0.65      0.63        43



In [25]:
## Search space for hyperparameter tuning (4 x 2 x 2 = 16 combinations)

param_grid = dict(
    n_neighbors=[3, 5, 7, 9],         # how many neighbours take part in the vote
    weights=['uniform', 'distance'],  # equal votes vs. closer neighbours counting more
    p=[1, 2],                         # Minkowski power: 1 = Manhattan, 2 = Euclidean
)

In [26]:
## Grid search with 5-fold cross-validation over `param_grid`, fitted on the training split only

grid_search = GridSearchCV(
    estimator=knn_classifier,
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(x_train, y_train)

In [27]:
# Parameter combination that the grid search selected as best.
best_params = grid_search.best_params_

In [28]:
# Print the winning hyperparameters.
print(best_params)

{'n_neighbors': 3, 'p': 1, 'weights': 'distance'}


In [29]:
# Take the best estimator found by the search and predict on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X=x_test)

In [30]:
# Test accuracy of the tuned model, computed the same way as for the baseline.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [31]:
# Show the tuned model's test accuracy.
accuracy

0.7441860465116279

In [32]:
# Per-class precision, recall and F1 for the tuned model's predictions.
tuned_report = classification_report(y_true=y_test, y_pred=y_pred)
print(tuned_report)

              precision    recall  f1-score   support

           1       0.57      0.73      0.64        11
           2       0.82      0.64      0.72        14
           3       0.50      0.67      0.57         3
           5       1.00      1.00      1.00         4
           6       0.67      0.67      0.67         3
           7       1.00      0.88      0.93         8

    accuracy                           0.74        43
   macro avg       0.76      0.76      0.76        43
weighted avg       0.77      0.74      0.75        43

