# ABALONE 

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import OneHotEncoder,LabelEncoder
from scipy.stats import pearsonr
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the raw abalone table once. DATA itself is never modified, so later
# sections can take fresh copies of it.
DATA = pd.read_csv('abalone.csv')
# Working copy used by the exploratory analysis below.
data = DATA.copy()
# Preview as the cell's LAST expression so the notebook actually renders it
# (a bare expression in the middle of a cell is silently discarded).
DATA.head()

##### Creating a new attribute: AGE

In [None]:
# AGE is derived directly from the ring count: AGE = RINGS + 1.5.
data = data.assign(AGE=data['RINGS'] + 1.5)
data.head()

Unnamed: 0,SEX,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,RINGS,AGE
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,16.5
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,8.5
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,10.5
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,11.5
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,8.5


In [None]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   SEX             4177 non-null   object 
 1   LENGTH          4177 non-null   float64
 2   DIAMETER        4177 non-null   float64
 3   HEIGHT          4177 non-null   float64
 4   WHOLE_WEIGHT    4177 non-null   float64
 5   SHUCKED_WEIGHT  4177 non-null   float64
 6   VISCERA_WEIGHT  4177 non-null   float64
 7   SHELL_WEIGHT    4177 non-null   float64
 8   RINGS           4177 non-null   int64  
 9   AGE             4177 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 326.5+ KB


In [None]:
# data.shape is (rows, columns): rows are records, columns are features.
n_records, n_features = data.shape
print(f'The dataset has {n_features} features with {n_records} records.\n All the columns are numerical except the SEX column which has three classifications- male,female and infant.')

The dataset has 10 features with 4177 records.
 All the columns are numerical except the SEX column which has three classifications- male,female and infant.


In [None]:
data.isna().sum()

SEX               0
LENGTH            0
DIAMETER          0
HEIGHT            0
WHOLE_WEIGHT      0
SHUCKED_WEIGHT    0
VISCERA_WEIGHT    0
SHELL_WEIGHT      0
RINGS             0
AGE               0
dtype: int64

###### The dataset has no null values.

In [None]:
data.drop('RINGS',axis=1).describe()

Unnamed: 0,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,AGE
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,0.523992,0.407881,0.139516,0.828742,0.359367,0.180594,0.238831,11.433684
std,0.120093,0.09924,0.041827,0.490389,0.221963,0.109614,0.139203,3.224169
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,2.5
25%,0.45,0.35,0.115,0.4415,0.186,0.0935,0.13,9.5
50%,0.545,0.425,0.14,0.7995,0.336,0.171,0.234,10.5
75%,0.615,0.48,0.165,1.153,0.502,0.253,0.329,12.5
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,30.5


On an average (irrespective of the sex):

- The average age of an Abalone in the sample is about 11.4 years.

- The height of an Abalone is 0.139516mm. 

- The overall weight of an Abalone's body is 0.828742 grams.

- The maximum length of an Abalone is 0.815mm and the minimum is 0.075mm.

- The diameter of an Abalone ranges between  0.055 to 0.65mm.



### CHECKING FOR 0 IN HEIGHT

In [None]:
(data.HEIGHT==0).sum()

2

In [None]:
data[data['HEIGHT']==0]

Unnamed: 0,SEX,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,RINGS,AGE
1257,I,0.43,0.34,0.0,0.428,0.2065,0.086,0.115,8,9.5
3996,I,0.315,0.23,0.0,0.134,0.0575,0.0285,0.3505,6,7.5


### IMPUTATION OF HEIGHT COLUMN HAVING VALUE 0

In [None]:
# Mean HEIGHT for each SEX class, as a one-column DataFrame indexed by SEX.
means = data.groupby('SEX')[['HEIGHT']].mean()
means

Unnamed: 0_level_0,HEIGHT
SEX,Unnamed: 1_level_1
F,0.158011
I,0.107996
M,0.151381


In [None]:
# A recorded HEIGHT of 0 is treated as a missing measurement. Each such row is
# imputed with the mean HEIGHT of its own SEX class instead of a hard-coded
# number, so the cell stays correct if the data or the affected classes change.
zero_height = data['HEIGHT'] == 0
# Means are taken over the valid (non-zero) rows only, so the bad records do
# not pull the replacement value down.
sex_mean_height = data.loc[~zero_height].groupby('SEX')['HEIGHT'].mean()
data.loc[zero_height, 'HEIGHT'] = data.loc[zero_height, 'SEX'].map(sex_mean_height)
# Sanity check: no zero heights should remain.
(data['HEIGHT'] == 0).sum()

0

### ENCODING
- The attribute SEX is on a nominal scale: its classes have no natural order, so we give equal weightage to all of them when predicting the rings of the abalone.
- For nominal data, we use one-hot encoding, via either sklearn's OneHotEncoder or pandas' get_dummies.

In [None]:
pd.get_dummies(data,drop_first=True)

Unnamed: 0,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,RINGS,AGE,SEX_I,SEX_M
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,16.5,0,1
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,8.5,0,1
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,10.5,0,0
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,11.5,0,1
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,8.5,1,0
...,...,...,...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,12.5,0,0
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,11.5,0,1
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,10.5,0,1
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,11.5,0,0


### UNIVARIATE ANALYSIS

In [None]:
data.hist(figsize=(20,10))
plt.show()


In [None]:
sns.kdeplot(data.HEIGHT)


###### Height column's datapoints are skewed towards the right.

In [None]:
sns.kdeplot(data.LENGTH)

###### Length column's datapoints are skewed towards the left.

In [None]:
sns.kdeplot(data.DIAMETER)

###### Diameter column's datapoints are skewed towards the left.

In [None]:

sns.kdeplot(data.WHOLE_WEIGHT)


###### WHOLE_WEIGHT column's datapoints are not normally distributed and  is bimodal.

In [None]:
sns.kdeplot(data.VISCERA_WEIGHT)

###### VISCERA_WEIGHT column's datapoints are positively skewed, as the tail of the distribution is elongated towards the right.

In [None]:
sns.kdeplot(data.SHELL_WEIGHT)

###### SHELL_WEIGHT column's datapoints  has its tail elongated to the right and is bimodal.

In [None]:
sns.kdeplot(data.AGE)

###### AGE column's datapoints have their tail elongated to the right, so the distribution is positively skewed.

##### Among all the columns, Height has the most skewed datapoints.

### Abalone's of which sex are found to be the most?

In [None]:
data['SEX'].value_counts().plot.pie()
data['SEX'].value_counts()

0- Female         1-Infant         2-Male

Male Abalones have the highest count, followed by the infant Abalones. Female Abalones have the lowest count.

### Which ring count is the most and the least frequent? And in which range do most ring counts fall?

In [None]:
sns.countplot(data=data,x='RINGS')
data.RINGS.value_counts()


Ring 9 has the highest count of 689 followed by ring 10 with the count of 634.

Rings 1, 2, 25, 26 and 29 have the least count of 1, followed by 24 and 27 with a count of 2.

Most abalones have between 6 and 12 rings.

The Rings datapoints are skewed towards the right.

### BIVARIATE ANALYSIS

### Which gender has the maximum rings?

In [None]:
# Largest ring count observed within each SEX class. The aggregation is named
# by string rather than passed as a set containing a NumPy callable.
data.pivot_table(index='SEX', values='RINGS', aggfunc='max')


###### Female abalone's have the maximum count of rings.

### On an average which gender has the maximum rings?

In [None]:
# Average ring count within each SEX class. The aggregation is named by string
# rather than passed as a set containing a NumPy callable.
data.pivot_table(index='SEX', values='RINGS', aggfunc='mean')


In [None]:
# Mean number of rings per SEX class. The original plotted RINGS against
# itself (x='RINGS', y='RINGS'), which cannot show a difference between sexes.
plt.figure(figsize=(12,5))
sns.barplot(data=data, x='SEX', y='RINGS')
plt.title('Average number of rings by sex')
plt.show()



###### Female Abalones have  more rings than male and infants. 


### Who has a longer lifespan among the male and female Abalone?

In [None]:
sns.swarmplot(data=data,x='SEX',y='AGE')
data.groupby(['SEX'])['AGE'].median()

##### Female Abalone's have longer lifespan when compared to the male.
In the plot, the width of each swarm shows where the datapoints accumulate, and its length shows their spread.
- Female abalone's age ranges the most between 10-25.
- Male abalone's age ranges the most between 10-20.
- Infant abalone's age ranges the most between 5-15.

### Is there a significant difference in the height of Abalones as per their gender?

In [None]:
sns.barplot(data=data,x='SEX',y='HEIGHT')

data.groupby(['SEX'])[["HEIGHT",'LENGTH']].mean()


0- Female
1-Infant
2-Male

On average, the height of a female abalone is greater than that of a male abalone, by about 0.0066mm.

### Is there a significant difference in the diameter of Abalones as per their gender?

In [None]:
sns.barplot(data=data,x='SEX',y='DIAMETER')

data.groupby(['SEX'])["DIAMETER"].mean()

###### The female Abalones have a higher diameter in comparison to the male.
The mean diameters of male and female Abalones differ by 0.015mm.


### Find the  number of rings that each sex of Abalone have?And among them which has the highest?

In [None]:
# 'count' is the number of records per SEX (what the original .count() gave);
# 'sum' is the total number of rings per SEX, which is what the question asks.
data.groupby('SEX')['RINGS'].agg(['count', 'sum'])


##### Female Abalones have the maximum number of rings.

### What is the max and min weights of an Abalone's shell(after being dried), viscera(gut weight- industrial waste), shucked(flesh)? Which sex of Abalone weighs the most? 

In [None]:
# Minimum and maximum of every weight column within each SEX class. The
# aggregations are given as an ordered list of names; the original passed a
# set of builtins, which has no defined order.
data.pivot_table(
    index='SEX',
    values=['WHOLE_WEIGHT', 'SHUCKED_WEIGHT', 'VISCERA_WEIGHT', 'SHELL_WEIGHT'],
    aggfunc=['min', 'max'],
)

#### Male Abalones weigh more than the female Abalones.
Although the female Abalones have a higher shell weight and shucked weight than the male Abalones, it is the viscera weight that makes the male Abalones weigh the most.


###### To graphically understand the above interpretation.

In [None]:
# Mean WHOLE_WEIGHT per SEX. Keyword arguments are used because newer seaborn
# releases no longer accept x and y positionally.
sns.barplot(data=data, x='SEX', y='WHOLE_WEIGHT')

In [None]:
# Mean SHUCKED_WEIGHT per SEX. Keyword arguments are used because newer seaborn
# releases no longer accept x and y positionally.
sns.barplot(data=data, x='SEX', y='SHUCKED_WEIGHT')

In [None]:
# Mean VISCERA_WEIGHT per SEX. Keyword arguments are used because newer seaborn
# releases no longer accept x and y positionally.
sns.barplot(data=data, x='SEX', y='VISCERA_WEIGHT')

In [None]:
# Mean SHELL_WEIGHT per SEX. Keyword arguments are used because newer seaborn
# releases no longer accept x and y positionally.
sns.barplot(data=data, x='SEX', y='SHELL_WEIGHT')

### MULTIVARIATE ANALYSIS

### Does age have an impact on the height and weight of  Abalone?


In [None]:
# Pearson correlation between WHOLE_WEIGHT and AGE; c is the coefficient and
# p the p-value, which is compared against the 0.05 level.
c, p = pearsonr(data['WHOLE_WEIGHT'], data['AGE'])
verdict = (
    'NOT SIGNIFICANT: THERE IS NO IMPACT OF AGE ON WHOLE_WEIGHT'
    if p > 0.05
    else ' SIGNIFICANT: THERE IS AN IMPACT OF AGE ON WHOLE_WEIGHT'
)
print(verdict, p)

In [None]:
sns.lmplot(data=data,x='AGE',y='WHOLE_WEIGHT',hue='SEX')


###### From the graph we can infer that age has a significant impact on the weight of an Abalone: the lower points show the infants, and as the age increases the weight increases.

In [None]:
# Pearson correlation between HEIGHT and AGE; c is the coefficient and p the
# p-value, which is compared against the 0.05 level.
c, p = pearsonr(data['HEIGHT'], data['AGE'])
verdict = (
    'NOT SIGNIFICANT: THERE IS NO IMPACT OF AGE ON HEIGHT'
    if p > 0.05
    else ' SIGNIFICANT: THERE IS AN IMPACT OF AGE ON HEIGHT'
)
print(verdict, p)

In [None]:
sns.lmplot(data=data,x='AGE',y='HEIGHT',hue='SEX')

###### From the graph we can infer that age has a significant impact on the height of an Abalone: the lower points show the infants, and as the age increases the height increases.

###### The above graphical and statistical tests show that as the age increases the height and weight increases of the Abalone.

### Does diameter of Abalone increases with the increase in height?

In [None]:
sns.lmplot(data=data,x='DIAMETER',y='HEIGHT',hue='SEX')

###### Yes, the diameter of an Abalone increases as the height of the Abalone increases.

In [None]:
# Pairwise correlation heatmap of the numeric columns. SEX is a string column,
# so it is excluded explicitly: recent pandas versions raise instead of
# silently dropping non-numeric columns in DataFrame.corr().
plt.figure(figsize=(20,10))
numeric_corr = data.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cbar=False)
plt.show()



In [None]:
DF= DATA.copy()

### ENCODING

In [None]:
# One-hot encode SEX into one indicator column per class (F, I, M) and swap
# them in for the original string column.
# Built from the untouched DATA frame (DF is a plain copy of it at this point)
# so the cell can be re-run without failing on an already-dropped SEX column.
encode = pd.get_dummies(DATA['SEX'])
# `columns=` replaces the positional axis argument, which newer pandas rejects.
DF = DATA.drop(columns='SEX').join(encode)
DF

Unnamed: 0,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,RINGS,F,I,M
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,0,0,1
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,0,0,1
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,1,0,0
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,0,0,1
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,1,0,0
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,0,0,1
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,0,0,1
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,1,0,0


In [None]:
DF.RINGS.astype('category')

0       15
1        7
2        9
3       10
4        7
        ..
4172    11
4173    10
4174     9
4175    10
4176    12
Name: RINGS, Length: 4177, dtype: category
Categories (28, int64): [1, 2, 3, 4, ..., 25, 26, 27, 29]

In [None]:
df=DF.copy()

In [None]:
# Binary target: 1 when the abalone has more than 10 rings, otherwise 0.
df['RINGS'] = np.where(df['RINGS'].to_numpy() > 10, 1, 0)

In [None]:
df

Unnamed: 0,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,RINGS,F,I,M
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,1,0,0,1
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,0,0,0,1
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,0,1,0,0
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,0,0,0,1
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,1,1,0,0
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,0,0,0,1
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,0,0,0,1
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,0,1,0,0


### PREVALENCE RATE

In [None]:
df['RINGS'].value_counts(normalize=True)*100

###### Since the minority class makes up more than 25% of the records, we use accuracy as the evaluation metric.

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Features are every column except the binarised RINGS target.
x = df.drop(columns='RINGS')
# Select the target by name; the positional iloc[:, -4] silently breaks as
# soon as a column is added or removed.
y = df['RINGS']

# Create the train/test partitions that every following cell relies on
# (xtrain, xtest, ytrain, ytest were used below but never defined).
# NOTE(review): test_size=0.3 mirrors the later split in this notebook and the
# fixed random_state makes "Restart & Run All" reproducible -- confirm both
# against the originally intended split.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=42)


### CHECK PREVALENCE RATE FOR YTRAIN AND YTEST

In [None]:
ytrain.value_counts(normalize=True)

In [None]:
ytest.value_counts(normalize=True)

The prevalence rate differs slightly between ytrain and ytest, but the dataset remains balanced as the deviation is minimal.

# LOGISTIC REGRESSION


In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
logistic= LogisticRegression()

In [None]:
logistic.fit(xtrain,ytrain)

In [None]:
log_train_pred=logistic.predict(xtrain)
log_test_pred=logistic.predict(xtest)
log_train_pred,log_test_pred

In [None]:
log_train_pred_prob=logistic.predict_proba(xtrain)
log_test_pred_prob=logistic.predict_proba(xtest)
log_train_pred_prob

### Check the predict_proba values: the first column is the probability of class 0 (failure) and the second is the probability of class 1 (success). If the second value is above the threshold, the prediction is 1; otherwise it is 0.

In [None]:
check_train= np.argmax(log_train_pred_prob,axis=1)
check_train


In [None]:
check_test= np.argmax(log_test_pred_prob,axis=1)
check_test

### EVALUATION METRICS

In [None]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [None]:
accuracy_score(ytrain,log_train_pred)

In [None]:
accuracy_score(ytest,log_test_pred)

If the difference between the train and test accuracy scores stays within ±5 percentage points (because we assume a 95% confidence level), we can say the model is neither underfitting nor overfitting.

In [None]:
confusion_matrix(ytrain,log_train_pred)


### False negatives (466) outnumber false positives (230), so the desired metric is recall.

In [None]:
confusion_matrix(ytest,log_test_pred)
#test is only to view the results, tuning/changes are to be made only in train.

In [None]:
print(classification_report(ytrain,log_train_pred))

In [None]:
print(classification_report(ytest,log_test_pred))

In [None]:


# 5-fold cross-validated scores of the logistic model on the training data.
# The function imported at the top of the notebook is cross_val_score;
# `cross_val` does not exist and raised a NameError.
crossval = cross_val_score(logistic, xtrain, ytrain, cv=5)

In [None]:
sns.boxplot(crossval)

In [None]:
import statsmodels.api as sm


In [None]:
# xtrain carries all three SEX indicator columns (F, I, M), which sum to 1 in
# every row and are therefore perfectly collinear with the constant added
# here (the dummy-variable trap). Drop one indicator so the Logit design
# matrix is full rank; F becomes the reference class.
xtrain1 = sm.add_constant(xtrain.drop(columns='F'))

In [None]:
model = sm.Logit(ytrain,xtrain1).fit()

In [None]:
model.summary()

In [None]:
# Fresh copy of the raw data with RINGS turned into a binary target:
# 1 when the abalone has more than 10 rings, otherwise 0.
df1 = DATA.copy()
df1['RINGS'] = np.where(df1['RINGS'].to_numpy() > 10, 1, 0)

In [None]:
X= df1.iloc[:,:-1]
Y= df1.iloc[:,-1]


In [None]:
# Stratified 70/30 split so both partitions keep the same class balance.
# random_state pins the shuffle so the results below are reproducible on a
# fresh kernel.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, stratify=Y, random_state=42
)

# One-hot encode the categorical columns, dropping the first level of each.
x_train1 = pd.get_dummies(x_train, drop_first=True)
# Encode the test set separately, then force it onto the training columns: a
# category missing from one partition would otherwise give mismatched feature
# matrices.
x_test1 = pd.get_dummies(x_test, drop_first=True).reindex(
    columns=x_train1.columns, fill_value=0
)

In [None]:
import statsmodels.api as sm


In [None]:
y_train

1517    0
2501    0
1009    0
4026    0
3463    1
       ..
934     0
4006    0
3278    1
3804    0
2048    0
Name: RINGS, Length: 2923, dtype: int32

In [None]:
# Prepend a constant column to the dummy-encoded training features, fit a
# statsmodels Logit on them and display the fit summary.
x_train2 = sm.add_constant(x_train1)
logit_fit = sm.Logit(y_train, x_train2).fit()
logit_fit.summary()

In [None]:

from sklearn.preprocessing  import MinMaxScaler
# Learn the per-column min/max on the training features only, then apply the
# same fitted transform to both partitions.
scaler = MinMaxScaler().fit(x_train1)
x_train3 = scaler.transform(x_train1)
x_test3 = scaler.transform(x_test1)


In [None]:
# Use the min-max SCALED features computed above (x_train3). The original
# passed the unscaled x_train1 again, which made this fit a duplicate of the
# previous model. The scaler returns a bare array, so it is wrapped back into a
# DataFrame to keep the column names in the summary and the row index aligned
# with y_train.
x_train4 = sm.add_constant(
    pd.DataFrame(x_train3, columns=x_train1.columns, index=x_train1.index)
)

In [None]:
sm.Logit(y_train,x_train4).fit().summary()


# K-NEAREST NEIGHBOURS

In [None]:
import numpy as np 
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
df1.head()

Unnamed: 0,SEX,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,RINGS
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,1
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,0
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,0
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,0
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,0


In [None]:
knn= KNeighborsClassifier()

In [None]:
knn.fit(x_train1,y_train)
x_train1

Unnamed: 0,LENGTH,DIAMETER,HEIGHT,WHOLE_WEIGHT,SHUCKED_WEIGHT,VISCERA_WEIGHT,SHELL_WEIGHT,SEX_I,SEX_M
1517,0.665,0.515,0.180,1.3890,0.5945,0.3240,0.3950,0,0
2501,0.330,0.260,0.080,0.2000,0.0625,0.0500,0.0700,0,0
1009,0.615,0.480,0.160,1.2525,0.5850,0.2595,0.3300,0,0
4026,0.365,0.280,0.090,0.1960,0.0865,0.0360,0.0605,1,0
3463,0.625,0.485,0.170,1.4370,0.5855,0.2930,0.4750,0,1
...,...,...,...,...,...,...,...,...,...
934,0.450,0.355,0.105,0.4445,0.1970,0.0930,0.1335,1,0
4006,0.585,0.465,0.150,0.9800,0.4315,0.2545,0.2470,0,1
3278,0.525,0.415,0.160,0.6445,0.2600,0.1575,0.2200,0,1
3804,0.355,0.270,0.100,0.2255,0.1100,0.0420,0.0640,1,0


In [None]:
knn_train_pred= knn.predict(x_train1)
knn_test_pred= knn.predict(x_test1)

In [None]:
# confusion_matrix takes (y_true, y_pred): rows are the actual classes and
# columns the predicted ones. The original passed the predictions first, which
# transposes the matrix and swaps false positives with false negatives.
print('CONFUSION MATRIX: TRAIN DATA',confusion_matrix(y_train,knn_train_pred))
print('-'*50)
print('CONFUSION MATRIX: VALIDATION DATA',confusion_matrix(y_test,knn_test_pred))


CONFUSION MATRIX: TRAIN DATA [[1726  272]
 [ 184  741]]
--------------------------------------------------
CONFUSION MATRIX: VALIDATION DATA [[688 173]
 [132 261]]


In [None]:
# classification_report takes (y_true, y_pred). With the arguments reversed,
# precision and recall are swapped and the support column counts predictions
# instead of actual class members.
print('CLASSIFICATION REPORT: TRAIN DATA',classification_report(y_train,knn_train_pred))
print('-'*50)
print('CLASSIFICATION REPORT: VALIDATION DATA',classification_report(y_test,knn_test_pred))


CLASSIFICATION REPORT: TRAIN DATA               precision    recall  f1-score   support

           0       0.90      0.86      0.88      1998
           1       0.73      0.80      0.76       925

    accuracy                           0.84      2923
   macro avg       0.82      0.83      0.82      2923
weighted avg       0.85      0.84      0.85      2923

--------------------------------------------------
CLASSIFICATION REPORT: VALIDATION DATA               precision    recall  f1-score   support

           0       0.84      0.80      0.82       861
           1       0.60      0.66      0.63       393

    accuracy                           0.76      1254
   macro avg       0.72      0.73      0.72      1254
weighted avg       0.76      0.76      0.76      1254



In [None]:
# Sweep n_neighbors from 1 to 200 and record train/test accuracy for each k.
# Scores are collected in a list and turned into a DataFrame once:
# DataFrame.append no longer exists in current pandas and copied the whole
# frame on every iteration.
# Row i of `result` corresponds to n_neighbors = i + 1.
knn_scores = []
for i in range(1, 201):
    KNN = KNeighborsClassifier(n_neighbors=i)
    KNN.fit(x_train1, y_train)
    knn_scores.append({
        'TRAIN': KNN.score(x_train1, y_train),
        'TEST': KNN.score(x_test1, y_test),
    })
result = pd.DataFrame(knn_scores, columns=['TRAIN', 'TEST'])

In [None]:
result

Unnamed: 0,TRAIN,TEST
0,1.000000,0.722488
1,0.862812,0.732057
2,0.870339,0.748804
3,0.839548,0.750399
4,0.843996,0.756778
...,...,...
195,0.737598,0.720096
196,0.739309,0.720893
197,0.740677,0.720096
198,0.741362,0.721691


In [None]:
%matplotlib qt

In [None]:
result.plot()

<AxesSubplot:>

In [None]:

# One-hot encode keeping every level (no drop_first) for the second KNN sweep.
# NOTE: this rebinds x_train2, which earlier held the statsmodels design matrix.
x_train2 = pd.get_dummies(x_train)
# Force the test columns onto the training columns so both matrices always
# have the same features in the same order.
x_test2 = pd.get_dummies(x_test).reindex(columns=x_train2.columns, fill_value=0)

In [None]:
# Same n_neighbors sweep (1 to 200) on the fully one-hot-encoded features.
# Scores are collected in a list and turned into a DataFrame once:
# DataFrame.append no longer exists in current pandas and copied the whole
# frame on every iteration.
# Row i of `result1` corresponds to n_neighbors = i + 1.
knn_scores_full = []
for i in range(1, 201):
    KNN = KNeighborsClassifier(n_neighbors=i)
    KNN.fit(x_train2, y_train)
    knn_scores_full.append({
        'TRAIN': KNN.score(x_train2, y_train),
        'TEST': KNN.score(x_test2, y_test),
    })
result1 = pd.DataFrame(knn_scores_full, columns=['TRAIN', 'TEST'])

In [None]:
result1

Unnamed: 0,TRAIN,TEST
0,1.000000,0.722488
1,0.862812,0.732057
2,0.870339,0.748804
3,0.839548,0.750399
4,0.843996,0.756778
...,...,...
195,0.737598,0.718501
196,0.739309,0.719298
197,0.740677,0.718501
198,0.741362,0.720096


In [None]:
%matplotlib qt

In [None]:
result1.plot()