<h2>Initializing</h2>
<p>Import libraries, define variables for the attribute (column) name strings (to avoid typing the quotes each time), and load the data.</p>

In [1]:
import pandas as pd
import numpy as np
from scipy.stats.stats import pearsonr
from sklearn.cluster import KMeans
from sklearn import preprocessing
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix 
from sklearn import tree
from sklearn.metrics import classification_report
from scipy import interp
from sklearn.metrics import accuracy_score
from sklearn.utils.multiclass import unique_labels
import math

# Column names of the wine-quality data, bound to variables so that later
# cells can refer to columns without retyping the quoted strings.
fixed_acidity = 'fixed acidity'
volatile_acidity = 'volatile acidity'
citric_acid = 'citric acid'
residual_sugar = 'residual sugar'
chlorides = 'chlorides'
free_sulfur_dioxide = 'free sulfur dioxide'
total_sulfur_dioxide = 'total sulfur dioxide'
density = 'density'
ph = 'pH'
sulphates = 'sulphates'
alcohol = 'alcohol'
quality = 'quality'
# Names for derived columns (class label and the optional sugar/acidity ratios).
qclass = 'qclass'
ratio_to_fixed = 'ratio_to_fixed'
ratio_to_volatile = 'ratio_to_volatile'
ph_acidity = 'ph_acidity'
ratio_to_ph = 'ratio_to_ph'

In [46]:
def confusion_matrix_report(y_true, y_pred):
    """Render the confusion matrix of ``y_true`` vs ``y_pred`` as a text table.

    The table has a centred "Prediction" banner, a header row with every label
    returned by ``unique_labels`` and one row of counts per label. The result
    is a single string in which every line ends with a newline.
    """
    matrix = confusion_matrix(y_true, y_pred)
    labels = unique_labels(y_true, y_pred)
    n_labels = len(labels)

    # A column is as wide as the longest label, but never narrower than 5
    # characters (the width reserved for a count).
    width = max([5] + [len(str(label)) for label in labels])
    padding = " " * width
    label_spec = ">{}".format(width)
    count_spec = "{}d".format(width)

    lines = [
        padding + " " + format("Prediction", "_^{}".format(width * n_labels)),
        padding + " ".join(format(label, label_spec) for label in labels),
    ]
    for row_index, row_label in enumerate(labels):
        counts = [format(matrix[row_index, col_index], count_spec)
                  for col_index in range(n_labels)]
        lines.append(format(row_label, label_spec) + " ".join(counts))
    return "\n".join(lines) + "\n"
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error between true and predicted values.

    Both arguments are converted to plain float arrays before subtracting, so
    values are always compared position by position. This matters when both
    are pandas Series with differently ordered indexes (a direct subtraction
    would align on the index instead), lets plain lists/tuples be passed, and
    avoids wraparound for unsigned integer inputs.

    Parameters
    ----------
    y_true : array-like of numbers
        Reference values.
    y_pred : array-like of numbers
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    float
        ``sqrt(mean((y_pred - y_true) ** 2))``.

    Raises
    ------
    ValueError
        If an input cannot be converted to floats (e.g. string class labels)
        or the shapes cannot be combined.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return math.sqrt(np.mean((predicted - actual) ** 2))

<p>Import red and white dataset.<br/>
Split into input matrix (independent vars) and output vector (target vars).<br/>
</p>

<p>sort for kNN (https://scikit-learn.org/stable/modules/neighbors.html#unsupervised-nearest-neighbors) </p>

In [3]:
# Read both datasets and order the rows by quality score; the sort returns a
# new frame, so no in-place mutation is needed.
white = pd.read_csv("data/winequality-white1.csv").sort_values(by=quality)
red = pd.read_csv("data/winequality-red1.csv").sort_values(by=quality)

# Target vector (quality score) and input matrix (every other column).
white_target, red_target = white[quality], red[quality]
white_input = white.drop(quality, axis='columns')
red_input = red.drop(quality, axis='columns')

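Add the ratios of residual sugar to acidity (fixed, volatile and pH-based) as extra attributes. This step is currently disabled (the code below is commented out).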

In [4]:
#white[ph_acidity] = 7 - white[ph]
#red[ph_acidity] = 7 - red[ph]
#white_input[ratio_to_fixed]=white[residual_sugar]/white[fixed_acidity]
#white_input[ratio_to_volatile]=white[residual_sugar]/white[volatile_acidity]
#white_input[ratio_to_ph]=white[residual_sugar]/white[ph_acidity]
#red_input[ratio_to_fixed]=red[residual_sugar]/red[fixed_acidity]
#red_input[ratio_to_volatile]=red[residual_sugar]/red[volatile_acidity]
#red_input[ratio_to_ph]=red[residual_sugar]/red[ph_acidity]

<p>Transform the inputs using log(x + 1).</p>

In [5]:
# log(x + 1) transform of every input column.
log_white_norm = white_input.add(1).pipe(np.log)
log_red_norm = red_input.add(1).pipe(np.log)

# Reduced variants: drop the columns that are not used for each wine type.
white_log_dropped = [fixed_acidity, sulphates, chlorides, citric_acid, ph,
                     density, total_sulfur_dioxide, residual_sugar]
red_log_dropped = [free_sulfur_dioxide, ph, residual_sugar, chlorides,
                   citric_acid, fixed_acidity, density]
f_log_white = log_white_norm.drop(white_log_dropped, axis='columns')
f_log_red = log_red_norm.drop(red_log_dropped, axis='columns')

<p>Normalize using MinMax Normalizer.</p>

In [6]:
# Min-max scaling: (x - min) / (max - min), computed per column.
white_min, white_max = white_input.min(), white_input.max()
red_min, red_max = red_input.min(), red_input.max()
mm_white_norm = (white_input - white_min) / (white_max - white_min)
mm_red_norm = (red_input - red_min) / (red_max - red_min)

# Reduced variants: drop the columns that are not used for each wine type.
white_mm_dropped = [fixed_acidity, sulphates, chlorides, citric_acid, ph,
                    density, total_sulfur_dioxide, residual_sugar]
red_mm_dropped = [free_sulfur_dioxide, ph, residual_sugar, chlorides,
                  citric_acid, fixed_acidity, density]
f_mm_white = mm_white_norm.drop(white_mm_dropped, axis='columns')
f_mm_red = mm_red_norm.drop(red_mm_dropped, axis='columns')

<p>Standardize (z-score): subtract each attribute's mean and divide by its standard deviation.</p>

In [7]:
# Z-score every input column: subtract the column mean, divide by the column std.
white_norm = white_input.sub(white_input.mean()).div(white_input.std())
red_norm = red_input.sub(red_input.mean()).div(red_input.std())

<p>Correlation Matrices</p>

In [8]:
# Pairwise correlations between all columns of the white-wine data (quality
# included), shown as a colour-graded table with 3 digits of precision.
wcorr = white.corr()
wcorr.style.background_gradient().set_precision(3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
fixed acidity,1.0,-0.0227,0.289,0.089,0.0231,-0.0494,0.0911,0.265,-0.426,-0.0171,-0.121,-0.114
volatile acidity,-0.0227,1.0,-0.149,0.0643,0.0705,-0.097,0.0893,0.0271,-0.0319,-0.0357,0.0677,-0.195
citric acid,0.289,-0.149,1.0,0.0942,0.114,0.0941,0.121,0.15,-0.164,0.0623,-0.0757,-0.00921
residual sugar,0.089,0.0643,0.0942,1.0,0.0887,0.299,0.401,0.839,-0.194,-0.0267,-0.451,-0.0976
chlorides,0.0231,0.0705,0.114,0.0887,1.0,0.101,0.199,0.257,-0.0904,0.0168,-0.36,-0.21
free sulfur dioxide,-0.0494,-0.097,0.0941,0.299,0.101,1.0,0.616,0.294,-0.000618,0.0592,-0.25,0.00816
total sulfur dioxide,0.0911,0.0893,0.121,0.401,0.199,0.616,1.0,0.53,0.00232,0.135,-0.449,-0.175
density,0.265,0.0271,0.15,0.839,0.257,0.294,0.53,1.0,-0.0936,0.0745,-0.78,-0.307
pH,-0.426,-0.0319,-0.164,-0.194,-0.0904,-0.000618,0.00232,-0.0936,1.0,0.156,0.121,0.0994
sulphates,-0.0171,-0.0357,0.0623,-0.0267,0.0168,0.0592,0.135,0.0745,0.156,1.0,-0.0174,0.0537


In [9]:
# Pairwise correlations between all columns of the red-wine data (quality
# included), shown as a colour-graded table with 3 digits of precision.
rcorr = red.corr()
rcorr.style.background_gradient().set_precision(3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
fixed acidity,1.0,-0.256,0.672,0.115,0.0937,-0.154,-0.113,0.668,-0.683,0.183,-0.0617,0.124
volatile acidity,-0.256,1.0,-0.552,0.00192,0.0613,-0.0105,0.0765,0.022,0.235,-0.261,-0.202,-0.391
citric acid,0.672,-0.552,1.0,0.144,0.204,-0.061,0.0355,0.365,-0.542,0.313,0.11,0.226
residual sugar,0.115,0.00192,0.144,1.0,0.0556,0.187,0.203,0.355,-0.0857,0.00553,0.0421,0.0137
chlorides,0.0937,0.0613,0.204,0.0556,1.0,0.00556,0.0474,0.201,-0.265,0.371,-0.221,-0.129
free sulfur dioxide,-0.154,-0.0105,-0.061,0.187,0.00556,1.0,0.668,-0.0219,0.0704,0.0517,-0.0694,-0.0507
total sulfur dioxide,-0.113,0.0765,0.0355,0.203,0.0474,0.668,1.0,0.0713,-0.0665,0.0429,-0.206,-0.185
density,0.668,0.022,0.365,0.355,0.201,-0.0219,0.0713,1.0,-0.342,0.149,-0.496,-0.175
pH,-0.683,0.235,-0.542,-0.0857,-0.265,0.0704,-0.0665,-0.342,1.0,-0.197,0.206,-0.0577
sulphates,0.183,-0.261,0.313,0.00553,0.371,0.0517,0.0429,0.149,-0.197,1.0,0.0936,0.251


<h2>Assigning classes</h2>
<p>
    Assign classes based on quality: lower than 6, exactly 6, and higher than 6. This split makes sense considering the distribution of ratings.
</p>

In [10]:
def _assign_quality_classes(frame):
    """Bucket the quality score of ``frame`` into three ordered classes.

    quality < 6  -> '1_low'    / code 1
    quality == 6 -> '2_medium' / code 2
    quality > 6  -> '3_high'   / code 3

    The same labels are used for both wine types, and their numeric prefix
    matches the numeric code, so string labels and codes sort identically.

    Returns
    -------
    (numpy.ndarray, pandas.Series)
        The string labels (one per row, in row order) and the numeric codes as
        a Series named 'classnum' that shares the index of ``frame``.
    """
    score = frame[quality]
    conditions = [score < 6, score > 6]
    labels = np.select(conditions, ['1_low', '3_high'], default='2_medium')
    codes = pd.Series(np.select(conditions, [1, 3], default=2),
                      index=frame.index, name='classnum')
    return labels, codes


# Assign classes for white and red with the same rule and the same labels.
white['class'], white_classnum = _assign_quality_classes(white)
red['class'], red_classnum = _assign_quality_classes(red)

# Keep the string label next to the standardized inputs as well.
white_norm['class'] = white['class']
red_norm['class'] = red['class']

# The numeric codes are the classification targets; to classify on the string
# labels instead, use white['class'] / red['class'] here.
white_targetclass = white_classnum
red_targetclass = red_classnum

# Class distribution per wine type.
print(white_targetclass.value_counts())
print(red_targetclass.value_counts())

2    2198
1    1640
3    1060
Name: classnum, dtype: int64
1    744
2    638
3    217
Name: classnum, dtype: int64


In [11]:
# Standardized inputs without the class label column, ready to feed to a classifier.
white_norm_input = white_norm.drop(labels=['class'], axis='columns')
red_norm_input = red_norm.drop(labels=['class'], axis='columns')

In [12]:
# Keep only the selected white-wine attributes by dropping all the others from
# the standardized inputs, then show summary statistics of what is left.
white_unused_columns = [fixed_acidity, sulphates, chlorides, citric_acid, ph,
                        density, total_sulfur_dioxide, residual_sugar]
white_norm_filtered = white_norm_input.drop(labels=white_unused_columns, axis='columns')
white_norm_filtered.describe()

Unnamed: 0,volatile acidity,free sulfur dioxide,alcohol
count,4898.0,4898.0,4898.0
mean,-1.998539e-14,-6.380722e-17,-4.325046e-14
std,1.0,1.0,1.0
min,-1.966784,-1.958477,-2.043089
25%,-0.6770318,-0.7237012,-0.8241915
50%,-0.1809733,-0.07691388,-0.09285319
75%,0.414297,0.6286722,0.719745
max,8.152811,14.91679,2.99502


In [13]:
# Keep only the selected red-wine attributes by dropping all the others from
# the standardized inputs, then show summary statistics of what is left.
red_unused_columns = [free_sulfur_dioxide, ph, residual_sugar, chlorides,
                      citric_acid, fixed_acidity, density]
red_norm_filtered = red_norm_input.drop(labels=red_unused_columns, axis='columns')
red_norm_filtered.describe()

Unnamed: 0,volatile acidity,total sulfur dioxide,sulphates,alcohol
count,1599.0,1599.0,1599.0,1599.0
mean,8.477896e-15,2.36556e-16,6.625737e-15,2.204136e-14
std,1.0,1.0,1.0,1.0
min,-2.277567,-1.230199,-1.935902,-1.898325
25%,-0.7696903,-0.7438076,-0.63802,-0.8661079
50%,-0.04367545,-0.2574163,-0.2250577,-0.2092427
75%,0.6264921,0.4721707,0.4238832,0.6352984
max,5.876138,7.372847,7.9162,4.201138


<h2>k-NN classification</h2><p>https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html#the-curse-of-dimensionality</p>

<h3>white: normalized and selected for attributes with low std/range</h3>

In [14]:
def run_knn_report(inputs, targets):
    """Print cross-validated k-NN results (uniform weights) for k = 1..20.

    For every neighbour count a ``KNeighborsClassifier`` is evaluated with
    ``cross_val_predict`` and the confusion matrix, the classification report
    and the RMSE between targets and predictions are printed.

    Uses the notebook-level ``cv`` splitter, which must be defined before the
    function is called.

    Parameters
    ----------
    inputs : array-like
        Feature matrix handed to ``cross_val_predict``.
    targets : array-like
        Class labels; the RMSE is only available when they are numeric.
    """
    print("_______________________________________________________________________________________________")
    print("KNN, no weights")
    for n_neighbour in range(1, 21):
        print(str(n_neighbour) + " neighbours:")
        knn_estimator = KNeighborsClassifier(n_neighbour)
        print(str(n_neighbour) + " neighbours//")
        predicted = cross_val_predict(knn_estimator, inputs, targets, cv=cv)
        print(confusion_matrix_report(targets, predicted))
        print(classification_report(targets, predicted))
        print("RMSE:")
        try:
            print(root_mean_squared_error(targets, predicted))
        except (TypeError, ValueError):
            # The previous handler named an undefined `Error`, which would
            # itself raise NameError. RMSE is best-effort: it cannot be
            # computed for non-numeric (string) class labels.
            print("not available (class labels are not numeric)")
    
# Splitter shared by all report functions below: 10 stratified, shuffled folds
# with a fixed seed so repeated runs give the same split.
cv = StratifiedKFold(shuffle=True, n_splits=10, random_state=1337)
#cv = KFold(n_splits=3)

# Unweighted k-NN on the standardized white inputs restricted to the selected attributes.
run_knn_report(inputs=white_norm_filtered.values, targets=white_targetclass)


_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1120   419   101
    2  420  1444   334
    3   84   326   650

             precision    recall  f1-score   support

          1       0.69      0.68      0.69      1640
          2       0.66      0.66      0.66      2198
          3       0.60      0.61      0.61      1060

avg / total       0.66      0.66      0.66      4898

RMSE:
1.1171662512042362
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1310   286    44
    2  799  1243   156
    3  197   484   379

             precision    recall  f1-score   support

          1       0.57      0.80      0.66      1640
          2       0.62      0.57      0.59      2198
          3       0.65      0.36      0.46      1060

avg / total       0.61      0.60      0.59      4898

RMSE:
0.721964788885416
3 neighbours:
3

             precision    recall  f1-score   support

          1       0.63      0.63      0.63      1640
          2       0.55      0.64      0.59      2198
          3       0.58      0.37      0.45      1060

avg / total       0.58      0.58      0.58      4898

RMSE:
0.960425589615904


<h3><i>red: </i>normalized and selected for attributes with low std/range</h3>

In [15]:
# Unweighted k-NN on the standardized red inputs restricted to the selected attributes.
run_knn_report(inputs=red_norm_filtered.values, targets=red_targetclass)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  567   165    12
    2  147   417    74
    3   18    69   130

             precision    recall  f1-score   support

          1       0.77      0.76      0.77       744
          2       0.64      0.65      0.65       638
          3       0.60      0.60      0.60       217

avg / total       0.70      0.70      0.70      1599

RMSE:
1.1796163292264188
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  641   100     3
    2  288   322    28
    3   36   121    60

             precision    recall  f1-score   support

          1       0.66      0.86      0.75       744
          2       0.59      0.50      0.55       638
          3       0.66      0.28      0.39       217

avg / total       0.64      0.64      0.62      1599

RMSE:
0.8138113605631304
3 neighbours:


<h3>white: normalized unselected</h3>

In [16]:
# Unweighted k-NN on all standardized white inputs (no attribute selection).
run_knn_report(inputs=white_norm_input.values, targets=white_targetclass)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1156   416    68
    2  381  1538   279
    3   59   276   725

             precision    recall  f1-score   support

          1       0.72      0.70      0.71      1640
          2       0.69      0.70      0.69      2198
          3       0.68      0.68      0.68      1060

avg / total       0.70      0.70      0.70      4898

RMSE:
1.170970497878053
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1308   303    29
    2  735  1331   132
    3  132   445   483

             precision    recall  f1-score   support

          1       0.60      0.80      0.69      1640
          2       0.64      0.61      0.62      2198
          3       0.75      0.46      0.57      1060

avg / total       0.65      0.64      0.63      4898

RMSE:
0.8430292071934984
3 neighbours:
3

      __Prediction___
         1     2     3
    1 1039   548    53
    2  514  1406   278
    3   60   506   494

             precision    recall  f1-score   support

          1       0.64      0.63      0.64      1640
          2       0.57      0.64      0.60      2198
          3       0.60      0.47      0.52      1060

avg / total       0.60      0.60      0.60      4898

RMSE:
1.016001016001524


<h3><i>red: </i> normalized unselected</h3>

In [17]:
# Unweighted k-NN on all standardized red inputs (no attribute selection).
run_knn_report(inputs=red_norm_input.values, targets=red_targetclass)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  554   166    24
    2  158   412    68
    3   21    61   135

             precision    recall  f1-score   support

          1       0.76      0.74      0.75       744
          2       0.64      0.65      0.65       638
          3       0.59      0.62      0.61       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.1630651100782905
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  627   109     8
    2  286   316    36
    3   39   105    73

             precision    recall  f1-score   support

          1       0.66      0.84      0.74       744
          2       0.60      0.50      0.54       638
          3       0.62      0.34      0.44       217

avg / total       0.63      0.64      0.62      1599

RMSE:
0.8436200621463342
3 neighbours:


      __Prediction___
         1     2     3
    1  551   180    13
    2  221   356    61
    3   18   119    80

             precision    recall  f1-score   support

          1       0.70      0.74      0.72       744
          2       0.54      0.56      0.55       638
          3       0.52      0.37      0.43       217

avg / total       0.61      0.62      0.61      1599

RMSE:
1.001874415890415


<h3>white: unfiltered, unnormalized</h3>

In [18]:
# Unweighted k-NN on the raw white inputs (no scaling, no attribute selection).
run_knn_report(inputs=white_input.values, targets=white_targetclass)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1052   450   138
    2  437  1435   326
    3  107   325   628

             precision    recall  f1-score   support

          1       0.66      0.64      0.65      1640
          2       0.65      0.65      0.65      2198
          3       0.58      0.59      0.58      1060

avg / total       0.64      0.64      0.64      4898

RMSE:
1.0923095061227794
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1234   345    61
    2  856  1200   142
    3  265   467   328

             precision    recall  f1-score   support

          1       0.52      0.75      0.62      1640
          2       0.60      0.55      0.57      2198
          3       0.62      0.31      0.41      1060

avg / total       0.58      0.56      0.55      4898

RMSE:
0.6319388199114419
3 neighbours:


      __Prediction___
         1     2     3
    1  852   701    87
    2  632  1339   227
    3  195   600   265

             precision    recall  f1-score   support

          1       0.51      0.52      0.51      1640
          2       0.51      0.61      0.55      2198
          3       0.46      0.25      0.32      1060

avg / total       0.50      0.50      0.49      4898

RMSE:
0.810808971484713


<h3>red, unnormalized, unselected</h3>

In [19]:
# Unweighted k-NN on the raw red inputs (no scaling, no attribute selection).
run_knn_report(inputs=red_input.values, targets=red_targetclass)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  518   188    38
    2  181   398    59
    3   28    75   114

             precision    recall  f1-score   support

          1       0.71      0.70      0.70       744
          2       0.60      0.62      0.61       638
          3       0.54      0.53      0.53       217

avg / total       0.65      0.64      0.64      1599

RMSE:
1.0960729263659628
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  605   128    11
    2  353   257    28
    3   62    95    60

             precision    recall  f1-score   support

          1       0.59      0.81      0.69       744
          2       0.54      0.40      0.46       638
          3       0.61      0.28      0.38       217

avg / total       0.57      0.58      0.55      1599

RMSE:
0.6710301229387411
3 neighbours:


      __Prediction___
         1     2     3
    1  554   181     9
    2  324   290    24
    3   70   113    34

             precision    recall  f1-score   support

          1       0.58      0.74      0.65       744
          2       0.50      0.45      0.47       638
          3       0.51      0.16      0.24       217

avg / total       0.54      0.55      0.53      1599

RMSE:
0.6984302957695782


In [20]:
def run_knc_report(inputs, targets):
    """Print cross-validated results of a nearest-centroid classifier.

    A default ``NearestCentroid`` is evaluated with ``cross_val_predict``; the
    confusion matrix, the classification report and the RMSE between targets
    and predictions are printed.

    Uses the notebook-level ``cv`` splitter, which must be defined before the
    function is called.

    Parameters
    ----------
    inputs : array-like
        Feature matrix handed to ``cross_val_predict``.
    targets : array-like
        Class labels; the RMSE is only available when they are numeric.
    """
    knc_estimator = NearestCentroid()
    predicted = cross_val_predict(knc_estimator, inputs, targets, cv=cv)
    print(confusion_matrix_report(targets, predicted))
    print(classification_report(targets, predicted))
    print("RMSE:")
    try:
        print(root_mean_squared_error(targets, predicted))
    except (TypeError, ValueError):
        # The previous handler named an undefined `Error`, which would itself
        # raise NameError. RMSE is best-effort: it cannot be computed for
        # non-numeric (string) class labels.
        print("not available (class labels are not numeric)")

In [21]:
# Nearest-centroid classifier on all standardized white inputs.
run_knc_report(inputs=white_norm_input.values, targets=white_targetclass)

      __Prediction___
         1     2     3
    1 1081   266   293
    2  785   537   876
    3  139   181   740

             precision    recall  f1-score   support

          1       0.54      0.66      0.59      1640
          2       0.55      0.24      0.34      2198
          3       0.39      0.70      0.50      1060

avg / total       0.51      0.48      0.46      4898

RMSE:
0.9783278373268796


In [22]:
# Nearest-centroid classifier on all standardized red inputs.
run_knc_report(inputs=red_norm_input.values, targets=red_targetclass)

      __Prediction___
         1     2     3
    1  541   145    58
    2  217   220   201
    3   11    43   163

             precision    recall  f1-score   support

          1       0.70      0.73      0.72       744
          2       0.54      0.34      0.42       638
          3       0.39      0.75      0.51       217

avg / total       0.59      0.58      0.57      1599

RMSE:
1.1355861801214393


In [28]:
def run_kncs_report(inputs, targets, thresh=0):
    """Print cross-validated results of a shrunken nearest-centroid classifier.

    Same report as the plain nearest-centroid run (confusion matrix,
    classification report, RMSE), but with a configurable shrink threshold.

    Uses the notebook-level ``cv`` splitter, which must be defined before the
    function is called.

    Parameters
    ----------
    inputs : array-like
        Feature matrix handed to ``cross_val_predict``.
    targets : array-like
        Class labels; the RMSE is only available when they are numeric.
    thresh : float, default 0
        Threshold for shrinking the centroids. ``0`` means "no shrinkage" and
        is passed to the estimator as ``None``.
    """
    # A threshold of 0 disables shrinkage. Passing None states that explicitly
    # and also works with scikit-learn versions that reject a literal 0.
    knc_estimator = NearestCentroid(shrink_threshold=thresh if thresh else None)
    predicted = cross_val_predict(knc_estimator, inputs, targets, cv=cv)
    print(confusion_matrix_report(targets, predicted))
    print(classification_report(targets, predicted))
    print("RMSE:")
    try:
        print(root_mean_squared_error(targets, predicted))
    except (TypeError, ValueError):
        # The previous handler named an undefined `Error`, which would itself
        # raise NameError. RMSE is best-effort: it cannot be computed for
        # non-numeric (string) class labels.
        print("not available (class labels are not numeric)")

In [29]:
# Nearest-centroid with the default threshold (0) on all standardized white inputs.
run_kncs_report(inputs=white_norm_input.values, targets=white_targetclass)

      __Prediction___
         1     2     3
    1 1081   266   293
    2  785   537   876
    3  139   181   740

             precision    recall  f1-score   support

          1       0.54      0.66      0.59      1640
          2       0.55      0.24      0.34      2198
          3       0.39      0.70      0.50      1060

avg / total       0.51      0.48      0.46      4898

RMSE:
0.9783278373268796


In [30]:
# Sweep the shrink threshold over 0.000, 0.001, ..., 0.009 on the standardized
# white inputs, printing the threshold before each report.
for step in range(0, 1000, 100):
    t = step / 100000
    print(t)
    run_kncs_report(white_norm_input.values, white_targetclass, thresh=t)

0.0
      __Prediction___
         1     2     3
    1 1081   266   293
    2  785   537   876
    3  139   181   740

             precision    recall  f1-score   support

          1       0.54      0.66      0.59      1640
          2       0.55      0.24      0.34      2198
          3       0.39      0.70      0.50      1060

avg / total       0.51      0.48      0.46      4898

RMSE:
0.9783278373268796
0.001
      __Prediction___
         1     2     3
    1 1081   266   293
    2  785   537   876
    3  139   181   740

             precision    recall  f1-score   support

          1       0.54      0.66      0.59      1640
          2       0.55      0.24      0.34      2198
          3       0.39      0.70      0.50      1060

avg / total       0.51      0.48      0.46      4898

RMSE:
0.9783278373268796
0.002
      __Prediction___
         1     2     3
    1 1081   266   293
    2  785   537   876
    3  139   181   740

             precision    recall  f1-score   support


In [32]:
def run_knnw_report(inputs, targets):
    """Print cross-validated k-NN results (distance-weighted votes) for k = 1..20.

    For every neighbour count a ``KNeighborsClassifier(weights='distance')`` is
    evaluated with ``cross_val_predict`` and the confusion matrix, the
    classification report and the RMSE between targets and predictions are
    printed.

    Uses the notebook-level ``cv`` splitter, which must be defined before the
    function is called.

    Parameters
    ----------
    inputs : array-like
        Feature matrix handed to ``cross_val_predict``.
    targets : array-like
        Class labels; the RMSE is only available when they are numeric.
    """
    print("_______________________________________________________________________________________________")
    print("KNN with inverse distance as weight:")
    for n_neighbour in range(1, 21):
        print(str(n_neighbour) + " neighbours:")
        knn_estimator = KNeighborsClassifier(n_neighbour, weights='distance')
        print(str(n_neighbour) + " neighbours//")
        predicted = cross_val_predict(knn_estimator, inputs, targets, cv=cv)
        print(confusion_matrix_report(targets, predicted))
        print(classification_report(targets, predicted))
        print("RMSE:")
        try:
            print(root_mean_squared_error(targets, predicted))
        except (TypeError, ValueError):
            # The previous handler named an undefined `Error`, which would
            # itself raise NameError. RMSE is best-effort: it cannot be
            # computed for non-numeric (string) class labels.
            print("not available (class labels are not numeric)")

In [33]:
# Distance-weighted k-NN on all standardized white inputs.
run_knnw_report(inputs=white_norm_input.values, targets=white_targetclass)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1156   416    68
    2  381  1538   279
    3   59   276   725

             precision    recall  f1-score   support

          1       0.72      0.70      0.71      1640
          2       0.69      0.70      0.69      2198
          3       0.68      0.68      0.68      1060

avg / total       0.70      0.70      0.70      4898

RMSE:
1.170970497878053
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1156   416    68
    2  381  1538   279
    3   59   276   725

             precision    recall  f1-score   support

          1       0.72      0.70      0.71      1640
          2       0.69      0.70      0.69      2198
          3       0.68      0.68      0.68      1060

avg / total       0.70      0.70      0.70      4898

RMSE:
1.17097049787

      __Prediction___
         1     2     3
    1 1178   423    39
    2  326  1662   210
    3   34   320   706

             precision    recall  f1-score   support

          1       0.77      0.72      0.74      1640
          2       0.69      0.76      0.72      2198
          3       0.74      0.67      0.70      1060

avg / total       0.73      0.72      0.72      4898

RMSE:
1.1883640287910346


In [34]:
# Distance-weighted k-NN on all standardized red inputs.
run_knnw_report(inputs=red_norm_input.values, targets=red_targetclass)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  554   166    24
    2  158   412    68
    3   21    61   135

             precision    recall  f1-score   support

          1       0.76      0.74      0.75       744
          2       0.64      0.65      0.65       638
          3       0.59      0.62      0.61       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.1630651100782905
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  554   166    24
    2  158   412    68
    3   21    61   135

             precision    recall  f1-score   support

          1       0.76      0.74      0.75       744
          2       0.64      0.65      0.65       638
          3       0.59      0.62      0.61       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.1630651100

In [35]:
# Distance-weighted k-NN on the log(x + 1)-transformed white inputs (all attributes).
run_knnw_report(inputs=log_white_norm.values, targets=white_targetclass)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1139   412    89
    2  416  1464   318
    3   67   304   689

             precision    recall  f1-score   support

          1       0.70      0.69      0.70      1640
          2       0.67      0.67      0.67      2198
          3       0.63      0.65      0.64      1060

avg / total       0.67      0.67      0.67      4898

RMSE:
1.1382578221750783
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1139   412    89
    2  416  1464   318
    3   67   304   689

             precision    recall  f1-score   support

          1       0.70      0.69      0.70      1640
          2       0.67      0.67      0.67      2198
          3       0.63      0.65      0.64      1060

avg / total       0.67      0.67      0.67      4898

RMSE:
1.1382578221

      __Prediction___
         1     2     3
    1 1120   472    48
    2  341  1639   218
    3   40   365   655

             precision    recall  f1-score   support

          1       0.75      0.68      0.71      1640
          2       0.66      0.75      0.70      2198
          3       0.71      0.62      0.66      1060

avg / total       0.70      0.70      0.70      4898

RMSE:
1.1625713183425557


In [36]:
# Distance-weighted k-NN on the log(x + 1)-transformed red inputs (all attributes).
run_knnw_report(inputs=log_red_norm.values, targets=red_targetclass)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  558   163    23
    2  168   395    75
    3   15    70   132

             precision    recall  f1-score   support

          1       0.75      0.75      0.75       744
          2       0.63      0.62      0.62       638
          3       0.57      0.61      0.59       217

avg / total       0.68      0.68      0.68      1599

RMSE:
1.148727441218708
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  558   163    23
    2  168   395    75
    3   15    70   132

             precision    recall  f1-score   support

          1       0.75      0.75      0.75       744
          2       0.63      0.62      0.62       638
          3       0.57      0.61      0.59       217

avg / total       0.68      0.68      0.68      1599

RMSE:
1.14872744121

      __Prediction___
         1     2     3
    1  594   140    10
    2  173   418    47
    3   21    83   113

             precision    recall  f1-score   support

          1       0.75      0.80      0.78       744
          2       0.65      0.66      0.65       638
          3       0.66      0.52      0.58       217

avg / total       0.70      0.70      0.70      1599

RMSE:
1.1068607468627618


In [37]:
# Distance-weighted k-NN on the min-max scaled white inputs (all attributes).
run_knnw_report(inputs=mm_white_norm.values, targets=white_targetclass)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1163   408    69
    2  375  1528   295
    3   60   277   723

             precision    recall  f1-score   support

          1       0.73      0.71      0.72      1640
          2       0.69      0.70      0.69      2198
          3       0.67      0.68      0.67      1060

avg / total       0.70      0.70      0.70      4898

RMSE:
1.1731479152404758
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1163   408    69
    2  375  1528   295
    3   60   277   723

             precision    recall  f1-score   support

          1       0.73      0.71      0.72      1640
          2       0.69      0.70      0.69      2198
          3       0.67      0.68      0.67      1060

avg / total       0.70      0.70      0.70      4898

RMSE:
1.1731479152

      __Prediction___
         1     2     3
    1 1157   427    56
    2  323  1635   240
    3   24   318   718

             precision    recall  f1-score   support

          1       0.77      0.71      0.74      1640
          2       0.69      0.74      0.71      2198
          3       0.71      0.68      0.69      1060

avg / total       0.72      0.72      0.72      4898

RMSE:
1.1956433778406816


In [38]:
# Distance-weighted k-NN on the min-max scaled red inputs (all attributes).
run_knnw_report(inputs=mm_red_norm.values, targets=red_targetclass)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  553   167    24
    2  155   416    67
    3   21    66   130

             precision    recall  f1-score   support

          1       0.76      0.74      0.75       744
          2       0.64      0.65      0.65       638
          3       0.59      0.60      0.59       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.160373449191463
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  553   167    24
    2  155   416    67
    3   21    66   130

             precision    recall  f1-score   support

          1       0.76      0.74      0.75       744
          2       0.64      0.65      0.65       638
          3       0.59      0.60      0.59       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.16037344919

      __Prediction___
         1     2     3
    1  591   141    12
    2  132   454    52
    3    7    82   128

             precision    recall  f1-score   support

          1       0.81      0.79      0.80       744
          2       0.67      0.71      0.69       638
          3       0.67      0.59      0.63       217

avg / total       0.73      0.73      0.73      1599

RMSE:
1.1875421719907089


In [39]:
# Unweighted k-NN on the standardized, attribute-selected white inputs. This
# repeats the very first k-NN run; presumably kept here for comparison with the
# distance-weighted run on the same inputs.
run_knn_report(inputs=white_norm_filtered.values, targets=white_targetclass)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1120   419   101
    2  420  1444   334
    3   84   326   650

             precision    recall  f1-score   support

          1       0.69      0.68      0.69      1640
          2       0.66      0.66      0.66      2198
          3       0.60      0.61      0.61      1060

avg / total       0.66      0.66      0.66      4898

RMSE:
1.1171662512042362
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1310   286    44
    2  799  1243   156
    3  197   484   379

             precision    recall  f1-score   support

          1       0.57      0.80      0.66      1640
          2       0.62      0.57      0.59      2198
          3       0.65      0.36      0.46      1060

avg / total       0.61      0.60      0.59      4898

RMSE:
0.721964788885416
3 neighbours:
3

In [40]:
# Distance-weighted KNN report on the same white-wine inputs as the
# unweighted run, so the two outputs can be compared directly.
# NOTE(review): `white_norm_filtered` is presumably the normalised white
# feature frame after feature filtering -- confirm where it is defined.
run_knnw_report(
    white_norm_filtered.values,
    white_targetclass,
)

_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1120   419   101
    2  420  1444   334
    3   84   326   650

             precision    recall  f1-score   support

          1       0.69      0.68      0.69      1640
          2       0.66      0.66      0.66      2198
          3       0.60      0.61      0.61      1060

avg / total       0.66      0.66      0.66      4898

RMSE:
1.1171662512042362
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1137   404    99
    2  440  1435   323
    3   89   331   640

             precision    recall  f1-score   support

          1       0.68      0.69      0.69      1640
          2       0.66      0.65      0.66      2198
          3       0.60      0.60      0.60      1060

avg / total       0.66      0.66      0.66      4898

RMSE:
1.1008738501

      __Prediction___
         1     2     3
    1 1153   440    47
    2  339  1605   254
    3   37   397   626

             precision    recall  f1-score   support

          1       0.75      0.70      0.73      1640
          2       0.66      0.73      0.69      2198
          3       0.68      0.59      0.63      1060

avg / total       0.69      0.69      0.69      4898

RMSE:
1.1514544214192595


In [41]:
# Red-wine data: unweighted KNN report first, then the inverse-distance
# weighted variant on exactly the same inputs.
# NOTE(review): `red_norm_filtered` is presumably the normalised red
# feature frame after feature filtering -- confirm where it is defined.
run_knn_report(
    red_norm_filtered.values,
    red_targetclass,
)
run_knnw_report(
    red_norm_filtered.values,
    red_targetclass,
)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  567   165    12
    2  147   417    74
    3   18    69   130

             precision    recall  f1-score   support

          1       0.77      0.76      0.77       744
          2       0.64      0.65      0.65       638
          3       0.60      0.60      0.60       217

avg / total       0.70      0.70      0.70      1599

RMSE:
1.1796163292264188
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  641   100     3
    2  288   322    28
    3   36   121    60

             precision    recall  f1-score   support

          1       0.66      0.86      0.75       744
          2       0.59      0.50      0.55       638
          3       0.66      0.28      0.39       217

avg / total       0.64      0.64      0.62      1599

RMSE:
0.8138113605631304
3 neighbours:


             precision    recall  f1-score   support

          1       0.71      0.77      0.74       744
          2       0.58      0.60      0.59       638
          3       0.62      0.37      0.47       217

avg / total       0.64      0.65      0.64      1599

RMSE:
1.0080972981818899
_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  567   165    12
    2  147   417    74
    3   18    69   130

             precision    recall  f1-score   support

          1       0.77      0.76      0.77       744
          2       0.64      0.65      0.65       638
          3       0.60      0.60      0.60       217

avg / total       0.70      0.70      0.70      1599

RMSE:
1.1796163292264188
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  567   165    12
    2  148   416    74
    3   18    69 

      __Prediction___
         1     2     3
    1  608   131     5
    2  140   453    45
    3   12    76   129

             precision    recall  f1-score   support

          1       0.80      0.82      0.81       744
          2       0.69      0.71      0.70       638
          3       0.72      0.59      0.65       217

avg / total       0.74      0.74      0.74      1599

RMSE:
1.1761652107067264
20 neighbours:
20 neighbours//
      __Prediction___
         1     2     3
    1  606   134     4
    2  137   457    44
    3   11    78   128

             precision    recall  f1-score   support

          1       0.80      0.81      0.81       744
          2       0.68      0.72      0.70       638
          3       0.73      0.59      0.65       217

avg / total       0.75      0.74      0.74      1599

RMSE:
1.180146374784161


In [42]:
# Red-wine data, `f_log_red` feature set: unweighted KNN report followed
# by the inverse-distance weighted report on the same inputs.
# NOTE(review): `f_log_red` looks like a filtered, log-transformed red
# feature frame -- confirm against the cell that builds it.
run_knn_report(
    f_log_red.values,
    red_targetclass,
)
run_knnw_report(
    f_log_red.values,
    red_targetclass,
)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  568   160    16
    2  154   402    82
    3   16    66   135

             precision    recall  f1-score   support

          1       0.77      0.76      0.77       744
          2       0.64      0.63      0.64       638
          3       0.58      0.62      0.60       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.1761652107067264
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  646    92     6
    2  292   314    32
    3   37   119    61

             precision    recall  f1-score   support

          1       0.66      0.87      0.75       744
          2       0.60      0.49      0.54       638
          3       0.62      0.28      0.39       217

avg / total       0.63      0.64      0.62      1599

RMSE:
0.8037591479914785
3 neighbours:


      __Prediction___
         1     2     3
    1  577   160     7
    2  236   361    41
    3   28   126    63

             precision    recall  f1-score   support

          1       0.69      0.78      0.73       744
          2       0.56      0.57      0.56       638
          3       0.57      0.29      0.38       217

avg / total       0.62      0.63      0.62      1599

RMSE:
0.9380431509774403
_______________________________________________________________________________________________
KNN with inverse distance as weight:
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  568   160    16
    2  154   402    82
    3   16    66   135

             precision    recall  f1-score   support

          1       0.77      0.76      0.77       744
          2       0.64      0.63      0.64       638
          3       0.58      0.62      0.60       217

avg / total       0.69      0.69      0.69      1599

RMSE:
1.1761652107067264
2 neighbours:
2 neighb

      __Prediction___
         1     2     3
    1  612   125     7
    2  159   439    40
    3   12    82   123

             precision    recall  f1-score   support

          1       0.78      0.82      0.80       744
          2       0.68      0.69      0.68       638
          3       0.72      0.57      0.64       217

avg / total       0.73      0.73      0.73      1599

RMSE:
1.1383364534160079
20 neighbours:
20 neighbours//
      __Prediction___
         1     2     3
    1  615   124     5
    2  164   432    42
    3   11    88   118

             precision    recall  f1-score   support

          1       0.78      0.83      0.80       744
          2       0.67      0.68      0.67       638
          3       0.72      0.54      0.62       217

avg / total       0.73      0.73      0.73      1599

RMSE:
1.126185012742946


In [49]:
# White-wine data, `f_log_white` feature set: unweighted KNN report
# followed by the inverse-distance weighted report on the same inputs.
# NOTE(review): `f_log_white` looks like a filtered, log-transformed white
# feature frame -- confirm against the cell that builds it.
run_knn_report(
    f_log_white.values,
    white_targetclass,
)
run_knnw_report(
    f_log_white.values,
    white_targetclass,
)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1081   461    98
    2  439  1413   346
    3   98   327   635

             precision    recall  f1-score   support

          1       0.67      0.66      0.66      1640
          2       0.64      0.64      0.64      2198
          3       0.59      0.60      0.59      1060

avg / total       0.64      0.64      0.64      4898

RMSE:
0.6936979336808926
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1278   319    43
    2  810  1228   160
    3  199   520   341

             precision    recall  f1-score   support

          1       0.56      0.78      0.65      1640
          2       0.59      0.56      0.58      2198
          3       0.63      0.32      0.43      1060

avg / total       0.59      0.58      0.57      4898

RMSE:
0.7529715191265614
3 neighbours:


             precision    recall  f1-score   support

          1       0.67      0.66      0.66      1640
          2       0.64      0.64      0.64      2198
          3       0.59      0.60      0.59      1060

avg / total       0.64      0.64      0.64      4898

RMSE:
0.6936979336808926
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1095   451    94
    2  459  1399   340
    3  101   336   623

             precision    recall  f1-score   support

          1       0.66      0.67      0.66      1640
          2       0.64      0.64      0.64      2198
          3       0.59      0.59      0.59      1060

avg / total       0.64      0.64      0.64      4898

RMSE:
0.6950210844864835
3 neighbours:
3 neighbours//
      __Prediction___
         1     2     3
    1 1106   437    97
    2  437  1434   327
    3   78   359   623

             precision    recall  f1-score   support

          1       0.68      0.67      0.68      1640
          2       0

      __Prediction___
         1     2     3
    1 1147   439    54
    2  361  1605   232
    3   38   396   626

             precision    recall  f1-score   support

          1       0.74      0.70      0.72      1640
          2       0.66      0.73      0.69      2198
          3       0.69      0.59      0.63      1060

avg / total       0.69      0.69      0.69      4898

RMSE:
0.6055413096266453


In [48]:
# Red-wine data, `f_mm_red` feature set: unweighted KNN report followed
# by the inverse-distance weighted report on the same inputs.
# NOTE(review): `f_mm_red` looks like a filtered, min-max scaled red
# feature frame -- confirm against the cell that builds it.
run_knn_report(
    f_mm_red.values,
    red_targetclass,
)
run_knnw_report(
    f_mm_red.values,
    red_targetclass,
)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1  567   163    14
    2  142   420    76
    3   15    66   136

             precision    recall  f1-score   support

          1       0.78      0.76      0.77       744
          2       0.65      0.66      0.65       638
          3       0.60      0.63      0.61       217

avg / total       0.70      0.70      0.70      1599

RMSE:
0.5933759848629978
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1  639   103     2
    2  281   325    32
    3   34   125    58

             precision    recall  f1-score   support

          1       0.67      0.86      0.75       744
          2       0.59      0.51      0.55       638
          3       0.63      0.27      0.38       217

avg / total       0.63      0.64      0.62      1599

RMSE:
0.6545171850042718
3 neighbours:


      __Prediction___
         1     2     3
    1  575   158    11
    2  139   426    73
    3   18    69   130

             precision    recall  f1-score   support

          1       0.79      0.77      0.78       744
          2       0.65      0.67      0.66       638
          3       0.61      0.60      0.60       217

avg / total       0.71      0.71      0.71      1599

RMSE:
0.5891450860847319
4 neighbours:
4 neighbours//
      __Prediction___
         1     2     3
    1  580   151    13
    2  138   429    71
    3   16    70   131

             precision    recall  f1-score   support

          1       0.79      0.78      0.78       744
          2       0.66      0.67      0.67       638
          3       0.61      0.60      0.61       217

avg / total       0.71      0.71      0.71      1599

RMSE:
0.5843487097907776
5 neighbours:
5 neighbours//
      __Prediction___
         1     2     3
    1  593   141    10
    2  128   445    65
    3   18    71   128

           

In [47]:
# White-wine data, `f_mm_white` feature set: unweighted KNN report
# followed by the inverse-distance weighted report on the same inputs.
# NOTE(review): `f_mm_white` looks like a filtered, min-max scaled white
# feature frame -- confirm against the cell that builds it.
run_knn_report(
    f_mm_white.values,
    white_targetclass,
)
run_knnw_report(
    f_mm_white.values,
    white_targetclass,
)

_______________________________________________________________________________________________
KNN, no weights
1 neighbours:
1 neighbours//
      __Prediction___
         1     2     3
    1 1118   418   104
    2  436  1423   339
    3   79   336   645

             precision    recall  f1-score   support

          1       0.68      0.68      0.68      1640
          2       0.65      0.65      0.65      2198
          3       0.59      0.61      0.60      1060

avg / total       0.65      0.65      0.65      4898

RMSE:
0.6794240108541295
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1281   321    38
    2  795  1243   160
    3  183   490   387

             precision    recall  f1-score   support

          1       0.57      0.78      0.66      1640
          2       0.61      0.57      0.58      2198
          3       0.66      0.37      0.47      1060

avg / total       0.60      0.59      0.58      4898

RMSE:
0.735552280958793
3 neighbours:
3

             precision    recall  f1-score   support

          1       0.68      0.68      0.68      1640
          2       0.65      0.65      0.65      2198
          3       0.59      0.61      0.60      1060

avg / total       0.65      0.65      0.65      4898

RMSE:
0.6794240108541295
2 neighbours:
2 neighbours//
      __Prediction___
         1     2     3
    1 1132   410    98
    2  453  1419   326
    3   81   345   634

             precision    recall  f1-score   support

          1       0.68      0.69      0.68      1640
          2       0.65      0.65      0.65      2198
          3       0.60      0.60      0.60      1060

avg / total       0.65      0.65      0.65      4898

RMSE:
0.6777692615535915
3 neighbours:
3 neighbours//
      __Prediction___
         1     2     3
    1 1119   430    91
    2  439  1435   324
    3   67   336   657

             precision    recall  f1-score   support

          1       0.69      0.68      0.69      1640
          2       0