# IMPORT LIBRARIES

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support

# IMPORT DATASET

In [3]:
df = pd.read_csv('diabetics_data.csv')

In [None]:
df.head()

In [None]:
df.columns = df.iloc[0]

In [None]:
df = df.drop(df.index[0])

In [None]:
df.head()

In [None]:
# Summary statistics for each column
df.describe()

In [None]:
# Number of non-missing entries per column
df.count()

In [None]:
# Total number of missing cells across the whole frame
df.isnull().values.ravel().sum()

In [None]:
# Keep the column labels as a plain Python list
columns = list(df.columns)

In [None]:
columns

# Fill missing values with mean

In [None]:
df = df.fillna(df.mean())

In [None]:
df.describe()

# VISUALIZATION

In [None]:
pd.crosstab([df['Fasting Blood Glucose']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Fasting Blood Glucose'].min()
_max = df['Fasting Blood Glucose'].max()
print(f"Min fasting blood sugar is {_min} and maximum is {_max}")

In [None]:
#divide Fasting Blood Glucose into 5 bands and visualize
df['Fasting Blood Glucose_band'] = pd.qcut(df['Fasting Blood Glucose'], 5, labels = False)
pd.qcut(df['Fasting Blood Glucose'], q=5)

# Fasting Blood Glucose VS Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Fasting Blood Glucose_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Hct']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Hct'].min()
_max = df['Hct'].max()
print(f"Min Hct is {_min} and maximum is {_max}")

In [None]:
#divide Hct into 5 bands and visualize
df['Hct_band'] = pd.qcut(df['Hct'], 5, labels = False)
pd.qcut(df['Hct'], q=5)

# Hct vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Hct_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Hgb']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Hgb'].min()
_max = df['Hgb'].max()
print(f"Min Hgb is {_min} and maximum is {_max}")

In [None]:
#divide Hct into 5 bands and visualize
df['Hgb_band'] = pd.qcut(df['Hgb'], 5, labels = False)
pd.qcut(df['Hgb'], q=5)

# Hgb vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Hgb_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Mch']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Mch'].min()
_max = df['Mch'].max()
print(f"Min Mch is {_min} and maximum is {_max}")

In [None]:
#divide Hct into 5 bands and visualize
df['Mch_band'] = pd.qcut(df['Mch'], 5, labels = False)
pd.qcut(df['Mch'], q=5)

# Mch vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Mch_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Mchc']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Mchc'].min()
_max = df['Mchc'].max()
print(f"Min Mchc is {_min} and maximum is {_max}")

In [None]:
#divide Hct into 5 bands and visualize
df['Mchc_band'] = pd.qcut(df['Mchc'], 5, labels = False)
pd.qcut(df['Mchc'], q=5)

# Mchc vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Mchc_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Mcv']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Mcv'].min()
_max = df['Mcv'].max()
print(f"Min Mcv is {_min} and maximum is {_max}")

In [None]:
#divide Hct into 5 bands and visualize
df['Mcv_band'] = pd.qcut(df['Mcv'], 5, labels = False)
pd.qcut(df['Mcv'], q=5)

# Mcv vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Mcv_band'],hue='Result',data=df)
plt.show()


In [None]:
pd.crosstab([df['Pdw']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Pdw'].min()
_max = df['Pdw'].max()
print(f"Min Pdw is {_min} and maximum is {_max}")

In [None]:
#divide Pdw into 5 bands and visualize
df['Pdw_band'] = pd.qcut(df['Pdw'], 5, labels = False)
pd.qcut(df['Pdw'], q=5)

# Pdw vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Pdw_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Plt']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Plt'].min()
_max = df['Plt'].max()
print(f"Min Plt is {_min} and maximum is {_max}")

In [None]:
#divide Plt into 5 bands and visualize
df['Plt_band'] = pd.qcut(df['Plt'], 5, labels = False)
pd.qcut(df['Plt'], q=5)

# Plt vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Plt_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Rbc']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Rbc'].min()
_max = df['Rbc'].max()
print(f"Min Rbc is {_min} and maximum is {_max}")

In [None]:
#divide Rbc into 5 bands and visualize
df['Rbc_band'] = pd.qcut(df['Rbc'], 5, labels = False)
pd.qcut(df['Rbc'], q=5)

# Rbc vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Rbc_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Rdw-Cv']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Rdw-Cv'].min()
_max = df['Rdw-Cv'].max()
print(f"Min Rdw-Cv is {_min} and maximum is {_max}")

In [None]:
#divide Rdw-Cv into 5 bands and visualize
df['Rdw-Cv_band'] = pd.qcut(df['Rdw-Cv'], 5, labels = False)
pd.qcut(df['Rdw-Cv'], q=5)

# Rdw-Cv vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Rdw-Cv_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Rdw-Sd']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Rdw-Sd'].min()
_max = df['Rdw-Sd'].max()
print(f"Min Rdw-Sd is {_min} and maximum is {_max}")

In [None]:
#divide Rdw-Sd into 5 bands and visualize
df['Rdw-Sd_band'] = pd.qcut(df['Rdw-Sd'], 5, labels = False)
pd.qcut(df['Rdw-Sd'], q=5)

# Rdw-Sd vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Rdw-Sd_band'],hue='Result',data=df)
plt.show()

In [None]:
pd.crosstab([df['Wbc']],df['Result']).style.background_gradient(cmap='summer_r')

In [None]:
_min = df['Wbc'].min()
_max = df['Wbc'].max()
print(f"Min Wbc is {_min} and maximum is {_max}")

In [None]:
#divide Wbc into 5 bands and visualize
df['Wbc_band'] = pd.qcut(df['Wbc'], 5, labels = False)
pd.qcut(df['Wbc'], q=5)

# Wbc vs Result

In [None]:
fig_dims = (30, 30)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(df['Wbc_band'],hue='Result',data=df)
plt.show()

# Binary Label Encoding of the Result Feature

In [None]:
from sklearn.preprocessing import LabelBinarizer

In [None]:
# Overwrite the 'Result' column with the binarizer's numeric encoding of the class labels.
# NOTE(review): assigning the output to a single column presumes exactly two
# classes, so that fit_transform returns one column — confirm against the data.
lb = LabelBinarizer()
df['Result'] = lb.fit_transform(df['Result'])

# Correlation Matrix

In [None]:
# Compute the pairwise column correlations first, then draw them as an
# annotated heatmap and enlarge the figure so the cell labels stay legible.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='RdYlGn', linewidths=0.2)
fig = plt.gcf()
fig.set_size_inches(20, 18)
plt.show()

# PAIR PLOT

In [None]:
# Scatter-plot matrix of the frame's columns, coloured by the class label
sns.pairplot(df, hue="Result")
plt.show()

# Create two datasets: one with Fasting Blood Glucose and one with that feature dropped

In [None]:
df.columns

In [None]:
df_1 = df[['Fasting Blood Glucose', 'Hct', 'Hgb', 'Mch', 'Mchc', 'Mcv',
       'Pdw', 'Plt', 'Rbc', 'Rdw-Cv', 'Rdw-Sd', 'Wbc']]

In [None]:
df_2 = df[['Hct', 'Hgb', 'Mch', 'Mchc', 'Mcv',
       'Pdw', 'Plt', 'Rbc', 'Rdw-Cv', 'Rdw-Sd', 'Wbc']]

# Split Dataset into Test and Train

In [None]:
#data with fasting blood glucose
from sklearn.model_selection import train_test_split
X1_train, X1_test, y1_train, y1_test = train_test_split(df_1, df['Result'], test_size=0.3) # 70% training and 30% test

In [None]:
#data without fasting blood glucose
X2_train, X2_test, y2_train, y2_test = train_test_split(df_2, df['Result'], test_size=0.3) # 70% training and 30% test

# TRAIN WITH RANDOM FOREST

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=20)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X1_train,y1_train)

y1_predA=clf.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y1_predA))
print("f1 score:",f1_score(y1_test, y1_predA, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y1_predA, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=20),X1_train,y1_train,cv=10)
sns.heatmap(confusion_matrix(y1_train,y_pred),annot=True,fmt='2.0f')

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=50)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X1_train,y1_train)

y1_predB=clf.predict(X1_test)

In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=50),X1_train,y1_train,cv=10)
sns.heatmap(confusion_matrix(y1_train,y_pred),annot=True,fmt='2.0f')


In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y1_predB))
print("f1 score:",f1_score(y1_test, y1_predB, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y1_predB, average='macro'))


In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=70)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X1_train,y1_train)

y1_predC=clf.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y1_predC))
print("f1 score:",f1_score(y1_test, y1_predC, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y1_predC, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=70),X1_train,y1_train,cv=10)
sns.heatmap(confusion_matrix(y1_train,y_pred),annot=True,fmt='2.0f')

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X1_train,y1_train)

y1_predD=clf.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y1_predD))
print("f1 score:",f1_score(y1_test, y1_predD, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y1_predD, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=100),X1_train,y1_train,cv=10)
sns.heatmap(confusion_matrix(y1_train,y_pred),annot=True,fmt='2.0f')

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=20)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X2_train,y2_train)

y2_predA=clf.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y2_predA))
print("f1 score:",f1_score(y2_test, y2_predA, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_predA, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=20),X2_train,y2_train,cv=10)
sns.heatmap(confusion_matrix(y2_train,y_pred),annot=True,fmt='2.0f')

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=50)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X2_train,y2_train)

y2_predB=clf.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y2_predB))
print("f1 score:",f1_score(y2_test, y2_predB, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_predB, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=50),X2_train,y2_train,cv=10)
sns.heatmap(confusion_matrix(y2_train,y_pred),annot=True,fmt='2.0f')

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=70)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X2_train,y2_train)

y2_predC=clf.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y2_predC))
print("f1 score:",f1_score(y2_test, y2_predC, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_predC, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=70),X2_train,y2_train,cv=10)
sns.heatmap(confusion_matrix(y2_train,y_pred),annot=True,fmt='2.0f')

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X2_train,y2_train)

y2_predD=clf.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y2_predD))
print("f1 score:",f1_score(y2_test, y2_predD, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_predD, average='macro'))


In [None]:
f,ax=plt.subplots(0,0,figsize=(12,10))

y_pred = cross_val_predict(RandomForestClassifier(n_estimators=100),X2_train,y2_train,cv=10)
sns.heatmap(confusion_matrix(y2_train,y_pred),annot=True,fmt='2.0f')

# Using Naive Bayes for Data with Fasting Blood Glucose


In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
#Create a Gaussian Classifier
gnb = GaussianNB()

#Train the model using the training sets
gnb.fit(X1_train, y1_train)

#Predict the response for test dataset
y1_pred = gnb.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y1_pred))
print("f1 score:",f1_score(y1_test, y1_pred, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y1_pred, average='macro'))


# Using Naive Bayes for Data without Fasting Blood Glucose

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
#Create a Gaussian Classifier
gnb = GaussianNB()

#Train the model using the training sets
gnb.fit(X2_train, y2_train)

#Predict the response for test dataset
y2_pred = gnb.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y2_pred))
print("f1 score:",f1_score(y2_test, y2_pred, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_pred, average='macro'))


# USING SUPPORT VECTOR MACHINE ON DATA WITH FASTING BLOOD GLUCOSE

In [None]:
from sklearn.svm import SVC
svclassifier_1 = SVC(kernel='linear')
svclassifier_1.fit(X1_train, y1_train)

In [None]:
y1_pred_SVM = svclassifier_1.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y1_pred_SVM))
print("f1 score:",f1_score(y1_test, y1_pred_SVM, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y1_pred_SVM, average='macro'))


# USING SUPPORT VECTOR MACHINE ON DATA WITHOUT FASTING BLOOD GLUCOSE


In [None]:
from sklearn.svm import SVC
svclassifier_1 = SVC(kernel='linear')
svclassifier_1.fit(X2_train, y2_train)

In [None]:
y2_pred_SVM = svclassifier_1.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y2_pred_SVM))
print("f1 score:",f1_score(y2_test, y2_pred_SVM, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_pred_SVM, average='macro'))


# USING DECISION TREE ON DATA WITH FASTING BLOOD GLUCOSE


In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X1_train, y1_train)

#Predict the response for test dataset
y_pred = clf.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y_pred))
print("f1 score:",f1_score(y1_test, y_pred, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y_pred, average='macro'))


# USING DECISION TREE ON DATA WITHOUT FASTING BLOOD GLUCOSE


In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X2_train, y2_train)

#Predict the response for test dataset
y_pred = clf.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y_pred))
print("f1 score:",f1_score(y2_test, y2_pred, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y2_pred, average='macro'))


# USING K NEAREST NEIGHBOUR CLASSIFIER ON DATA WITH FASTING BLOOD GLUCOSE

In [None]:
from sklearn.neighbors import KNeighborsClassifier 

In [None]:
model = KNeighborsClassifier() 
model.fit(X1_train,y1_train)
y_pred = model.predict(X1_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y1_test, y_pred))
print("f1 score:",f1_score(y1_test, y_pred, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y1_test, y_pred, average='macro'))


# USING K NEAREST NEIGHBOUR CLASSIFIER ON DATA WITHOUT FASTING BLOOD GLUCOSE

In [None]:
from sklearn.neighbors import KNeighborsClassifier 

In [None]:
model = KNeighborsClassifier() 
model.fit(X2_train,y2_train)
y_pred = model.predict(X2_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y2_test, y_pred))
print("f1 score:",f1_score(y2_test, y_pred, zero_division=1))
print("precision, recall, F-measure and support:",precision_recall_fscore_support(y2_test, y_pred, average='macro'))
