# Performing Classification with Different Algorithms:


In [2]:
# importing required libraries:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 


In [3]:
# reading data:
df = pd.read_csv('diabetes.csv')
#take a look on data:
df.head(5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# spliting the required attributes: 
indecators = df.iloc[:,0:7]
X = indecators
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction
0,6,148,72,35,0,33.6,0.627
1,1,85,66,29,0,26.6,0.351
2,8,183,64,0,0,23.3,0.672
3,1,89,66,23,94,28.1,0.167
4,0,137,40,35,168,43.1,2.288


In [5]:
predication = df.iloc[:,8:]
y = predication
y.head()

Unnamed: 0,Outcome
0,1
1,0
2,1
3,0
4,1


In [6]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.25,random_state = 0)

In [7]:
#handling missing values:
from sklearn.impute import SimpleImputer
my_imputer = SimpleImputer()
imputed_X_train = pd.DataFrame(my_imputer.fit_transform(X_train))
imputed_X_test = pd.DataFrame(my_imputer.transform(X_test))
# Imputation removed column names; put them back
imputed_X_train.columns = X_train.columns
imputed_X_test.columns = X_test.columns

In [8]:
# Feature Scaling:
# this for a voiding unblancing of data: 
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## (1) Decision Tree Model:

In [9]:
#finding the the mean absolute error:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error
diabetes_model = DecisionTreeRegressor()
diabetes_model.fit(imputed_X_train, y_train)
X_validation = diabetes_model.predict(imputed_X_test)
decision_tree_error = mean_absolute_error( y_test ,X_validation)
print(decision_tree_error)

0.296875


## (2) Random Forest Model:

In [10]:
from sklearn.ensemble import RandomForestRegressor
forest_model = RandomForestRegressor(random_state=0)
forest_model.fit(imputed_X_train, y_train.values.ravel())
X_validation_forest = forest_model.predict(imputed_X_test)
random_forest_error = mean_absolute_error(y_test, X_validation_forest)
print(random_forest_error)

0.29343749999999996


## (3) KNN Model:

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

#feature scaling 
X_scale = StandardScaler()
imputed_X_train = X_scale.fit_transform(imputed_X_train)
imputed_X_test = X_scale.transform(imputed_X_test)

In [12]:
from scipy.sparse.sputils import matrix
KNN_model = KNeighborsClassifier(n_neighbors= 27, p=2, metric='euclidean')
KNN_model.fit(imputed_X_train, y_train.values.ravel())
X_validation_KNN = KNN_model.predict(imputed_X_test)

In [13]:
cm = confusion_matrix(y_test, X_validation_KNN)
print(cm)

[[122   8]
 [ 36  26]]


## (4) Loggistic Regression:

In [14]:
from sklearn.linear_model import LogisticRegression
logistic_model = LogisticRegression(max_iter= 180)
logistic_model.fit(imputed_X_train, y_train.values.ravel())
X_validation_logistic = logistic_model.predict(imputed_X_test)
logistic_error = mean_absolute_error(y_test, X_validation_logistic)
print(logistic_error)

0.19791666666666666


>## Comparing Results:

In [15]:
print("1-FOR Decision Tree Model >> ")
print("\t Accuracy: " + str(accuracy_score(y_test, X_validation) *100) + "%")
print("\t F Score : " + str(f1_score(y_test, X_validation) *100) + "%")
#**************************#
print("1-FOR KNN Model >> ")
print("\t Accuracy: " + str(accuracy_score(y_test, X_validation_KNN) *100) + "%")
print("\t F Score : " + str(f1_score(y_test, X_validation_KNN) *100) + "%")
#******************************#
print("1-FOR loggistic Regression Model >> ")
print("\t Accuracy: " + str(accuracy_score(y_test, X_validation_logistic) *100) + "%")
print("\t F Score : " + str(f1_score(y_test, X_validation_logistic) *100) + "%")

1-FOR Decision Tree Model >> 
	 Accuracy: 70.3125%
	 F Score : 55.118110236220474%
1-FOR KNN Model >> 
	 Accuracy: 77.08333333333334%
	 F Score : 54.166666666666664%
1-FOR loggistic Regression Model >> 
	 Accuracy: 80.20833333333334%
	 F Score : 66.07142857142857%
