In [None]:
import pandas as pd
import numpy as np

In [None]:
import time
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from sklearn.metrics import confusion_matrix , classification_report , accuracy_score
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

In [None]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from datetime import datetime

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
%matplotlib inline

In [None]:
# Load the dataset from 'diabetes.csv' (path is relative to the notebook's working directory).
df = pd.read_csv('diabetes.csv')

In [None]:
# Preview the first rows of the loaded dataset.
df.head()

In [None]:
# True when at least one cell of the dataset is null, False otherwise.
df.isna().to_numpy().any()

In [None]:
# Pairwise scatter plots of every column, coloured by the 'Outcome' value.
# The trailing semicolon keeps the PairGrid repr out of the cell output.
sns.pairplot(df , hue='Outcome');

*Finding the correlation between the target field and the input fields*

In [None]:
# Correlation of every input column with 'Outcome'. The target's own entry is
# dropped by label so the plot does not depend on 'Outcome' being the last column.
ax = df.corr()['Outcome'].drop('Outcome').sort_values().plot(kind='bar')
ax.set(title='Correlation of each input field with Outcome', ylabel='Correlation');

# corr = df.corr()
# plt.figure(figsize=(20,15))
# sns.heatmap(corr, annot=True , cmap='viridis')

In [None]:
# Count the rows of each class by summing the boolean masks.
is_diabetic = df['Outcome'] == 1
is_not_diabetic = df['Outcome'] == 0
diabetes_true_count = int(is_diabetic.sum())
diabetes_false_count = int(is_not_diabetic.sum())

In [None]:
# Report the size of each class.
for label, count in (('Diabetic', diabetes_true_count), ('Non Diabetic', diabetes_false_count)):
    print(f'Number of {label} peoples: {count}')

In [None]:
# sns.countplot(x=df['Outcome'])

# Splitting the data into training and testing data

In [None]:
# 'Outcome' is the target; every other column is an input feature.
y = df['Outcome']
x = df.drop(columns='Outcome')

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=101,
)

# Checking for meaningless values, e.g. age = 0     /(O_X)\ 

In [None]:
# A value of 0 in these columns is reported as a missing reading.
zero_check_columns = (
    'Glucose',
    'BloodPressure',
    'Insulin',
    'BMI',
    'SkinThickness',
    'Age',
    'DiabetesPedigreeFunction',
)

print(f'Number of rows: {len(df)}')
for column in zero_check_columns:
    zero_rows = df.loc[df[column] == 0]
    print(f"Number of missing {column} values: {len(zero_rows)}")

# Filling those values with the column averages

In [None]:
# Replace every 0 with the mean of its column.
imputer = SimpleImputer(missing_values=0 , strategy='mean')

# The imputer treats every 0 as missing, so the original 'Pregnancies' values
# are saved here and written back after imputation.
x_train_preg = x_train['Pregnancies'].tolist()
x_test_preg = x_test['Pregnancies'].tolist()

# Learn the column means from the training data only.
x_train = pd.DataFrame(imputer.fit_transform(x_train) , columns=x_train.columns)
x_train['Pregnancies'] = x_train_preg

# Reuse the training means for the test data: re-fitting here would fill the
# test rows with statistics computed from the test set itself.
x_test = pd.DataFrame(imputer.transform(x_test) , columns=x_test.columns)
x_test['Pregnancies'] = x_test_preg

# Scaling the data

In [None]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both the training and the test features.
scaler = MinMaxScaler()
scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Show only the first few scaled test rows instead of dumping the whole array.
x_test[:5]

# Creating an Artificial Neural Network ;)

*random-forest model*

In [None]:
# model = RandomForestClassifier(n_estimators=50 , random_state=1)
# model.fit(x_train , y_train)

In [None]:
# Load the model stored in 'model.h5'. The cells that would build and train a
# model in this notebook are commented out, so this file is where `model` comes from.
# NOTE(review): presumably 'model.h5' must already exist next to the notebook — confirm.
model = load_model('model.h5') 

*artificial neural network (ANN model)*

In [None]:
# model = Sequential()

# model.add(Dense(8, activation='relu'))
# model.add(Dropout(0.2))
# model.add(Dense(10, activation='relu'))
# model.add(Dropout(0.2))
# model.add(Dense(5, activation='relu'))
# model.add(Dropout(0.2))
# model.add(Dense(1, activation='sigmoid'))

# model.compile(loss='binary_crossentropy', optimizer='Adam')

# early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)

# timestamp = datetime.now().strftime("%Y-%m-%d--%H%M")
# log_directory = 'logs/fit' + '/' + timestamp 

# board = TensorBoard(log_directory,
#                     histogram_freq=1,
#                     write_graph=True,
#                     write_images=True,
#                     update_freq='epoch',
#                     profile_batch=2,
#                     embeddings_freq=1)

*training the model*

In [None]:
# model.fit(x=x_train, y=y_train, epochs=600, validation_data=(x_test,y_test), callbacks=[early_stop, board])

In [None]:
# model_loss = pd.DataFrame(model.history.history)
# model_loss.plot(figsize=(10,8))

*testing the model*

In [None]:
# testing the model
predictions = np.round(model.predict(x_test))

In [None]:
# Compare the rounded predictions with the true test labels.
test_confusion = confusion_matrix(y_test , predictions)
print(f'Confusion matrix of predictions:- \n{test_confusion}', end='\n\n')

test_report = classification_report(y_test , predictions)
print(f'Classification report of predictions:- \n{test_report}', end='\n')

*Saving the trained model to the model.h5 file*

In [None]:
# Write the current model to 'model.h5', the same path it is loaded from earlier in this notebook.
model.save('model.h5')

In [None]:
def predictor(Pregnancies , Glucose , BloodPressure , SkinThickness , Insulin , BMI,  DiabetesPedigreeFunction , Age):
    """Predict whether a patient is diabetic from eight raw feature values.

    The values are arranged as a single row, scaled with the notebook-level
    ``scaler`` and passed to the notebook-level ``model``. The model score is
    printed as a percentage and an advice message is returned.

    Parameters
    ----------
    Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age
        Raw (unscaled) feature values; they are handed to ``scaler`` in exactly
        this order.

    Returns
    -------
    str or None
        The "not diabetic" message when the score rounds to 0, the "diabetic"
        message when it rounds to 1, and ``None`` when it rounds to neither.
    """
    x_data = [[Pregnancies , Glucose , BloodPressure , SkinThickness , Insulin , BMI , DiabetesPedigreeFunction , Age]]
    x_data = scaler.transform(x_data)

    # Pull the single score out as a plain float so the comparisons below work
    # on a scalar rather than on a one-element array.
    # NOTE(review): assumes model.predict returns a (1, 1) array holding a
    # probability in [0, 1] — confirm for the model stored in model.h5.
    prediction_percent = float(model.predict(x_data)[0][0])
    prediction_overall = np.round(prediction_percent)

    # The score is a fraction, so scale it by 100 before printing it with '%'.
    print(f'\n The chances of you being diabetic is: {prediction_percent * 100:.2f}%')
    if prediction_overall == 0:
        return "\n Congrats.... you are not diabetic but you sould have a consultaion with doctor"

    elif prediction_overall == 1:
        return "\n It seems you are diabetic and you sould have a consultaion with doctor"

## Mean value of every column

In [None]:
# Column name -> label used in the printed line.
mean_labels = {
    'Glucose': 'Glucose',
    'BloodPressure': 'BloodPressure',
    'Insulin': 'Insulin',
    'BMI': 'BMI',
    'SkinThickness': 'Skin-Thickness',
    'DiabetesPedigreeFunction': 'Diabetes-Pedigree-Function',
}

# These means are taken over the original dataframe `df`.
for column, label in mean_labels.items():
    print(f"Mean of {label}: {np.mean(df[column])}")

In [None]:
# Read the eight feature values from the user. Whole-number fields are parsed
# with int(), the others with float().
age = int(input('Age: '))
glucose = int(input('glucose concentration: '))
bp = int(input('Blood pressure: '))
number_of_pregnancies = int(input('Number of pregnancies: '))
insulin = float(input('Insulin : '))
skin_thickness = int(input('Skin Thickness : '))
bmi = float(input('Body mass index : '))
DPF = float(input('Diabetes Pedigree Fucntion : '))

# perf_counter() is a monotonic high-resolution clock, so the measured duration
# cannot be distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()

# Pass the values to predictor in its parameter order and print the returned message.
print(predictor(number_of_pregnancies ,glucose , bp , skin_thickness , insulin , bmi , DPF , age))

# Elapsed time in seconds.
print(f"\n Total time taken in prediction:  {time.perf_counter()-start_time}")