<a href="https://colab.research.google.com/github/abdoulayegk/Data-For-Machine-Learning/blob/master/deeplearning1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# organize imports
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split
import pandas as pd

# Seed every random source used in this cell so a fresh run repeats itself.
seed = 9
np.random.seed(seed)
# NumPy's seed does not reach TensorFlow's own random generators (used for
# weight initialisation, among other things), so TensorFlow is seeded too.
tf.random.set_seed(seed)

# load pima indians dataset
dataset = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv")

# split into input (every column except 'Outcome') and output ('Outcome') variables
X = dataset.drop('Outcome',axis=1)
y = dataset.Outcome

# hold out 30% of the rows; random_state pins which rows land in each part
(X_train, X_test, Y_train, Y_test) = train_test_split(X, y, test_size=0.3, random_state=42)

# create the model: two small ReLU layers feeding one sigmoid output unit.
# The input width is taken from the data instead of being hard-coded.
model = keras.Sequential()
model.add(layers.Dense(8, input_dim=X_train.shape[1], activation='relu'))
model.add(layers.Dense(6, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit the model; no `epochs` argument is passed, so the Keras default applies.
# NOTE(review): the hold-out rows double as validation data here.
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size=5, verbose=0)

# evaluate the model; scores[1] is the 'accuracy' metric requested in compile()
scores = model.evaluate(X_test, Y_test)
print("Accuracy: %.2f%%" % (scores[1]*100))

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
# Baseline: linear classifier trained with SGD on the features as loaded.
clf = SGDClassifier().fit(X_train, Y_train)

# Predicted classes for the held-out rows, echoed as the cell output.
y_pred = clf.predict(X_test)
y_pred

In [None]:
# Score of the baseline classifier on the held-out rows.
clf.score(X=X_test, y=Y_test)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
X_train,X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=.25)
clf.fit(X_train, y_train)

In [None]:
# Predict classes for the current X_test with the refit classifier;
# this replaces the y_pred computed for the first baseline.
y_pred = clf.predict(X_test)

In [None]:
from sklearn.model_selection import cross_val_score
# 3-fold cross-validated accuracy of the SGD classifier on the training rows.
cross_val_score(estimator=clf, X=X_train, y=y_train, scoring="accuracy", cv=3)

In [None]:
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
# A notebook cell renders only its last expression, so both metrics are
# returned together instead of letting the F1 score be silently discarded.
{
    "f1": f1_score(y_test, y_pred),
    "accuracy": accuracy_score(y_test, y_pred),
}

In [None]:
# Confusion matrix of the SGD predictions against the held-out labels.
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient-boosted trees with library defaults, trained on the current split.
gb = GradientBoostingClassifier().fit(X_train, y_train)
gb

In [None]:
# 3-fold cross-validated accuracy of the gradient-boosting model on the training rows.
cross_val_score(estimator=gb, X=X_train, y=y_train, scoring="accuracy", cv=3)

In [None]:
# Gradient-boosting predictions for the held-out rows.
y_prd1 = gb.predict(X_test)

In [None]:
# Hold-out accuracy of the gradient-boosting model.
accuracy_score(y_true=y_test, y_pred=y_prd1)

In [None]:
# Pairwise Pearson correlation between all columns, target included.
dataset.corr(method="pearson")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heat map of the column-by-column correlation matrix.
fig, ax = plt.subplots(figsize=(13, 7))
corr_matrix = dataset.corr()
sns.heatmap(corr_matrix, annot=True, ax=ax)

In [None]:
# Full, unsplit feature table and target taken straight from the raw dataset.
Xt = dataset.drop(columns='Outcome')
yt = dataset['Outcome']


In [None]:
from sklearn.ensemble import ExtraTreesClassifier

In [None]:
# Extra-trees ensemble fitted on every row; it is only used for its
# feature_importances_ in the following cell.
tree = ExtraTreesClassifier().fit(Xt, yt)
tree

In [None]:

# Print the raw importance scores, then chart them labelled by column name.
importances = tree.feature_importances_
print(importances)

feat_importances = pd.Series(importances, index=Xt.columns)
top_features = feat_importances.nlargest(10)  # keeps at most the ten largest
top_features.plot(kind='barh')
plt.show()

In [None]:
# Box plot of every column of the raw dataset on a 10x7 inch canvas.
fig, ax = plt.subplots(figsize=(10, 7))
dataset.boxplot(ax=ax)

In [None]:
# Work on an independent copy so the raw dataset stays untouched.
df = dataset.copy(deep=True)

In [None]:
# Peek at the first five rows of the working copy.
df.head(n=5)

In [None]:
# Box plot of every column of the working copy, as a picture of the data
# before any rows are removed.
df.boxplot()

In [None]:
# (rows, columns) of the raw dataset, as a reference point for the row count
# that remains after outlier filtering.
dataset.shape

In [None]:
# Remove outliers and check whether the models improve
from scipy import stats
import numpy as np


In [None]:

# Outlier screening should look at the measurements only. If the target
# column were z-scored too, a rare class could exceed the threshold and be
# dropped wholesale, so 'Outcome' is left out of the computation.
feature_cols = df.columns.drop('Outcome')

# absolute z-score of every feature value, computed column by column
z = np.abs(stats.zscore(df[feature_cols]))

# keep only the rows whose feature z-scores are all below 3 in absolute value
df_clean = df[(z<3).all(axis=1)]

# how many rows (and columns) are left after filtering
df_clean.shape

In [None]:
# Display the rows that survived the z-score filter.
df_clean

In [None]:
# Count missing values per column in the filtered frame.
df_clean.isnull().sum()

In [None]:
# Rebuild features and target from the outlier-filtered frame; this rebinds
# the X and y names used earlier in the notebook.
X = df_clean.drop(columns='Outcome')
y = df_clean['Outcome']

In [None]:
# Display the feature table built from the filtered frame.
X

In [None]:
# Box plot of every column after outlier filtering, for comparison with the
# earlier unfiltered plots.
df_clean.boxplot()

In [None]:
# Hold-out split of the filtered data; no test_size is given, so the library
# default proportion is used. random_state pins the row assignment.
(X_train, X_test, y_train, y_test) = train_test_split(X, y, random_state=0)

In [None]:
# Fresh SGD classifier trained on the outlier-filtered training rows.
sgc = SGDClassifier().fit(X_train, y_train)
sgc

In [None]:
# Predicted classes from the SGD classifier for the held-out rows; the bare
# name on the last line echoes the array as the cell output.
y_hat = sgc.predict(X_test)
y_hat

In [None]:
# Hold-out accuracy of the SGD classifier on the filtered data.
accuracy_score(y_true=y_test, y_pred=y_hat)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with library defaults: train, predict, report hold-out accuracy.
rf = RandomForestClassifier().fit(X_train, y_train)
y_hat1 = rf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_hat1)

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes: train, predict, report hold-out accuracy.
nb = GaussianNB().fit(X_train, y_train)
y_hat2 = nb.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_hat2)

In [None]:
# Standardise the filtered feature table and rebind X to the scaled array.
# NOTE(review): X_train / X_test were split off before this cell and are not
# changed by it; the scaler is also fit on all rows, including held-out ones.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# X_train / X_test were split before the scaler cell ran, so they still hold
# unscaled values. Bundling the scaler with the classifier standardises the
# features using statistics learned from the training rows only and applies
# the same transformation automatically at predict time.
logreg = make_pipeline(StandardScaler(), LogisticRegression(max_iter=100))
logreg.fit(X_train, y_train)
y_hat3 = logreg.predict(X_test)
accuracy_score(y_test, y_hat3)