In [82]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt

%matplotlib inline

In [83]:
df = pd.read_csv('Autism_Data.csv')
df.columns = ['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 
              'A7', 'A8', 'A9', 'A10', 'age', 'gender',
             'ethnicity', 'jundice', 'autism', 'country', 
             'app', 'result', 'age_category', 'relation',
             'app_prediction']
df.head()

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,gender,ethnicity,jundice,autism,country,app,result,age_category,relation,app_prediction
0,1,1,1,1,0,0,1,1,0,0,...,f,White-European,no,no,United States,no,6,18 and more,Self,NO
1,1,1,0,1,0,0,0,1,0,1,...,m,Latino,no,yes,Brazil,no,5,18 and more,Self,NO
2,1,1,0,1,1,0,1,1,1,1,...,m,Latino,yes,yes,Spain,no,8,18 and more,Parent,YES
3,1,1,0,1,0,0,1,1,0,1,...,f,White-European,no,yes,United States,no,6,18 and more,Self,NO
4,1,0,0,0,0,0,0,1,0,0,...,f,?,no,no,Egypt,no,2,18 and more,?,NO


In [84]:
gender_row = pd.Series(np.where(df['gender'] == 'm', 1, 0))
jund_row = pd.Series(np.where(df['jundice'] == 'yes', 1, 0))
autism_row = pd.Series(np.where(df['autism'] == 'yes', 1, 0))

df['gender'] = gender_row
df['jundice'] = jund_row
df['autism'] = autism_row

df.head()

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,gender,ethnicity,jundice,autism,country,app,result,age_category,relation,app_prediction
0,1,1,1,1,0,0,1,1,0,0,...,0,White-European,0,0,United States,no,6,18 and more,Self,NO
1,1,1,0,1,0,0,0,1,0,1,...,1,Latino,0,1,Brazil,no,5,18 and more,Self,NO
2,1,1,0,1,1,0,1,1,1,1,...,1,Latino,1,1,Spain,no,8,18 and more,Parent,YES
3,1,1,0,1,0,0,1,1,0,1,...,0,White-European,0,1,United States,no,6,18 and more,Self,NO
4,1,0,0,0,0,0,0,1,0,0,...,0,?,0,0,Egypt,no,2,18 and more,?,NO


In [85]:
X = df[['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'gender', 'jundice']]
y = df['autism']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [86]:
# hidden_layer_sizes = tuple, ith element is number of neurons in ith layer.
# solver = {sgd, adam, lbfgs}


clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(12), random_state=1)
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)

print('without over sampling the data:')
print('accuracy = ', accuracy)

without over sampling the data:
accuracy =  0.839622641509434


In [87]:
#  using smote...
X_smote, y_smote = SMOTE().fit_sample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.3, random_state=101)

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(12), random_state=1)
clf.fit(X_train, y_train)
smote_accuracy = clf.score(X_test, y_test)
cross_val = cross_val_score(clf, X_smote, y_smote, cv=5)


print('after data has been over sampled:')
print('accuracy = ', smote_accuracy)
print('cross value accuracy = ', cross_val)

after data has been over sampled:
accuracy =  0.8016304347826086
cross value accuracy =  [0.78455285 0.83739837 0.82926829 0.77459016 0.84836066]
