In [473]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn .metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier


import plotly.graph_objects as go
import plotly.express as px
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

sns.set_style("darkgrid")
pd.set_option("display.max_columns", None)   # setting to display all columns
pd.options.plotting.backend = "plotly"


import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


seed = 143
np.random.seed(seed)

%matplotlib inline

In [474]:
data = pd.read_csv('../input/autism-screening-on-adults/autism_screening.csv')

In [475]:
len(data)

In [476]:
data.head()

In [477]:
data.columns

In [478]:
data.nunique()

In [479]:
# Print the distinct values of every object-dtype column.
# Fix: the frame loaded above is named `data`; `df` is never defined, so the
# original raised NameError on a fresh kernel.
separator = "-------------------------------"
for col in data.select_dtypes('O').columns:
    print(separator)
    print(f'Column name: {col}\n')
    print(f'Unique values:\n{data[col].unique()}\n\n')

In [480]:
data['ethnicity'] = data['ethnicity'].replace('?', 'Others')

In [481]:
data['ethnicity'] = data['ethnicity'].replace('others', 'Others')

In [482]:
# Check the remaining ethnicity categories after cleanup.
# Fix: `df` is undefined in this notebook; the frame is `data`.
data['ethnicity'].unique()

In [483]:
data['relation'] = data['relation'].replace('?', data['relation'].mode()[0])

In [484]:
data['relation'].unique()

In [485]:
data.isnull().sum()

In [486]:
# Turn any remaining '?' placeholders into NaN, then give the target column and
# the misspelled 'austim' column the names used by the rest of the notebook.
data = data.replace('?', np.nan)
data = data.rename(columns={'Class/ASD': 'classASD', 'austim': 'autism'})
data.info()

In [487]:
# One point per row: `result` value on the y-axis, grouped by the class label.
sns.swarmplot(data=data, x='classASD', y='result')

In [488]:
# Density histogram of age with a KDE overlay.
# `sns.distplot` is deprecated (seaborn announces its removal); this is the
# replacement call documented in seaborn's migration guidance for distplot.
sns.histplot(data['age'], kde=True, stat="density", kde_kws=dict(cut=3))

In [489]:
# Summary statistics for age followed by a 50-bin count histogram.
# Fix: `df` is undefined (the frame is `data`); `sns.distplot(..., kde=False)`
# is deprecated, and `sns.histplot` draws the same count histogram.
print(data['age'].describe())
sns.histplot(data['age'], bins=50)

In [490]:
# Bar chart of the mean age per gender (drawn through the plotly plotting backend).
# Fix: `df` is undefined in this notebook; the frame is `data`.
mean_age_by_gender = data.groupby('gender').agg({'age': 'mean'})

fig = mean_age_by_gender.plot(kind='bar',
                              template="seaborn",
                              labels={"gender": "Gender",
                                      "value": "Age"},
                              color_discrete_sequence=["#84b1b5"])

fig.update_layout(title="<b>Average Age</b>\n",
                  title_font=dict(size=20))


fig.show()

In [491]:
# One swarm plot per categorical attribute, split by the class label.
# Fix: `df` is undefined in this notebook; the frame is `data`.
cols = ['gender', 'jundice', 'autism', 'used_app_before']
for column in cols:
    sns.swarmplot(x='classASD', y=column, data=data)
    plt.show()

In [492]:
# Horizontal bar chart of the 15 most frequent countries of residence.
# Fix: `df` is undefined (the frame is `data`). The counts are computed once
# instead of twice, and the redundant `data=` argument is dropped because x and y
# are passed as explicit vectors.
top_countries = data['contry_of_res'].value_counts().head(15)
fig = sns.barplot(x=top_countries.values, y=top_countries.index)
fig.set(xlabel='Count', ylabel='Country')
plt.show()

In [493]:
# Donut chart of the gender split.
# Fix: `df` is undefined in this notebook; the frame is `data`.
fig = px.pie(data, names="gender",
             title="<b>Counts of Male and Female</b>",
             hole=0.5, template="plotly_dark")

# Percent + label inside each slice, with a thin black outline.
fig.update_traces(textposition='inside',
                  textinfo='percent+label',
                  marker=dict(line=dict(color='#000000', width=1.5)))

fig.update_layout(title_x=0.5,
                  title_font=dict(size=20),
                  uniformtext_minsize=15)


fig.show()

In [494]:
# Pie chart of the class-label distribution.
# Fix: `df` is undefined in this notebook; the frame is `data`.
fig = px.pie(data, names="classASD",
             title="<b>Autism Spectrum Disorder Counts</b>",
             template="plotly_dark")

# Percent + label inside each slice, with a thin black outline.
fig.update_traces(textposition='inside',
                  textinfo='percent+label',
                  marker=dict(line=dict(color='#000000', width=1.5)))

fig.update_layout(title_x=0.5,
                  title_font=dict(size=20),
                  uniformtext_minsize=15)


fig.show()

In [495]:
# Bar chart of gender counts among rows labelled "YES".
# Fix: `df` is undefined in this notebook; the frame is `data`.
asd_gender_counts = data.loc[data['classASD'] == "YES", 'gender'].value_counts()

# NOTE(review): the x-axis column is expected to be called "index" on older
# pandas and "gender" on pandas >= 2 (value_counts now names its index); both
# keys are supplied so the axis label is set either way - confirm on the target
# environment.
fig = asd_gender_counts.plot(kind='bar',
                             template="seaborn",
                             color_discrete_sequence=["#84b1b5"],
                             labels={"index": "Gender",
                                     "gender": "Gender",
                                     "value": "Counts"})

fig.update_layout(title="<b>Gender Count of ASD Patients</b>\n",
                  title_font=dict(size=20), width=900)

fig.show()

In [496]:
asd_patients_country_wise = pd.DataFrame(df[df['classASD'] == "YES"]['contry_of_res'].value_counts()).rename({"contry_of_res":"ASD_Patient_Counts"}, axis = 1)

In [497]:
asd_patients_country_wise.style.bar(color="#84A9AC") 

In [498]:
# Bar chart of the per-country counts held in `asd_patients_country_wise`.
fig = px.bar(
    asd_patients_country_wise,
    x=asd_patients_country_wise.index,
    y="ASD_Patient_Counts",
    labels={"index": "Country"},
    color_discrete_sequence=px.colors.qualitative.D3_r,
    template='plotly_dark',
)

# Tilt the country names on the x-axis.
fig.update_xaxes(tickangle=310)

# Centred title anchored near the top of the figure.
title_settings = {
    'text': "<b>Counts of ASD Patients Country Wise</b>",
    'y': 0.93,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig.update_layout(title=title_settings)

fig.show()

In [499]:
# Remove the ten per-question columns A1_Score ... A10_Score from the frame.
score_columns = [f'A{i}_Score' for i in range(1, 11)]
data = data.drop(columns=score_columns)

In [500]:
plt.figure(figsize = (15, 15))
sns.countplot(x = 'classASD', hue = 'ethnicity', data = data)
plt.show()

In [501]:
plt.figure(figsize = (30, 20))
sns.countplot(x = 'classASD', hue = 'contry_of_res', data = data)

In [502]:
# Discard the `age_desc` column before building the feature matrix.
data = data.drop(columns=['age_desc'])

In [503]:
X = data.drop("classASD", axis = 1)    # select all other feature except "Class/ASD" for training
Y = data['classASD']

In [504]:
X = pd.get_dummies(X)
Y = pd.get_dummies(Y)

In [505]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25)

In [506]:
print(f"Shape of X_train is: {X_train.shape}")
print(f"Shape of Y_train is: {Y_train.shape}\n")
print(f"Shape of X_test is: {X_test.shape}")
print(f"Shape of Y_test is: {Y_test.shape}")

In [507]:
#creating ANN model
input_dim = X.shape[1]

In [508]:
model = Sequential()
model.add(Dense(8, input_dim = input_dim, kernel_initializer='normal', activation='relu'))
model.add(Dense(5, activation = "relu", kernel_initializer='normal'))
model.add(Dense(2, activation = 'sigmoid'))

In [509]:
# compiling model
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

In [510]:
model.summary()

In [511]:
plot_model(model,
           show_dtype = True,
           show_layer_names = True,
           show_shapes = True)

In [512]:
#training model
result = model.fit(X_train, Y_train, epochs = 20, batch_size = 10)

In [513]:
#visualization model Accuracy & Loss
acc = result.history['accuracy']
loss = result.history['loss']

epoch = [i + 1 for i in range(len(acc))]

In [514]:
# Collect the per-epoch metrics into one frame and show it with in-cell bars.
acc_loss_df = pd.DataFrame(dict(Accuracy=acc, Loss=loss, Epoch=epoch))

acc_loss_df.style.bar(subset=['Accuracy', 'Loss'],
                      color='#84A9AC')


In [515]:
# Line chart with one trace per training metric, plotted against the epoch number.
fig = go.Figure()

for metric in ('Accuracy', 'Loss'):
    fig.add_trace(go.Scatter(x=acc_loss_df['Epoch'],
                             y=acc_loss_df[metric],
                             mode='lines+markers',
                             name=metric))

# Centred title and axis captions.
title_settings = {'text': "<b>Training Accuracy Vs Training Loss</b>\n",
                  'xanchor': 'center',
                  'yanchor': 'top',
                  'y': 0.9, 'x': 0.5}
fig.update_layout(title=title_settings,
                  xaxis_title="Epoch",
                  yaxis_title="Accuracy / Loss",
                  title_font=dict(size=20))

fig.layout.template = 'plotly_dark'

fig.show()

In [516]:
#evaluting model
loss, acc = model.evaluate(X_test, Y_test)

In [517]:
print(f"Accuracy on unseen data is: { np.round(acc, 2) }")
print(f'Loss on unseen data is: { np.round(loss, 2) }')

In [518]:
#classification report
prediction = model.predict(X_test)
prediction = np.argmax(prediction, axis = 1)

In [519]:
print(accuracy_score(Y_test[['YES']], prediction))

In [520]:
print(classification_report(Y_test[['YES']], prediction))

In [521]:
# New model: the cells below label-encode the columns and fit further classifiers


In [522]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the categorical columns (and the target) in place.
# The encoder is re-fitted for every column, so codes are per-column.
le = LabelEncoder()
categorical_cols = ['ethnicity', 'jundice', 'autism', 'contry_of_res',
                    'relation', 'classASD', 'gender']
for col in categorical_cols:
    data[col] = le.fit_transform(data[col])

# Fix: `age` is a numeric feature and must not be label-encoded - that replaces
# each age by the rank of its distinct value (distorting distances) and turns a
# missing value into just another class. Keep it numeric and fill gaps with the
# median instead.
data['age'] = pd.to_numeric(data['age'], errors='coerce')
data['age'] = data['age'].fillna(data['age'].median())

In [523]:
data.head()

In [524]:
corrMatrix = data.corr()
sns.heatmap(corrMatrix, annot=True)
plt.show()

In [525]:
data = data.drop(columns = ['gender', 'contry_of_res', 'relation'])

In [526]:
data.classASD.hist()

In [527]:

data = data.drop(columns = ['used_app_before'])

In [528]:
data.head()

In [529]:
X = data.drop(columns = 'classASD')
Y = data['classASD']

In [530]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on X and replace X by its standardised values in one step.
# NOTE(review): the scaler is fitted on all rows, before any cross-validation
# or train/test split that follows.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [531]:
from sklearn.neural_network import MLPClassifier

# Small MLP with hidden layers of 3 and 2 units, trained with Adam.
# Fix: the trailing bare `MLPClassifier(...)` line (a pasted output repr) built a
# second, unused estimator; it is removed. `clf.fit` returns the fitted
# estimator, so the cell still displays it.
clf = MLPClassifier(solver='adam', alpha=1e-3, hidden_layer_sizes=(3, 2), random_state=1, max_iter=10000)
clf.fit(X, Y)

In [532]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the MLP (default classifier scoring: accuracy).
# Fix: the scores were computed but never shown; report mean and spread.
scores = cross_val_score(clf, X, Y, cv=5)
print(f"5-fold CV accuracy: {scores.mean():.4f} +/- {scores.std():.4f}")

In [533]:
# Second Keras network for the label-encoded, standardised features.
# `Sequential` and `Dense` are the tensorflow.keras classes imported at the top
# of the notebook; re-importing them from the standalone `keras` package could
# mix two Keras installations, so those imports are dropped here.
# Fixes:
#  * the sigmoid output unit was commented out, leaving a 3-unit ReLU layer as
#    the network output for a single binary target trained with
#    binary_crossentropy; the 1-unit sigmoid output is restored.
#  * input width is taken from X instead of the hard-coded 5.
model = Sequential()
model.add(Dense(3, input_dim=X.shape[1], activation='relu'))
model.add(Dense(3, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [534]:
# Compile, train silently for 200 epochs, then score on the same X/Y used for training.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, Y, batch_size=10, epochs=200, verbose=0)
# evaluate() returns [loss, accuracy]; only the accuracy is reported (as a percentage).
accuracy = model.evaluate(X, Y)[1]
print(f'Accuracy: {accuracy * 100:.4f}')

In [535]:
from sklearn.utils import check_random_state
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [536]:
# Features / target for the logistic-regression experiment.
lx = data.drop(columns=['classASD'])
ly = data['classASD']
# Hold out 20% of the rows.
# Fix: seed the split for reproducibility and stratify on the label so both
# splits keep the same class balance.
lx_train, lx_test, ly_train, ly_test = train_test_split(
    lx, ly, test_size=0.2, random_state=seed, stratify=ly)

In [537]:
# Standardise using statistics learned from the training split only.
# Fix: the original called `fit_transform` on the test split too, re-fitting the
# scaler on test data so train and test were scaled with different means and
# variances; the test split must only be transformed.
lx_train = scaler.fit_transform(lx_train)
lx_test = scaler.transform(lx_test)

# L1-penalised logistic regression.
# NOTE(review): C = 50 / 80000 is a very small inverse-regularisation strength;
# it may drive every coefficient to zero - confirm the value is intended.
logReg = LogisticRegression(C = 50. / 80000, penalty='l1', solver='saga', tol=0.1)
logReg.fit(lx_train, ly_train)
# Percentage of coefficients that are exactly zero.
sparsity = np.mean(logReg.coef_ == 0) * 100
score = logReg.score(lx_test, ly_test)

print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

In [None]:
# Lower-triangle heatmap of the pairwise Pearson correlations.
# Fixes:
#  * `train_df` and `df` are never defined in this notebook; the correlations are
#    computed from `data`.
#  * the result is stored under its own name instead of overwriting `data`.
#  * the mask is created as a boolean array with the correlation matrix's shape.
plt.figure(figsize=(18,8))

corr = data.select_dtypes('number').corr()

# Hide the upper triangle (including the diagonal), which mirrors the lower one.
mask = np.triu(np.ones_like(corr, dtype=bool))
plt.title('Pearson Correlation of Features', y=1.05, size=15)
sns.heatmap(corr, annot=True, cbar=False, cmap="Blues", mask=mask)
plt.show()