In [133]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn .metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier


import plotly.graph_objects as go
import plotly.express as px
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

sns.set_style("darkgrid")
pd.set_option("display.max_columns", None)   # setting to display all columns
pd.options.plotting.backend = "plotly"


import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


seed = 143
np.random.seed(seed)

%matplotlib inline

In [134]:
# Load the screening survey.
# '?' is declared as an NA marker at parse time (the cleaning cell further down
# treats '?' as missing as well), so the nunique()/isnull() audit cells that run
# before that cleaning step already see those entries as NaN instead of as an
# ordinary string value.
data = pd.read_csv(
    '../input/autism-screening-on-adults/autism_screening.csv',
    na_values='?',
)

In [135]:
len(data)

In [136]:
data.head()

In [137]:
data.columns

In [138]:
data.nunique()

In [139]:
data.isnull().sum()

In [140]:
# Treat the '?' placeholder as missing and give two columns friendlier names
# ('Class/ASD' -> 'classASD', 'austim' -> 'autism') in one chained pass.
data = (
    data
    .replace('?', np.nan)
    .rename(columns={'Class/ASD': 'classASD', 'austim': 'autism'})
)

# Column dtypes and non-null counts after cleaning.
data.info()

In [141]:
# 'result' values for each classASD category, one point per row.
sns.swarmplot(data=data, x='classASD', y='result')

In [142]:
# Age distribution: density-normalised histogram with a KDE overlay.
# `sns.distplot` is deprecated; `histplot(..., kde=True, stat='density')` is the
# replacement seaborn documents for the default distplot output.
sns.histplot(data['age'], kde=True, stat='density')

In [143]:
# Summary statistics for age, followed by a 50-bin count histogram.
# Reads from `data` (the frame this notebook loads); no `df` is defined, so the
# previous reference raised NameError on a fresh kernel.
print(data['age'].describe())

# histplot with the default stat='count' replaces the deprecated
# distplot(..., kde=False), which also drew raw counts.
sns.histplot(data['age'], bins=50)

In [144]:
# Mean age per gender as a bar chart.
# `.plot` is routed through plotly (the plotting backend is set to "plotly" in
# the setup cell), which is why plotly-express keywords such as `template` and
# `labels` are accepted and a plotly figure comes back.
# Reads from `data`: no `df` is defined anywhere in the notebook.
mean_age_by_gender = data.groupby('gender').agg({'age': 'mean'})

fig = mean_age_by_gender.plot(kind='bar',
                              template="seaborn",
                              labels={"gender": "Gender",
                                      "value": "Age"},
                              color_discrete_sequence=["#84b1b5"])

fig.update_layout(title="<b>Average Age</b>\n",
                  title_font=dict(size=20))

fig.show()

In [145]:
# One swarm plot per categorical attribute, each against the classASD label.
# Reads from `data`: no `df` is defined anywhere in the notebook.
cols = ['gender', 'jundice', 'autism', 'used_app_before']
for col in cols:
    sns.swarmplot(x='classASD', y=col, data=data)
    plt.show()  # flush each figure so the plots do not overlay one another

In [146]:
# Horizontal bar chart of the 15 most frequent countries of residence.
# value_counts() sorts descending, so head(15) is the top 15; it is computed
# once and reused for both axes. Reads from `data` (no `df` is defined).
top_countries = data['contry_of_res'].value_counts().head(15)

# x/y are passed as vectors, so no `data=` argument is needed.
# seaborn returns a matplotlib Axes, hence the name `ax`.
ax = sns.barplot(x=top_countries.values, y=top_countries.index)
ax.set(xlabel='Count', ylabel='Country')
plt.show()

In [147]:
# Donut chart of row counts per gender.
# Reads from `data`: no `df` is defined anywhere in the notebook.
fig = px.pie(data, names="gender",
             title="<b>Counts of Male and Female</b>",
             hole=0.5, template="plotly_dark")

# Percentage + category label inside each slice, thin black slice borders.
fig.update_traces(textposition='inside',
                  textinfo='percent+label',
                  marker=dict(line=dict(color='#000000', width=1.5)))

# Centre the title horizontally.
fig.update_layout(title_x=0.5,
                  title_font=dict(size=20),
                  uniformtext_minsize=15)

fig.show()

In [148]:
# Pie chart of row counts per classASD value.
# Reads from `data`: no `df` is defined anywhere in the notebook.
fig = px.pie(data, names="classASD",
             title="<b>Autism Spectrum Disorder Counts</b>",
             template="plotly_dark")

# Percentage + category label inside each slice, thin black slice borders.
fig.update_traces(textposition='inside',
                  textinfo='percent+label',
                  marker=dict(line=dict(color='#000000', width=1.5)))

# Centre the title horizontally.
fig.update_layout(title_x=0.5,
                  title_font=dict(size=20),
                  uniformtext_minsize=15)

fig.show()

In [149]:
# Gender counts among rows labelled classASD == "YES", drawn via the plotly
# plotting backend. Reads from `data` (no `df` is defined in the notebook) and
# selects with a single .loc call instead of chained indexing.
asd_gender_counts = data.loc[data['classASD'] == "YES", 'gender'].value_counts()

# The x-axis column is called "index" when the value_counts index is unnamed
# (pandas < 2.0) and "gender" when the index carries the source column name
# (pandas >= 2.0); both keys are mapped so the axis title is "Gender" either way.
fig = asd_gender_counts.plot(kind='bar',
                             template="seaborn",
                             color_discrete_sequence=["#84b1b5"],
                             labels={"index": "Gender",
                                     "gender": "Gender",
                                     "value": "Counts"})

fig.update_layout(title="<b>Gender Count of ASD Patients</b>\n",
                  title_font=dict(size=20), width=900)

fig.show()

In [150]:
asd_patients_country_wise = pd.DataFrame(df[df['classASD'] == "YES"]['contry_of_res'].value_counts()).rename({"contry_of_res":"ASD_Patient_Counts"}, axis = 1)

In [151]:
asd_patients_country_wise.style.bar(color="#84A9AC") 

In [152]:
# Bar chart of the per-country ASD counts, countries (the frame's index) on x.
fig = px.bar(
    asd_patients_country_wise,
    x=asd_patients_country_wise.index,
    y="ASD_Patient_Counts",
    labels={"index": "Country"},
    color_discrete_sequence=px.colors.qualitative.D3_r,
    template='plotly_dark',
)

# Tilt the country tick labels.
fig.update_xaxes(tickangle=310)

# Title centred horizontally and anchored near the top of the figure.
centered_title = dict(
    text="<b>Counts of ASD Patients Country Wise</b>",
    x=0.5,
    y=0.93,
    xanchor='center',
    yanchor='top',
)
fig.update_layout(title=centered_title)

fig.show()

In [153]:
# Disabled: 0/1 encoding of the yes/no and m/f columns.
#data.autism = data.autism.replace(['yes', 'no'], [1, 0])
#data.classASD = data.classASD.replace(['YES', 'NO'], [1, 0])
#data.jundice = data.jundice.replace(['yes', 'no'], [1, 0])
#data.gender = data.gender.replace(['m', 'f'], [1, 0])

# Drop the ten columns A1_Score ... A10_Score.
score_columns = [f'A{item}_Score' for item in range(1, 11)]
data = data.drop(columns=score_columns)

In [154]:
# Row counts per classASD value, split by ethnicity, on a 15x15-inch canvas.
ethnicity_fig, ethnicity_ax = plt.subplots(figsize=(15, 15))
sns.countplot(data=data, x='classASD', hue='ethnicity', ax=ethnicity_ax)
plt.show()

In [155]:
# Row counts per classASD value, split by country of residence (one hue level
# per distinct 'contry_of_res' value), drawn on a 30x20-inch figure.
plt.figure(figsize = (30, 20))
sns.countplot(x = 'classASD', hue = 'contry_of_res', data = data)