# Learning how to plot autism prevalence with Plotly
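This notebook follows the [SharkCoder Plotly maps tutorial](https://sharkcoder.com/data-visualization/plotly-maps): it first reproduces the tutorial's life-expectancy map, then applies the same approach to autism prevalence data.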

In [1]:
# Imports
import plotly.graph_objects as go
import pandas as pd

In [2]:
# Read the tutorial's life-expectancy table; the preview below shows the
# Country, Life expectancy and ISO-code columns used by the first map.
# NOTE(review): this is an absolute, user-specific path — presumably it only
# resolves on the author's machine; confirm before sharing the notebook.
file = '/Users/alexwilkinson/code/ADWilk19/autism_employment/raw_data/Life expectancy.csv'
df = pd.read_csv(filepath_or_buffer=file)

# Show the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,Country,Life expectancy,ISO-code
0,Afghanistan,64.5,AFG
1,Algeria,76.7,DZA
2,Andorra,81.8,AND
3,Angola,60.8,AGO
4,Antigua and Barbuda,76.9,ATG


In [8]:
# Start from an empty figure and attach the world choropleth as its only trace.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locations=df['ISO-code'],    # country codes that place each value on the map
        z=df['Life expectancy'],     # value that drives the fill colour
        text=df['Country'],          # label shown on hover
        colorscale='Viridis',
        autocolorscale=False,
        reversescale=True,
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_title='Years',
    )
)

# Figure size, map projection, centred title, base font and a source credit.
fig.update_layout(
    width=1000,
    height=620,
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    },
    title=dict(
        text='<b>Life expectancy by country</b>',
        x=0.5,
        y=0.9,
        xanchor='center',
        yanchor='top',
        font=dict(color='#525252', size=26),
    ),
    font={'family': 'Heebo', 'size': 18, 'color': '#525252'},
    annotations=[
        {
            'x': 0.5,
            'y': 0.1,
            'xref': 'paper',   # paper coordinates: fractions of the whole figure
            'yref': 'paper',
            # The run of spaces inside the link text is part of the original
            # string (it came from a backslash line continuation) and is kept.
            'text': (
                'Source: <a href="https://www.kaggle.com/daniboy370/world-data-by-country-2020">'
                '            Kaggle / WORLD DATA by country (2020)</a>'
            ),
            'showarrow': False,
        }
    ],
)
fig.show()

***

## Sourcing data
- I sourced the autism prevalence data from [World Population Review: Autism Rates by Country](https://worldpopulationreview.com/country-rankings/autism-rates-by-country).
- I compiled each country's latitude and longitude from various websites and Google searches.

In [29]:
# Read the two CSVs used for the autism map, one after the other.
# NOTE(review): both paths are absolute and user-specific — presumably they
# only resolve on the author's machine; confirm before sharing the notebook.

# Estimated autism prevalence per country.
file = '/Users/alexwilkinson/code/ADWilk19/autism_employment/raw_data/world_autism_prevalence/autism_world_prevalence.csv'
aut_prev = pd.read_csv(filepath_or_buffer=file)

# Country coordinates (latitude / longitude).
file2 = '/Users/alexwilkinson/code/ADWilk19/autism_employment/raw_data/world_autism_prevalence/country_coords.csv'
coords = pd.read_csv(filepath_or_buffer=file2)

In [30]:
# Preview the first five rows of the prevalence table.
aut_prev.head(n=5)

Unnamed: 0,name,iso_a3,est_autism_prevalence_per_10k
0,Fiji,FJI,81.1
1,Tanzania,TZA,83.1
2,W. Sahara,ESH,83.83
3,Canada,CAN,86.4
4,United States of America,USA,80.9


In [31]:
# Preview the first five rows of the coordinates table.
coords.head(n=5)

Unnamed: 0,country,ISO_A3,latitude,longitude,name
0,AD,AND,42.546245,1.601554,Andorra
1,AE,ARE,23.424076,53.847818,United Arab Emirates
2,AF,AFG,33.93911,67.709953,Afghanistan
3,AG,ATG,17.060816,-61.796428,Antigua and Barbuda
4,AI,AIA,18.220554,-63.068615,Anguilla


In [32]:
# Report the row count of each frame; the two differ, which is why the
# later join is a left join anchored on the prevalence table.
print(f'The length of the autism prevalence dataset is: {len(aut_prev)}')
print(f'The length of the coordinates dataset is: {len(coords)}')

The length of the autism prevalence dataset is: 198
The length of the coordinates dataset is: 246


In [33]:
# Rename ISO_A3 to iso_a3 so the join key has the same name as in aut_prev.
# The result is reassigned instead of using inplace=True: the frame shown by
# earlier cells is not mutated behind the reader's back, and re-running this
# cell is harmless (renaming an absent column is a no-op by default).
coords = coords.rename(columns={'ISO_A3': 'iso_a3'})

# Display the column index to confirm the rename.
coords.columns

Index(['country', 'iso_a3', 'latitude', 'longitude', 'name'], dtype='object')

In [34]:
# Left join keyed on iso_a3: every prevalence row is kept and gains the
# matching latitude/longitude; codes with no match in coords get NaN there.
# Both frames have a 'name' column, so pandas suffixes them as name_x / name_y.
map_df = aut_prev.merge(right=coords, how='left', on='iso_a3')

# Preview the first five rows of the joined table.
map_df.head(n=5)

Unnamed: 0,name_x,iso_a3,est_autism_prevalence_per_10k,country,latitude,longitude,name_y
0,Fiji,FJI,81.1,FJ,-16.578193,179.414413,Fiji
1,Tanzania,TZA,83.1,TZ,-6.369028,34.888822,Tanzania
2,W. Sahara,ESH,83.83,EH,24.215527,-12.885834,Western Sahara
3,Canada,CAN,86.4,CA,56.130366,-106.346771,Canada
4,United States of America,USA,80.9,US,37.09024,-95.712891,United States


In [35]:
# Drop the duplicate name_y column (from coords) and rename name_x back to
# 'name'.
#
# This is written as a non-mutating chain with errors='ignore' so the cell is
# idempotent: the previous in-place drop raised KeyError on a second run,
# because name_y had already been removed from map_df.
map_df = (
    map_df
    .drop(columns='name_y', errors='ignore')
    .rename(columns={'name_x': 'name'})
)

# Preview the tidied table.
map_df.head()

Unnamed: 0,name,iso_a3,est_autism_prevalence_per_10k,country,latitude,longitude
0,Fiji,FJI,81.1,FJ,-16.578193,179.414413
1,Tanzania,TZA,83.1,TZ,-6.369028,34.888822
2,W. Sahara,ESH,83.83,EH,24.215527,-12.885834
3,Canada,CAN,86.4,CA,56.130366,-106.346771
4,United States of America,USA,80.9,US,37.09024,-95.712891


In [60]:
# World choropleth of estimated autism prevalence per 10,000 people.
fig2 = go.Figure(data=go.Choropleth(
    locations=map_df['iso_a3'],                  # country codes that place each value
    z=map_df['est_autism_prevalence_per_10k'],   # value that drives the fill colour
    # Hover label: use the readable country name. The 'country' column holds
    # the two-letter code (FJ, TZ, ...), which is what was shown before.
    text=map_df['name'],
    colorscale='Viridis',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title='Prevalence per 10,000',
))

# Figure size, map projection, title, base font and a source credit.
fig2.update_layout(
    width=1000,
    height=620,
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='natural earth'
    ),
    title={
        'text': '<b>Autism prevalence per 10,000 people by country</b>',
        'y': 0.9,
        'x': 0.5,
        # Anchor the title's centre at x=0.5 so it is actually centred,
        # matching the life-expectancy figure; 'right' placed the title's
        # right edge at the midpoint and pushed it left of centre.
        'xanchor': 'center',
        'yanchor': 'top',
    },
    title_font_color='#525252',
    title_font_size=20,
    font=dict(
        family='Heebo',
        size=12,
        color='#525252'
    ),
    annotations=[dict(
        x=0.5,
        y=0.01,
        xref='paper',   # paper coordinates: fractions of the whole figure
        yref='paper',
        # Same string as before, spelled out explicitly; the run of spaces
        # inside the link text came from a backslash line continuation.
        text=(
            'Source:<a href="https://worldpopulationreview.com/country-rankings/autism-rates-by-country">'
            '            Autism rates by country (2022)</a>'
        ),
        showarrow=False
    )]
)
fig2.show()