<a href="https://colab.research.google.com/github/OleKrarup123/PythonTutorials/blob/main/pandas_read_html/pandas_read_html.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Scrape table from website

In [None]:
# pd.read_html downloads the page and returns one DataFrame per HTML table found
df_list = pd.read_html(
    'https://www.cdc.gov/growthcharts/html_charts/wtage.htm#males'
)

# The container itself is a plain Python list ...
print(type(df_list))
# ... whose entries are DataFrames: index 0 holds the male data, index 1 the female data
print(*(type(df_list[idx]) for idx in (0, 1)))


## Extract individual dataframes from list

In [None]:
# Unpack the scraped tables into named dataframes:
# the first table is the male data, the second is the female data
df_male, df_female = df_list[0], df_list[1]

# Preview the first rows of the male dataframe
male_preview = df_male.head()
print(male_preview)

## Plot data

In [None]:
# Plot the median (50th percentile) mass of males and females twice:
# first against age in months, then against age in years.

age_col = 'Age (in months)'
median_col = '50th Percentile Weight (in kilograms)'

# One entry per figure: (x-axis label, male x-data, female x-data)
age_views = (
    ('Age [Months]', df_male[age_col], df_female[age_col]),
    ('Age [Years]', df_male[age_col] / 12, df_female[age_col] / 12),
)

for x_label, male_age, female_age in age_views:
    plt.figure()
    plt.plot(male_age, df_male[median_col], label='Males')
    plt.plot(female_age, df_female[median_col], label='Females')
    plt.xlabel(x_label)
    plt.ylabel('Mass [kg]')
    # Anchor the legend just outside the right edge of the axes
    plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0))
    plt.grid()
    plt.show()

## Make dual-axis plot

In [None]:
# Reference: https://matplotlib.org/stable/gallery/subplots_axes_and_figures/secondary_axis.html

# Median mass against age in years, one line per sex
fig, ax = plt.subplots(constrained_layout=True)
for series_label, frame in (('Males', df_male), ('Females', df_female)):
    ax.plot(
        frame['Age (in months)'] / 12,
        frame['50th Percentile Weight (in kilograms)'],
        label=series_label,
    )

# Bottom x-axis label, y-axis label and title in a single call
ax.set(
    xlabel='Age [Years]',
    ylabel='Mass [kg]',
    title='Chart of male and female mass vs. age',
)

#Define tick functions
def months2years(months):
    """Convert an age in months to an age in years."""
    months_per_year = 12.0
    return months / months_per_year

def years2months(years):
    """Convert an age in years to an age in months."""
    months_per_year = 12.0
    return years * months_per_year

# Add a top x-axis showing the same ages in months.
# functions=(forward, inverse): forward maps the bottom axis (years) to months,
# inverse maps months back to years.
secax = ax.secondary_xaxis('top', functions=(years2months, months2years))
secax.set_xlabel('Age [Months]')

# Legend just outside the right edge of the axes, then grid and display
ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0))
ax.grid()
plt.show()

## Make chart of the 10th-90th percentile range and median

In [None]:
# References:
# https://matplotlib.org/stable/plot_types/basic/fill_between.html#sphx-glr-plot-types-basic-fill-between-py
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.fill_between.html

# For each sex, draw a shaded 10th-90th percentile band with the median on top.
# Both bands share the same style (linewidth=0, i.e. no outline) so that the
# male and female bands are rendered consistently.
fig, ax = plt.subplots()

# One entry per sex: (dataframe, band label, median label)
band_specs = (
    (df_male, 'Male [10%-90%]', 'Male median'),
    (df_female, 'Female [10%-90%]', 'Female median'),
)

for frame, band_label, median_label in band_specs:
    age_years = frame['Age (in months)'] / 12  # x-data, converted from months

    ax.fill_between(age_years,
                    frame['10th Percentile Weight (in kilograms)'],  # y_min
                    frame['90th Percentile Weight (in kilograms)'],  # y_max
                    alpha=.3,     # Transparency
                    linewidth=0,  # No outline around the band
                    label=band_label)

    ax.plot(age_years,
            frame['50th Percentile Weight (in kilograms)'],
            linewidth=2,
            label=median_label)  # Plot median

# Make bottom x-axis
ax.set_xlabel('Age [Years]')
ax.set_ylabel('Mass [kg]')
ax.set_title('Chart of male and female mass vs. age')

# Make top x-axis; functions=(forward, inverse) where forward maps the
# bottom axis (years) to months and inverse maps months back to years
secax = ax.secondary_xaxis('top', functions=(years2months, months2years))
secax.set_xlabel('Age [Months]')

# Grid and legend (legend anchored just outside the right edge of the axes)
ax.grid()
ax.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
plt.show()