In [102]:
import pandas as pd
from unicodedata import normalize
import numpy as np

In [103]:
# Select first dataframe from cdc site, remove unneccesary headers
df = pd.read_html('https://www.cdc.gov/mmwr/volumes/69/wr/mm6932a1.htm#T1_down', header=[2])[0]
df.head()

Unnamed: 0,Characteristic,"All respondents who completed surveys during June 24–30, 2020 weighted* no. (%)",Anxiety disorder†,Depressive disorder†,Anxiety or depressive disorder†,COVID-19–related TSRD§,Started or increased substance use to cope with pandemic-related stress or emotions¶,Seriously considered suicide in past 30 days,≥1 adverse mental or behavioral health symptom
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
1,Gender,Gender,Gender,Gender,Gender,Gender,Gender,Gender,Gender
2,Female,"2,784 (50.9)",26.3,23.9,31.5,24.7,12.2,8.9,41.4
3,Male,"2,676 (48.9)",24.7,24.8,30.4,27.9,14.4,12.6,40.5
4,Other,10 (0.2),20.0,30.0,30.0,30.0,10.0,0.0,30.0


In [104]:
# Rename columns to more be more workable
clean_df = df.rename(columns={
    "All respondents who completed surveys during June 24–30, 2020 weighted* no. (%)" : "respondents",
    "Anxiety disorder†" : "anxiety",
    "Depressive disorder†" : "depression",
    "Anxiety or depressive disorder†" : "anxiety_or_depression",
    "COVID-19–related TSRD§" : "covid_related_trauma",
    "Started or increased substance use to cope with pandemic-related stress or emotions¶" : "increased_substance_use",
    "Seriously considered suicide in past 30 days" : "considered_suicide",
    "≥1 adverse mental or behavioral health symptom" : "one_or_more_symptoms"
    })

In [105]:
# function to clean up unnecessary whitespace retrieved from https://pbpython.com/pandas-html-table.html
def clean_normalize_whitespace(x):
    if isinstance(x, str):
        return normalize('NFKC', x).strip()
    else:
        return x

In [106]:
# Runs clean_normalize_whitespace on main dataframe
clean_df = clean_df.applymap(clean_normalize_whitespace)

In [107]:
clean_df

Unnamed: 0,Characteristic,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
1,Gender,Gender,Gender,Gender,Gender,Gender,Gender,Gender,Gender
2,Female,"2,784 (50.9)",26.3,23.9,31.5,24.7,12.2,8.9,41.4
3,Male,"2,676 (48.9)",24.7,24.8,30.4,27.9,14.4,12.6,40.5
4,Other,10 (0.2),20.0,30.0,30.0,30.0,10.0,0.0,30.0
...,...,...,...,...,...,...,...,...,...
59,Yes,540 (9.9),52.5,50.6,60.8,45.5,25.2,22.1,68.8
60,No,"4,930 (90.1)",22.6,21.5,27.7,24.2,12.0,9.4,37.9
61,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder,Posttraumatic stress disorder
62,Yes,251 (4.6),72.3,69.1,78.7,69.4,43.8,44.8,88.0


In [108]:
totals_row = clean_df.loc[0]

In [109]:
# Create gender dataframe with totals row from clean df added
gender_df = clean_df[2:5].append(totals_row)
gender_df = gender_df.rename(columns={'Characteristic' : 'Gender'})
gender_df = gender_df.sort_index()
gender_df

Unnamed: 0,Gender,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
2,Female,"2,784 (50.9)",26.3,23.9,31.5,24.7,12.2,8.9,41.4
3,Male,"2,676 (48.9)",24.7,24.8,30.4,27.9,14.4,12.6,40.5
4,Other,10 (0.2),20.0,30.0,30.0,30.0,10.0,0.0,30.0


In [110]:
# Create gender dataframe with totals row from clean df added
age_df = clean_df[7:10].append(totals_row)
age_df = age_df.rename(columns={'Characteristic' : 'Age Group'})
age_df = age_df.sort_index()
age_df

Unnamed: 0,Age Group,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
7,25–44,"1,911 (34.9)",35.3,32.5,40.4,36.0,19.5,16.0,51.9
8,45–64,"1,895 (34.6)",16.1,14.4,20.3,17.2,7.7,3.8,29.5
9,≥65,933 (17.1),6.2,5.8,8.1,9.2,3.0,2.0,15.1


In [111]:
# Create ethnicity dataframe with totals row from clean df added

ethnicity_df = clean_df[12:17].append(totals_row)
ethnicity_df = ethnicity_df.rename(columns={'Characteristic' : 'Ethnicity'})
ethnicity_df = ethnicity_df.sort_index()
ethnicity_df

Unnamed: 0,Ethnicity,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
12,"Black, non-Hispanic",663 (12.1),23.4,24.6,30.2,30.4,18.4,15.1,44.2
13,"Asian, non-Hispanic",256 (4.7),14.1,14.2,18.0,22.1,6.7,6.6,31.9
14,"Other race or multiple races, non-Hispanic**",164 (3.0),27.8,29.3,33.2,28.3,11.0,9.8,43.8
15,"Hispanic, any race(s)",885 (16.2),35.5,31.3,40.8,35.1,21.9,18.6,52.1
16,Unknown,50 (0.9),38.0,34.0,44.0,34.0,18.0,26.0,48.0


In [112]:
# Create income dataframe with totals row from clean df added
income_df = clean_df[19:24].append(totals_row)
income_df = income_df.rename(columns={'Characteristic' : 'Income'})
income_df = income_df.sort_index()
income_df

Unnamed: 0,Income,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
19,"25,000–49,999","1,123 (20.5)",26.0,25.6,33.2,27.2,13.5,10.1,43.9
20,"50,999–99,999","1,775 (32.5)",27.1,24.8,31.6,26.4,12.6,11.4,40.3
21,"100,999–199,999","1,301 (23.8)",23.1,20.8,27.7,24.2,15.5,11.7,37.8
22,"≥200,000",282 (5.2),17.4,17.0,20.6,23.1,14.8,11.6,35.1
23,Unknown,247 (4.5),19.6,23.1,27.2,24.9,6.2,3.9,41.5


In [113]:
# Create education dataframe with totals row from clean df added
education_df = clean_df[26:31].append(totals_row)
education_df = education_df.rename(columns={'Characteristic' : 'Education'})
education_df = education_df.sort_index()
education_df

Unnamed: 0,Education,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
26,High school diploma,943 (17.2),31.5,32.8,38.4,32.1,15.3,13.1,48.0
27,Some college,"1,455 (26.6)",25.2,23.4,31.7,22.8,10.9,8.6,39.9
28,Bachelor’s degree,"1,888 (34.5)",24.7,22.5,28.7,26.4,14.2,10.7,40.6
29,Professional degree,"1,074 (19.6)",20.9,19.5,25.4,24.5,12.6,10.0,35.2
30,Unknown,33 (0.6),25.2,23.2,28.2,23.2,10.5,5.5,28.2


In [114]:
# Create employment dataframe with totals row from clean df added
employment_df = clean_df[33:37].append(totals_row)
employment_df = employment_df.rename(columns={'Characteristic' : 'Employment'})
employment_df = employment_df.sort_index()
employment_df

Unnamed: 0,Employment,respondents,anxiety,depression,anxiety_or_depression,covid_related_trauma,increased_substance_use,considered_suicide,one_or_more_symptoms
0,All respondents,"5,470 (100)",25.5,24.3,30.9,26.3,13.3,10.7,40.9
33,Essential,"1,785 (32.6)",35.5,33.6,42.4,38.5,24.7,21.7,54.0
34,Nonessential,"1,646 (30.1)",24.1,24.1,29.9,25.2,10.5,7.8,41.0
35,Unemployed,761 (13.9),32.0,29.4,37.8,25.0,7.7,4.7,45.9
36,Retired,"1,278 (23.4)",9.6,8.7,12.1,11.3,4.2,2.5,19.6
