In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from math import sqrt
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from urllib.request import urlopen
import json

social and economic impacts of Covid-19 on American households

National Center for Health Statistics. Indicators of Anxiety or Depression Based on Reported Frequency of Symptoms During Last 7 Days. Data accessed [Last accessed date]. Available from: https://data.cdc.gov/d/8pt5-q6wp.

In [18]:
usa = pd.read_csv('./data/Indicators_of_Anxiety_or_Depression_Based_on_Reported_Frequency_of_Symptoms_During_Last_7_Days.csv', sep=',')
usa = pd.DataFrame(usa)


1-Discovering Data set

In [19]:
usa.head()

Unnamed: 0,Indicator,Group,State,Subgroup,Phase,Time Period,Time Period Label,Time Period Start Date,Time Period End Date,Value,Low CI,High CI,Confidence Interval,Quartile Range
0,Symptoms of Depressive Disorder,National Estimate,United States,United States,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.5,22.7,24.3,22.7 - 24.3,
1,Symptoms of Depressive Disorder,By Age,United States,18 - 29 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,32.7,30.2,35.2,30.2 - 35.2,
2,Symptoms of Depressive Disorder,By Age,United States,30 - 39 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,25.7,24.1,27.3,24.1 - 27.3,
3,Symptoms of Depressive Disorder,By Age,United States,40 - 49 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,24.8,23.3,26.2,23.3 - 26.2,
4,Symptoms of Depressive Disorder,By Age,United States,50 - 59 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.2,21.5,25.0,21.5 - 25.0,


In [20]:
usa['Group'].value_counts()

By State                      7650
By Age                        1218
By Race/Hispanic ethnicity     870
By Education                   696
By Sex                         348
By Gender identity             180
By Sexual orientation          180
National Estimate              174
By Disability status           168
Name: Group, dtype: int64

2-Evaluate the evolution of the depression the past 3 years

2.1-Add a year column

In [21]:
usa['Year'] = usa['Time Period Label'].str[-4:].astype(int)

2.2-Add month column

In [22]:
usa['Month'] = usa['Time Period Start Date'].str[:2].astype(int)
usa

Unnamed: 0,Indicator,Group,State,Subgroup,Phase,Time Period,Time Period Label,Time Period Start Date,Time Period End Date,Value,Low CI,High CI,Confidence Interval,Quartile Range,Year,Month
0,Symptoms of Depressive Disorder,National Estimate,United States,United States,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.5,22.7,24.3,22.7 - 24.3,,2020,4
1,Symptoms of Depressive Disorder,By Age,United States,18 - 29 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,32.7,30.2,35.2,30.2 - 35.2,,2020,4
2,Symptoms of Depressive Disorder,By Age,United States,30 - 39 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,25.7,24.1,27.3,24.1 - 27.3,,2020,4
3,Symptoms of Depressive Disorder,By Age,United States,40 - 49 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,24.8,23.3,26.2,23.3 - 26.2,,2020,4
4,Symptoms of Depressive Disorder,By Age,United States,50 - 59 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.2,21.5,25.0,21.5 - 25.0,,2020,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11479,Symptoms of Anxiety Disorder or Depressive Dis...,By State,Virginia,Virginia,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,33.5,29.6,37.5,29.6 - 37.5,33.4-35.7,2022,10
11480,Symptoms of Anxiety Disorder or Depressive Dis...,By State,Washington,Washington,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,36.9,32.6,41.4,32.6 - 41.4,35.8-38.5,2022,10
11481,Symptoms of Anxiety Disorder or Depressive Dis...,By State,West Virginia,West Virginia,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,38.2,31.2,45.6,31.2 - 45.6,35.8-38.5,2022,10
11482,Symptoms of Anxiety Disorder or Depressive Dis...,By State,Wisconsin,Wisconsin,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,29.9,25.8,34.3,25.8 - 34.3,27.5-33.3,2022,10


3-Evaluate depression repartition through America

In [31]:
states = usa.query("Indicator == 'Symptoms of Depressive Disorder' & Group == 'By State'")
states['Value'].max()
states = states.groupby(states['Subgroup']).mean().reset_index()

3.1-Add States code columns

In [32]:
us_states = {
    'AK': 'Alaska',
    'AL': 'Alabama',
    'AR': 'Arkansas',
    'AZ': 'Arizona',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DC': 'District of Columbia',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'HI': 'Hawaii',
    'IA': 'Iowa',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts',
    'MD': 'Maryland',
    'ME': 'Maine',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MO': 'Missouri',
    'MS': 'Mississippi',
    'MT': 'Montana',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'NE': 'Nebraska',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NV': 'Nevada',
    'NY': 'New York',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia',
    'VT': 'Vermont',
    'WA': 'Washington',
    'WI': 'Wisconsin',
    'WV': 'West Virginia',
    'WY': 'Wyoming'
}

code = []

for x in states['Subgroup']:
    for i in us_states:
        if us_states[i] == x:
            code.append(i)

states['Code'] = code
display(states)

Unnamed: 0,Subgroup,Time Period,Value,Low CI,High CI,Year,Month,Code
0,Alabama,25.5,26.658,21.846,31.924,2020.78,6.46,AL
1,Alaska,25.5,24.352,20.35,28.728,2020.78,6.46,AK
2,Arizona,25.5,25.692,21.944,29.722,2020.78,6.46,AZ
3,Arkansas,25.5,27.284,22.594,32.39,2020.78,6.46,AR
4,California,25.5,26.138,23.406,29.026,2020.78,6.46,CA
5,Colorado,25.5,23.51,20.044,27.274,2020.78,6.46,CO
6,Connecticut,25.5,21.988,18.22,26.15,2020.78,6.46,CT
7,Delaware,25.5,21.954,17.224,27.322,2020.78,6.46,DE
8,District of Columbia,25.5,22.966,17.466,29.25,2020.78,6.46,DC
9,Florida,25.5,25.25,21.87,28.864,2020.78,6.46,FL


In [33]:
states.drop(columns=['Time Period','Low CI', 'High CI', 'Year', 'Month'], inplace=True)

In [34]:
states.head()

Unnamed: 0,Subgroup,Value,Code
0,Alabama,26.658,AL
1,Alaska,24.352,AK
2,Arizona,25.692,AZ
3,Arkansas,27.284,AR
4,California,26.138,CA


3.2-Display MAP

In [36]:
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

usa_fig = go.Figure(data=go.Choropleth(
    locations=list(states["Code"]), # Spatial coordinates
    z = states["Value"], # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Blues',
    colorbar_title = "Depression %",
))

usa_fig.update_layout(
    title_text = 'Répartission de la dépression par états',
    geo_scope='usa', # limite map scope to USA
)

usa_fig.show()

Notre distribution n'étant pas normale, nous effectuons un test de Kruskal-Wallis

3.2.1- See evolution per year

In [37]:
states2020 = usa.query("Indicator == 'Symptoms of Depressive Disorder' & Group == 'By State' & Year == 2020")
fig2020 = go.Figure(data=go.Choropleth(
    locations=list(states["Code"]), # Spatial coordinates
    z = states2020["Value"], # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "Depression %",
))

fig2020.update_layout(
    title_text = 'Depression through USA in 2020',
    geo_scope='usa', # limite map scope to USA
)

fig2020.show()

In [38]:
states2021 = usa.query("Indicator == 'Symptoms of Depressive Disorder' & Group == 'By State' & Year == 2021")
fig2021 = go.Figure(data=go.Choropleth(
    locations=list(states["Code"]), # Spatial coordinates
    z = states2021["Value"], # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "Depression %",
))

fig2021.update_layout(
    title_text = 'Depression through USA in 2021',
    geo_scope='usa', # limite map scope to USA
)

fig2021.show()

In [39]:
states2022 = usa.query("Indicator == 'Symptoms of Depressive Disorder' & Group == 'By State' & Year == 2022")
fig2022 = go.Figure(data=go.Choropleth(
    locations=list(states["Code"]), # Spatial coordinates
    z = states2022["Value"], # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "Depression %",
))

fig2022.update_layout(
    title_text = 'Depression through USA in 2022',
    geo_scope='usa', # limite map scope to USA
)

fig2022.show()

4-Evaluate the repartition between genders

In [40]:
dep = usa.query("Indicator == 'Symptoms of Depressive Disorder' & Group == 'By Sex'")
dep_gender = dep.pivot_table(index='Time Period Label', columns='Subgroup', values='Value').reset_index()

In [56]:
dep_gender2020 = dep_gender.query('Year == 2020')
dep_gender2020.describe()

Subgroup,Female,Male,Year
count,21.0,21.0,21.0
mean,28.214286,24.190476,2020.0
std,1.96705,1.981894,0.0
min,25.8,20.8,2020.0
25%,26.7,22.7,2020.0
50%,27.4,23.7,2020.0
75%,29.7,25.7,2020.0
max,32.3,27.9,2020.0


In [55]:
dep_gender2021 = dep_gender.query('Year == 2021')
dep_gender2021.describe()

Subgroup,Female,Male,Year
count,19.0,19.0,19.0
mean,25.194737,21.789474,2021.0
std,2.868308,2.70676,0.0
min,22.4,18.4,2021.0
25%,23.1,19.85,2021.0
50%,23.6,20.7,2021.0
75%,27.2,23.7,2021.0
max,30.6,26.7,2021.0


In [57]:
dep_gender2022 = dep_gender.query('Year == 2022')
dep_gender2022.describe()

Subgroup,Female,Male,Year
count,10.0,10.0,10.0
mean,24.14,21.22,2022.0
std,0.929994,1.468786,0.0
min,22.9,19.9,2022.0
25%,23.4,20.425,2022.0
50%,23.95,20.65,2022.0
75%,24.875,21.125,2022.0
max,25.7,24.3,2022.0


In [51]:
dep_gender.describe()

Subgroup,Female,Male,Year
count,50.0,50.0,50.0
mean,26.252,22.684,2020.78
std,2.784669,2.532028,0.763718
min,22.4,18.4,2020.0
25%,23.6,20.525,2020.0
50%,26.0,22.6,2021.0
75%,28.325,24.275,2021.0
max,32.3,27.9,2022.0


In [14]:
dep_gender_mean = dep_gender.groupby('Year').mean().reset_index()
dep_gender_mean

Subgroup,Year,Female,Male
0,2020,28.214286,24.190476
1,2021,25.194737,21.789474
2,2022,24.14,21.22


VISUALIZATION DEPRESSION BY GENDER OVER THE YEARS

In [15]:
fig_year = go.Figure(data=[
    go.Bar(name='Female', x=dep_gender_mean['Year'], y=dep_gender_mean['Female']),
    go.Bar(name='Male', x=dep_gender_mean['Year'], y=dep_gender_mean['Male'])
])
fig_year.update_layout(barmode='group')
fig_year.show()

TTEST

Les échantillons suivent-ils une répartition normale

In [16]:
female_dist = stats.normaltest(dep_gender['Female'])
male_dist = stats.normaltest(dep_gender['Male'])

display(female_dist, male_dist)

NormaltestResult(statistic=6.35487900330908, pvalue=0.04169227155966351)

NormaltestResult(statistic=6.177617192121805, pvalue=0.04555619791888785)

In [65]:
print(stats.skew(dep_gender['Female']), stats.skew(dep_gender['Male']))

0.4428105755090232 0.406927830759628


In [67]:
dep_gender_mean.describe()

Subgroup,Year,Female,Male
count,3.0,3.0,3.0
mean,2021.0,25.849674,22.399983
std,1.0,2.11463,1.576539
min,2020.0,24.14,21.22
25%,2020.5,24.667368,21.504737
50%,2021.0,25.194737,21.789474
75%,2021.5,26.704511,22.989975
max,2022.0,28.214286,24.190476


In [18]:
dep_gender_mean.var()

Subgroup
Year      1.000000
Female    4.471658
Male      2.485474
dtype: float64

Nous ne pouvons pas rejeter l'hypothese nulle (l'echantillon suit une loi normale) avec un intervalle de confiance de 99% et les variances de nos échantillons sont différentes => test de Welsh

H0 : la proportion de dépression est la même chez les hommes et chez les femmes

H1 : la proportion de dépression chez les femmes est significativement différente que celle des hommes

In [19]:
stats.ttest_ind(dep_gender['Female'], dep_gender['Male'], equal_var=False)

Ttest_indResult(statistic=6.703368767147004, pvalue=1.34397691230813e-09)

On peut rejeter notre H0 avec une confiance de 99%

 Mann-Whitney U rank test
 
 H0 : la proportion de dépression est la même chez les hommes et chez les femmes

H1 : la proportion de dépression chez les femmes est significativement plus élevée que celle des hommes

In [20]:
stats.mannwhitneyu(dep_gender['Female'], dep_gender['Male'], alternative='greater')

MannwhitneyuResult(statistic=2051.5, pvalue=1.6690334176205657e-08)

PER YEAR AND PER SEASON

In [76]:
season = usa.groupby(['Year', 'Month']).mean().reset_index()
# season = season.dropna()
sorted_month = season.sort_values(by='Month', inplace=True)
season

Unnamed: 0,Year,Month,Time Period,Value,Low CI,High CI
9,2021,1,22.5,34.496905,30.642857,38.542857
20,2022,1,42.0,27.923932,24.72094,31.332906
21,2022,2,1.0,,,
10,2021,2,24.5,32.801905,29.064524,36.747619
22,2022,3,43.5,27.708333,24.364316,31.279274
11,2021,3,23.45283,30.263571,26.529048,34.245476
0,2020,4,1.0,29.150952,25.741905,32.78381
23,2022,4,45.0,27.020513,23.316239,31.023504
12,2021,4,28.5,26.577315,23.025926,30.431944
1,2020,5,3.5,28.72,24.812024,32.946071


In [45]:
fig_season = px.line(season, x="Month", y="Value", color='Year', markers=True, 
            title='Evolution de la proportion de dépression dans le temps depuis COVID-19',
            labels=dict(Month="Mois", Value="Dépression (%)"))
fig_season.show()

ANOVA

H0 : la proportion de dépression est la même chaque année

H1: La proportion de dépression est différente chaque année

In [46]:
season.groupby('Year').agg({'Value':['count', 'mean', 'std', 'var']})

Unnamed: 0_level_0,Value,Value,Value,Value
Unnamed: 0_level_1,count,mean,std,var
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2020,9,31.917615,2.714639,7.369266
2021,10,28.662898,2.999652,8.99791
2022,8,29.009615,1.733166,3.003865


In [58]:
pivot_season = season.pivot_table(columns='Year', values='Value', index='Month').reset_index()


Variances et échantillons inégaux => test de Kruskal-Wallis

H0: la proportion de dépression est la même chaque année

H1: la proportion de dépression est significativement différente chaque années

In [60]:
stats.kruskal(pivot_season[2020],pivot_season[2021],pivot_season[2022], nan_policy='omit')

KruskalResult(statistic=8.07142857142857, pvalue=0.01767305199510627)

COMPARAISONS DE NOTRE ÉCHANTILLON AVEC LA MOYENNE DE LA POPULATION

During August 2020–February 2021, the percentage of adults with recent symptoms of an anxiety or a depressive disorder increased from 36.4% to 41.5%, 5000 participants

In [134]:
y_20_21 = season.query("Year < 2022")
end_20 = y_20_21.query("Year == 2020 & Month >= 8")
start_21 = y_20_21.query("Year == 2021 & Month <= 2")

end_20

Unnamed: 0,Year,Month,Time Period,Value,Low CI,High CI
4,2020,8,13.0,29.938571,27.065238,32.973333
5,2020,9,15.0,30.651111,27.563968,33.900794
6,2020,10,17.5,33.188333,29.456905,37.115952
7,2020,11,19.5,35.436429,31.60119,39.445238
8,2020,12,21.0,35.931905,32.07,39.972381


In [130]:
start_21

Unnamed: 0,Year,Month,Time Period,Value,Low CI,High CI
9,2021,1,22.5,34.496905,30.642857,38.542857
10,2021,2,24.5,32.801905,29.064524,36.747619


In [145]:
compare = pd.concat([end_20, start_21])

compare['Date']= compare['Year'].astype(str) + "-" + compare['Month'].astype(str)
compare['Date'].sort_values()
compare

Unnamed: 0,Year,Month,Time Period,Value,Low CI,High CI,Date
4,2020,8,13.0,29.938571,27.065238,32.973333,2020-8
5,2020,9,15.0,30.651111,27.563968,33.900794,2020-9
6,2020,10,17.5,33.188333,29.456905,37.115952,2020-10
7,2020,11,19.5,35.436429,31.60119,39.445238,2020-11
8,2020,12,21.0,35.931905,32.07,39.972381,2020-12
9,2021,1,22.5,34.496905,30.642857,38.542857,2021-1
10,2021,2,24.5,32.801905,29.064524,36.747619,2021-2


In [146]:
fig_season = px.line(compare, x="Date", y="Value", markers=True, 
            title='Evolution de la proportion de dépression dans le temps depuis COVID-19',
            labels=dict(Month="Mois", Value="Dépression (%)"))
fig_season.show()

In [70]:
monthly = season.groupby('Month').mean()
monthly

Unnamed: 0_level_0,Year,Time Period,Value,Low CI,High CI
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2021.5,32.25,31.210418,27.681899,34.937882
2,2021.0,24.5,32.801905,29.064524,36.747619
3,2021.5,33.476415,28.985952,25.446682,32.762375
4,2021.0,24.833333,27.582927,24.028023,31.413086
5,2020.5,17.0,27.053519,23.339577,31.093869
6,2021.0,28.833333,28.439676,24.682025,32.489524
7,2021.0,28.390102,29.972821,26.165006,34.045804
8,2020.5,24.25,28.876658,25.692234,32.273205
9,2021.0,34.0,30.157493,26.529917,34.025716
10,2021.0,33.75,32.415534,28.392555,36.680626


In [74]:
monthly = monthly.query("Month >= 4").reset_index()
monthly

Unnamed: 0,Month,Year,Time Period,Value,Low CI,High CI
0,4,2021.0,24.833333,27.582927,24.028023,31.413086
1,5,2020.5,17.0,27.053519,23.339577,31.093869
2,6,2021.0,28.833333,28.439676,24.682025,32.489524
3,7,2021.0,28.390102,29.972821,26.165006,34.045804
4,8,2020.5,24.25,28.876658,25.692234,32.273205
5,9,2021.0,34.0,30.157493,26.529917,34.025716
6,10,2021.0,33.75,32.415534,28.392555,36.680626
7,11,2020.0,19.5,35.436429,31.60119,39.445238
8,12,2021.0,31.453608,30.727729,27.099687,34.582389


In [75]:
fig_month = px.line(monthly, x="Month", y="Value", markers=True, 
            title='Evolution de la proportion de dépression dans le temps depuis COVID-19',
            labels=dict(Month="Mois", Value="Dépression (%)"))
fig_month.show()

ETUDE BY AGE

In [23]:
age = usa.query("Indicator == 'Symptoms of Depressive Disorder' & Group == 'By Age' ")
age

Unnamed: 0,Indicator,Group,State,Subgroup,Phase,Time Period,Time Period Label,Time Period Start Date,Time Period End Date,Value,Low CI,High CI,Confidence Interval,Quartile Range,Year,Month
1,Symptoms of Depressive Disorder,By Age,United States,18 - 29 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,32.7,30.2,35.2,30.2 - 35.2,,2020,4
2,Symptoms of Depressive Disorder,By Age,United States,30 - 39 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,25.7,24.1,27.3,24.1 - 27.3,,2020,4
3,Symptoms of Depressive Disorder,By Age,United States,40 - 49 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,24.8,23.3,26.2,23.3 - 26.2,,2020,4
4,Symptoms of Depressive Disorder,By Age,United States,50 - 59 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.2,21.5,25.0,21.5 - 25.0,,2020,4
5,Symptoms of Depressive Disorder,By Age,United States,60 - 69 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,18.4,17.0,19.7,17.0 - 19.7,,2020,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11253,Symptoms of Depressive Disorder,By Age,United States,40 - 49 years,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,25.5,23.6,27.4,23.6 - 27.4,,2022,10
11254,Symptoms of Depressive Disorder,By Age,United States,50 - 59 years,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,24.4,22.6,26.3,22.6 - 26.3,,2022,10
11255,Symptoms of Depressive Disorder,By Age,United States,60 - 69 years,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,19.0,17.1,21.0,17.1 - 21.0,,2022,10
11256,Symptoms of Depressive Disorder,By Age,United States,70 - 79 years,3.6,50,"Oct 5 - Oct 17, 2022",10/05/2022,10/17/2022,14.0,12.0,16.3,12.0 - 16.3,,2022,10


In [24]:
age = age.groupby('Subgroup').agg({'Value':['mean', 'count', 'std', 'var']}).reset_index()


In [25]:
display(age)

Unnamed: 0_level_0,Subgroup,Value,Value,Value,Value
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,count,std,var
0,18 - 29 years,37.538,50,3.187404,10.159547
1,30 - 39 years,28.544,50,2.534349,6.422922
2,40 - 49 years,25.308,50,2.513786,6.319118
3,50 - 59 years,23.41,50,3.051413,9.311122
4,60 - 69 years,18.182,50,2.808347,7.886812
5,70 - 79 years,13.438,50,2.676877,7.165669
6,80 years and above,12.824,50,3.240783,10.502678


In [153]:
usa['Group'].value_counts()

By State                      7650
By Age                        1218
By Race/Hispanic ethnicity     870
By Education                   696
By Sex                         348
By Gender identity             180
By Sexual orientation          180
National Estimate              174
By Disability status           168
Name: Group, dtype: int64