# Meningitis
Infection Rates Over Time Compared In A Variety of Ways

In [2]:
# importing dependencies
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

# Meningitis data file
meningitis_data_df = "Resources/Meningitis_Data.csv"

In [3]:
# Read the Meningitis data 
meningitis_data_df = "Resources/Meningitis_Data.csv"
meningitis_df = pd.read_csv(meningitis_data_df)


In [4]:
# Display the data table for preview
meningitis_df.head()

Unnamed: 0,Year,Value,Units,Bacteria,Topic,ViewBy,ViewBy2
0,1997,1.44,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,11-17 years old
1,1997,1.61,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,18-22 years old
2,1997,0.46,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,23-49 years old
3,1997,1.16,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,5-10 years old
4,1997,4.23,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,<5 years old


US Infection Rates Over Time

In [5]:
# Set new index to 'Year'

df = meningitis_df.set_index("Year")
df.head()

Unnamed: 0_level_0,Value,Units,Bacteria,Topic,ViewBy,ViewBy2
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997,1.44,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,11-17 years old
1997,1.61,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,18-22 years old
1997,0.46,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,23-49 years old
1997,1.16,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,5-10 years old
1997,4.23,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,<5 years old


In [6]:
meningitis_df.dtypes

Year          int64
Value       float64
Units        object
Bacteria     object
Topic        object
ViewBy       object
ViewBy2      object
dtype: object

In [7]:
for x in meningitis_df.columns:
    print ("----------------------")
    print (x)
    print (meningitis_df[x].value_counts())

----------------------
Year
1997    27
1998    27
2019    27
2018    27
2017    27
2016    27
2015    27
2014    27
2013    27
2012    27
2011    27
2010    27
2009    27
2008    27
2007    27
2006    27
2005    27
2004    27
2003    27
2002    27
2001    27
2000    27
1999    27
2020    27
Name: Year, dtype: int64
----------------------
Value
0.030       26
0.100       22
0.200       19
0.160       18
0.080       16
            ..
0.495        1
0.480        1
1.730        1
1664.000     1
0.529        1
Name: Value, Length: 235, dtype: int64
----------------------
Units
Per 100,000 population                       312
Per 100,000 population (excluding Oregon)    168
Counts                                        96
Percent                                       72
Name: Units, dtype: int64
----------------------
Bacteria
Neisseria meningitidis    648
Name: Bacteria, dtype: int64
----------------------
Topic
Case rates                    360
Number of Cases and Deaths     96
Serogroups 

In [9]:
#checking for n/as
meningitis_df.isnull().sum()

Year          0
Value         0
Units         0
Bacteria      0
Topic         0
ViewBy        0
ViewBy2     144
dtype: int64

In [10]:
#check for duplicates
#look into this more
duplicates_df=meningitis_df[meningitis_df.duplicated()]
duplicates_df

Unnamed: 0,Year,Value,Units,Bacteria,Topic,ViewBy,ViewBy2


In [16]:
meningitis_2018=meningitis_df[meningitis_df["Year"]==2018]
meningitis_2019=meningitis_df[meningitis_df["Year"]==2019]
meningitis_2020=meningitis_df[meningitis_df["Year"]==2020]

meningitis_year=meningitis_df.groupby(["Year"]).agg({"ViewBy" :["sum"]})

# Filter the DataFrame down only to those columns to chart
meningitis_df = meningitis_df[["Year", "Value"]]

# Set the index to be "Year" so they will be used as labels
#meningitis_df = meningitis_df.set_index("Year")

meningitis_2020.head()

Unnamed: 0,Year,Value,Units,Bacteria,Topic,ViewBy,ViewBy2
621,2020,0.03,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,11-17 years old
622,2020,0.1,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,18-22 years old
623,2020,0.05,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,23-49 years old
624,2020,0.06,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,5-10 years old
625,2020,0.27,"Per 100,000 population (excluding Oregon)",Neisseria meningitidis,Case rates,Age,<5 years old


In [18]:
#checking for value counts in 2020 data for overall illness counts
#how do you call ViewBy, Overall?
meningitis_2020[0].value_counts()

KeyError: 0

In [9]:
#display all column names of DataFrame
#print(df.columns.tolist())

#['Year', 'Value', 'Topic', 'ViewBy']

In [19]:
# locate total cases by 'Year' in the 'Value' column

total_cases = meningitis_df.iloc [:,:2]
total_cases
#print("Using Loc: " + total_cases)





Unnamed: 0,Year,Value
0,1997,1.440
1,1997,1.610
2,1997,0.460
3,1997,1.160
4,1997,4.230
...,...,...
643,2020,0.020
644,2020,0.030
645,2020,0.088
646,2020,0.235


In [21]:
# Read Dataframe rows only from Topic: case rates and ViewBy: Overall

meningitis_2018=meningitis_df[meningitis_df["Year"]==2018]
meningitis_2019=meningitis_df[meningitis_df["Year"]==2019]
meningitis_2020=meningitis_df[meningitis_df["Year"]==2020]
meningitis_2021=meningitis_df[meningitis_df["Year"]==2021]

meningitis_year=meningitis_df.groupby(["Year"]).agg({"Value":["sum"]})

# Filter the DataFrame down only to those columns to chart
meningitis_df = meningitis_df[["Year","Value", "State", "Topic", "ViewBy"]]

# Set the index to be "Year" so they will be used as labels
meningitis_df = meningitis_df.set_index("Year")

meningitis_df.head()


KeyError: "['State', 'Topic', 'ViewBy'] not in index"

In [22]:
#aggregating by year AND overall cases

meningitis_year_and_overall_cases=meningitis_df.groupby(["Year", "Value"]).agg({"Topic":["Overall"]})
meningitis_year_and_overall_cases.reset_index()
meningitis_year_and_overall_cases

KeyError: "Column(s) ['Topic'] do not exist"

In [23]:
from matplotlib import style
style.use('fivethirtyeight')


ax=meningitis_year_and_overall_cases.plot(kind="line", figsize=(20,6))
    # ax.set_xticklabels(meningitis_year_and_overall["Year", "Value", "Overall"])
plt.show()

NameError: name 'meningitis_year_and_overall_cases' is not defined

In [None]:
# Create visualization to display case counts by year overlayed with covid cases
#bar graph? w/ each year containing different colors for each of the 3 types of illness categories and one for covid?

State Infection Rates
Top 10 States (with longest masking mandates)
New York
Massachusetts
New Jersey
Delaware
Connecticut
Virginia
Rhode Island
Maine
Michigan
Illinois
New Mexico

State Infection Rates
Bottom 10 States (with shortest masking mandates)
Florida
Georgia
South Carolina
Tennessee
Montana
Oklahoma
South Dakota
Wyoming
Idaho
Arizona