In [37]:
import numpy as np
import pandas as pd 
import plotly.express as px
from statsmodels.tsa.seasonal import STL
from scipy import stats
from datetime import datetime

In [38]:
# Confirmed-case time series for US counties, fetched from the JHU CSSE GitHub repository.
RAW = pd.read_csv(
    filepath_or_buffer=(
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
        'csse_covid_19_data/csse_covid_19_time_series/'
        'time_series_covid19_confirmed_US.csv'
    )
)

In [39]:
# Death time series for US counties, fetched from the JHU CSSE GitHub repository.
RAW1 = pd.read_csv(
    filepath_or_buffer=(
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
        'csse_covid_19_data/csse_covid_19_time_series/'
        'time_series_covid19_deaths_US.csv'
    )
)

In [40]:
def parsedata(state, RAW):
    """
    Reshape the rows of one state from a wide time-series table into a
    date-indexed frame with one column per county plus state totals.

        :param state: value matched exactly against RAW['Province_State'],
            e.g. 'Maryland'
        :param RAW: wide dataframe containing 'Province_State', 'Admin2' and,
            from column position 11 onwards, one column per date
        :return: dataframe indexed by the original date labels (index name
            'Date') whose columns are the 'Admin2' names, followed by
            'Date' (datetime), 'State_Cum' (row-wise sum over the counties) and
            'State_Daily' (change of 'State_Cum' from the previous row, 0 for
            the first row)
    """
    state_rows = RAW[RAW['Province_State'] == state]
    # Only the rows of the requested state are kept.

    state_df = state_rows.iloc[:, 11:].T
    # One row per date column, one column per matching RAW row. Working on the
    # numeric slice alone keeps the counts numeric and keeps every date row
    # (stacking the metadata rows on top and then dropping three rows used to
    # discard the first date, because only two metadata rows were added).

    state_df.columns = state_rows['Admin2'].to_numpy()
    # Label the columns with the county names directly, so a missing Admin2
    # value can no longer be confused with the Date column.

    state_df['Date'] = state_df.index
    state_df = state_df.set_index('Date', drop=False)
    # Keep the date labels both as the index and as a regular column.

    state_df['State_Cum'] = state_df.drop('Date', axis=1).sum(axis=1)
    # State total: sum across all county columns for each date.
    state_df['State_Daily'] = state_df['State_Cum'].diff().fillna(value=0)
    # Day-over-day change of the state total; the first row has no
    # predecessor and is set to 0.
    state_df['Date'] = pd.to_datetime(state_df['Date'])
    return state_df

In [41]:
# Date-indexed confirmed-case table for Maryland.
md = parsedata(state='Maryland', RAW=RAW)

In [42]:
def parsedeath(state, RAW):
    """
    Reshape the rows of one state from a wide time-series table that also
    carries a 'Population' column, and return the per-county metadata too.

        :param state: value matched exactly against RAW['Province_State'],
            e.g. 'Maryland'
        :param RAW: wide dataframe containing 'Province_State', 'Admin2',
            'Population' and, from column position 12 onwards, one column
            per date
        :return: tuple (state_df, b1).
            state_df is indexed by the original date labels (index name
            'Date') with one column per 'Admin2' name, followed by 'Date'
            (datetime), 'State_Cum' (row-wise sum over the counties) and
            'State_Daily' (change of 'State_Cum' from the previous row, 0 for
            the first row).
            b1 is the transposed metadata: rows 'Province_State', 'Admin2',
            'Population'; one column per matching RAW row, labelled with
            that row's index in RAW.
    """
    state_rows = RAW[RAW['Province_State'] == state]

    # Metadata is returned to the caller as-is; it is no longer stacked on top
    # of the counts, which previously forced every count column to object dtype.
    b1 = state_rows[['Province_State', 'Admin2', 'Population']].T

    # One row per date column, one column per matching RAW row.
    state_df = state_rows.iloc[:, 12:].T
    # Label the columns with the county names directly, so a missing Admin2
    # value can no longer be confused with the Date column.
    state_df.columns = state_rows['Admin2'].to_numpy()

    # Keep the date labels both as the index and as a regular column.
    state_df['Date'] = state_df.index
    state_df = state_df.set_index('Date', drop=False)

    # State total: sum across all county columns for each date.
    state_df['State_Cum'] = state_df.drop('Date', axis=1).sum(axis=1)
    # Day-over-day change of the state total; the first row is set to 0.
    state_df['State_Daily'] = state_df['State_Cum'].diff().fillna(value=0)
    state_df['Date'] = pd.to_datetime(state_df['Date'])
    return state_df, b1

In [43]:
# Date-indexed deaths table for Maryland plus the transposed county metadata.
mdd, pop = parsedeath(state='Maryland', RAW=RAW1)

In [44]:
# Flip the metadata back so that each county is a row again.
pop = pop.transpose()

In [45]:
# Use 'County' as the key column name and discard the state name column.
pop = (
    pop
    .rename(columns={'Admin2': 'County'})
    .drop(columns='Province_State')
)

In [46]:
# Keep only the row for the chosen snapshot date in each table.
cases = md.loc[md['Date'].eq("2023-03-09")]
deaths = mdd.loc[mdd['Date'].eq("2023-03-09")]

In [47]:
# Replace the date index with 0 and transpose, so every former column
# becomes a row and the single snapshot becomes column 0.
deaths = deaths.reset_index(drop=True).transpose()
cases = cases.reset_index(drop=True).transpose()

In [48]:
# Move the row labels into a 'County' column and name the value column.
cases = cases.reset_index().rename(columns={"index": "County", 0: 'Cases'})
deaths = deaths.reset_index().rename(columns={"index": "County", 0: 'Deaths'})

In [49]:
# Join cases, deaths and population on the county name.
covid = (
    cases
    .merge(deaths, on='County')
    .merge(pop, on='County')
)

In [50]:
# Remove the entries that are not real counties. Filtering by name and
# rebinding (instead of dropping row labels 16 and 22 in place) keeps this cell
# re-runnable and independent of row order.
# NOTE(review): labels 16 and 22 are presumed to be the 'Out of MD' and
# 'Unassigned' rows of the alphabetically ordered Maryland table — confirm.
covid = covid[~covid['County'].isin(['Out of MD', 'Unassigned'])]

In [51]:
# Show the merged per-county table (cases, deaths, population).
covid

Unnamed: 0,County,Cases,Deaths,Population
0,Allegany,21731,392,70416
1,Anne Arundel,119808,1277,579234
2,Baltimore,172399,2826,827370
3,Baltimore City,149524,1966,593490
4,Calvert,14785,173,92525
5,Caroline,7330,92,33406
6,Carroll,28133,475,168447
7,Cecil,20355,286,102855
8,Charles,38706,408,163257
9,Dorchester,9556,116,31929


In [52]:
# Per-county racial demographics, read from a CSV in the working directory.
race = pd.read_csv(filepath_or_buffer='race_demo.csv')

In [53]:
# Discard the first column. It is selected by position and dropped through the
# public API rather than by writing a placeholder name into
# `race.columns.values`, because an Index is meant to be immutable.
race = race.drop(columns=race.columns[0])

In [54]:
# Remove the literal word 'County' from the county names
# (any whitespace around it is left untouched).
race['County'] = race['County'].str.replace('County', '', regex=False)

In [55]:
# Remove the entries that are not real counties from the population table.
# `pop` is rebound to the filtered frame (the later row-repeat step reads
# `pop.index`) and `pop1` keeps pointing at the same frame. Filtering by name
# instead of `drop([1255, 1261], inplace=True)` keeps the cell re-runnable.
# NOTE(review): labels 1255 and 1261 are presumed to be the 'Out of MD' and
# 'Unassigned' rows — confirm.
pop = pop[~pop['County'].isin(['Out of MD', 'Unassigned'])]
pop1 = pop

In [56]:
# Repeat every county row six times and renumber the rows from 0.
# Reading from `pop` (the frame `pop1` was assigned from) rather than from
# `pop1` itself makes the cell re-runnable: after the first run `pop1` has a
# fresh 0..n-1 index and no longer contains the labels in `pop.index`.
# NOTE(review): 6 presumably matches the number of race categories per county
# in `race` — confirm.
pop1 = pop.loc[pop.index.repeat(6)].reset_index(drop=True)

In [57]:
# Show the population table after each county row has been repeated.
pop1

Unnamed: 0,County,Population
0,Allegany,70416
1,Allegany,70416
2,Allegany,70416
3,Allegany,70416
4,Allegany,70416
...,...,...
139,Worcester,52276
140,Worcester,52276
141,Worcester,52276
142,Worcester,52276


In [58]:
# Order the demographics rows by county name and renumber them from 0.
race = race.sort_values(by='County', ignore_index=True)

In [59]:
# Attach each county's total population. The assignment aligns on the row
# index, so it relies on `race` and `pop1` listing counties in the same order.
# 'Population' comes from a transposed frame that mixed text and numbers, so
# it is converted to a numeric dtype here; otherwise every ratio derived from
# it would be stored as generic Python objects.
race['Total Pop'] = pd.to_numeric(pop1['Population'])

In [63]:
# Share of the county's total population held by each group, in percent.
race['Percentage'] = race['Population Size'].div(race['Total Pop']).mul(100)

In [61]:
# Renumber the rows from 0. `race` was already renumbered right after sorting
# and the visible cells in between only add columns, so this is expected to
# leave the frame unchanged.
race = race.reset_index(drop=True)

In [64]:
# Show the demographics table with the added 'Total Pop' and 'Percentage' columns.
race

Unnamed: 0,County,State,Race,Population Size,Total Pop,Percentage
0,Allegany,MD,Two or more Races,1457,70416,2.069132
1,Allegany,MD,Native Hawaiian or Pacific Islander,49,70416,0.069586
2,Allegany,MD,Black,5818,70416,8.262327
3,Allegany,MD,White,62142,70416,88.24983
4,Allegany,MD,Native American or Alaskan,153,70416,0.21728
...,...,...,...,...,...,...
139,Worcester,MD,Black,6813,52276,13.032749
140,Worcester,MD,Asian,801,52276,1.532252
141,Worcester,MD,Native Hawaiian or Pacific Islander,23,52276,0.043997
142,Worcester,MD,Two or more Races,1013,52276,1.937792


In [65]:
# Keep only the rows describing the White population of each county.
low = race.loc[race['Race'].eq('White')]

In [67]:
# Order counties from the smallest to the largest White share.
low = low.sort_values(by='Percentage', ascending=True)

In [68]:
# Show the White-population rows, ordered from lowest to highest percentage.
low

Unnamed: 0,County,State,Race,Population Size,Total Pop,Percentage
100,Prince George's,MD,White,246145,909327,27.06892
23,Baltimore City,MD,White,188646,593490,31.785877
53,Charles,MD,White,67932,163257,41.610467
112,Somerset,MD,White,13896,25616,54.247345
82,Howard,MD,White,182223,325690,55.94983
91,Montgomery,MD,White,630063,1050688,59.966708
13,Baltimore,MD,White,497980,827370,60.188308
137,Wicomico,MD,White,68639,103609,66.248106
59,Dorchester,MD,White,21303,31929,66.71991
7,Anne Arundel,MD,White,426312,579234,73.599271


In [69]:
# One group of bars per county, one bar per race, bar height taken from 'Percentage'.
fig = px.histogram(
    data_frame=race,
    x='County',
    y='Percentage',
    color='Race',
    barmode='group',
    title='County racial demographics bar graph',
    width=1000,
    height=1000,
)
fig.show()