# Should I wear a mask?
#### P.D. You should always use a mask while still there are covid cases, regardless the countries mandates
This process will analyze the covid cases from the last days to tell you the risk and if you should wear a mask or now regardless the current mandate that your country has.

The idea is to have a certain measure of the risk of not wearing a mask today, analyzing the changes of the data.



## Get the csv files from the last month

In [1]:
# import dependencies
import pandas as pd
from datetime import datetime, timedelta
from sys import getsizeof
from memory_profiler import profile
import logging
from math import trunc


In [2]:
# set logging level
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [3]:
def generate_date_range(days = 90):
    """ Generate a date range for the data extraction based on a number of days till today
    """
    date_range = pd.date_range(end=datetime.today()-timedelta(days=1), periods=days).tolist()
    return date_range


In [4]:
number_of_days = 30
date_range = generate_date_range(days=number_of_days)

In [5]:
def extract_covid_data(processed_date: datetime):
    file_name = ''.join([processed_date.strftime('%m-%d-%Y'), '.csv'])
    source_url = ''.join(['https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_daily_reports_us/', file_name,'?raw=true']) 
    logger.debug(source_url)
    response  = pd.read_csv(source_url)
    return response

In [6]:
def merge_data(date_range: list):
    """ Given a list of dates, extracts all the data day by day, and creates a unified dataframe
    containing all the timeseries.
    """
    combined_dataframe = pd.DataFrame()
    for processed_date in date_range:
        new_dataframe = extract_covid_data(processed_date)
        combined_dataframe = pd.concat([combined_dataframe, new_dataframe])
    return combined_dataframe


covid_cases_df = merge_data(date_range)
covid_cases_df.head(5)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,FIPS,Incident_Rate,Total_Test_Results,People_Hospitalized,Case_Fatality_Ratio,UID,ISO3,Testing_Rate,Hospitalization_Rate
0,Alabama,US,2022-02-19 04:31:19,32.3182,-86.9023,1271455,17877,,,1.0,25931.205941,7169743.0,,1.406027,84000001.0,USA,146226.238659,
1,Alaska,US,2022-02-19 04:31:19,61.3707,-152.4044,235956,1141,,,2.0,32254.475118,3741734.0,,0.483565,84000002.0,USA,511483.777485,
2,American Samoa,US,2022-02-19 04:31:19,-14.271,-170.132,18,0,,,60.0,32.350245,2140.0,,0.0,16.0,ASM,3846.084722,
3,Arizona,US,2022-02-19 04:31:19,33.7298,-111.4312,1962920,27513,,,4.0,26967.939542,18066365.0,,1.401636,84000004.0,USA,248208.097663,
4,Arkansas,US,2022-02-19 04:31:19,34.9697,-92.3731,812948,10271,,,5.0,26938.396264,4918663.0,,1.263426,84000005.0,USA,162988.152975,


In [7]:
covid_cases_filtered_df = covid_cases_df.filter(items=['Province_State', 'Last_Update', 'Lat', 'Long_', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'Incident_Rate', 'Case_Fatility_Ratio'])
covid_cases_filtered_df.head(5)

Unnamed: 0,Province_State,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate
0,Alabama,2022-02-19 04:31:19,32.3182,-86.9023,1271455,17877,,,25931.205941
1,Alaska,2022-02-19 04:31:19,61.3707,-152.4044,235956,1141,,,32254.475118
2,American Samoa,2022-02-19 04:31:19,-14.271,-170.132,18,0,,,32.350245
3,Arizona,2022-02-19 04:31:19,33.7298,-111.4312,1962920,27513,,,26967.939542
4,Arkansas,2022-02-19 04:31:19,34.9697,-92.3731,812948,10271,,,26938.396264


In [8]:
# Check the dataframe
covid_cases_filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1740 entries, 0 to 57
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Province_State  1740 non-null   object 
 1   Last_Update     1740 non-null   object 
 2   Lat             1680 non-null   float64
 3   Long_           1680 non-null   float64
 4   Confirmed       1740 non-null   int64  
 5   Deaths          1740 non-null   int64  
 6   Recovered       0 non-null      float64
 7   Active          0 non-null      float64
 8   Incident_Rate   1680 non-null   float64
dtypes: float64(5), int64(2), object(2)
memory usage: 135.9+ KB


In [9]:
# Group dataframe by state, and take in consideration the incident rate (covid cases per 100K persons, to see the trends equally between states)
total_mean_us = covid_cases_filtered_df['Incident_Rate'].mean()
print(f'US covid cases of the last {number_of_days} days: {trunc(total_mean_us)} per 100,000 persons')



US covid cases of the last 30 days: 23530 per 100,000 persons


In [10]:
def get_covid_general_statistics(covid_df):
    covid_df = 1
    return