In [1]:
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt

# Set pandas option to display all columns
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
pd.reset_option('max_rows')


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
columns_to_load = [
     'location_key'
    , 'date'
    , 'place_id'
    , 'wikidata_id'
    , 'datacommons_id'
    , 'country_code'
    , 'country_name'
    , 'subregion1_code'
    , 'subregion1_name'
    , 'subregion2_code'
    , 'subregion2_name'
    , 'new_confirmed'
    , 'new_deceased'
    , 'cumulative_confirmed'
    , 'cumulative_deceased'
    , 'new_persons_fully_vaccinated'
    , 'cumulative_persons_fully_vaccinated'
    , 'population'
    , 'population_male'
    , 'population_female'
    , 'population_age_00_09'
    , 'population_age_10_19'
    , 'population_age_20_29'
    , 'population_age_30_39'
    , 'population_age_40_49'
    , 'population_age_50_59'
    , 'population_age_60_69'
    , 'population_age_70_79'
    , 'population_age_80_and_older'
    , 'openstreetmap_id'
    , 'latitude'
    , 'longitude'
    , 'area_sq_km'
    , 'life_expectancy'
    , 'mobility_retail_and_recreation'
    , 'mobility_grocery_and_pharmacy'
    , 'mobility_parks'
    , 'mobility_transit_stations'
    , 'mobility_workplaces'
    , 'mobility_residential'
    , 'average_temperature_celsius'
    , 'minimum_temperature_celsius'
    , 'maximum_temperature_celsius'
    , 'rainfall_mm'
    , 'dew_point'
    , 'relative_humidity'
]

In [3]:
# Directory path to your CSV files
directory_path = "../Test CSVs/"  # Example directory path

# Initialize an empty DataFrame to store data from all CSV files
full_df = pd.DataFrame(columns=columns_to_load)

# Loop over the list of CSV files
for file in glob.glob(directory_path + '*.csv'):
    # Read the CSV file without specifying columns to ensure we don't miss any data
    df = pd.read_csv(file)
    
    # Ensure all desired columns are present, add them with NaN values if they are missing
    for column in columns_to_load:
        if column not in df.columns:
            df[column] = np.nan

    # Reorder and select only the desired columns to maintain consistency
    df = df[columns_to_load]
    
    # Append the contents of the file to the full DataFrame
    full_df = pd.concat([full_df, df], ignore_index=True)

# Optionally, reset the index of the final DataFrame
full_df.reset_index(drop=True, inplace=True)



  full_df = pd.concat([full_df, df], ignore_index=True)
  full_df = pd.concat([full_df, df], ignore_index=True)


In [4]:
# Data Prep
freq_df = full_df.copy()
freq_df['date'] = pd.to_datetime(freq_df['date'])
freq_df['quarter'] = freq_df['date'].dt.to_period('Q').dt.start_time
freq_df['month'] = freq_df['date'].dt.to_period('M').dt.to_timestamp()
freq_df['week'] = freq_df['date'] - pd.to_timedelta(freq_df['date'].dt.weekday, unit='d')
freq_df['county_name'] = freq_df['subregion2_name'] + ', ' + freq_df['subregion1_name']

columns_to_keep = [
    'county_name', 'location_key', 'quarter', 'month', 'week', 'date',
    'new_confirmed', 'new_deceased', 'cumulative_confirmed', 'cumulative_deceased',
    'new_persons_fully_vaccinated', 'cumulative_persons_fully_vaccinated',
    'population', 'population_male', 'population_female',
    'population_age_00_09', 'population_age_10_19', 'population_age_20_29',
    'population_age_30_39', 'population_age_40_49', 'population_age_50_59',
    'population_age_60_69', 'population_age_70_79', 'population_age_80_and_older',
    'area_sq_km',
    'life_expectancy',
    'average_temperature_celsius', 'minimum_temperature_celsius',
    'maximum_temperature_celsius', 'rainfall_mm',
    'relative_humidity'
]

freq_df = freq_df[columns_to_keep]

# Convert object types to float64 where applicable
for column in freq_df.select_dtypes(include=['object']).columns:
    try:
        freq_df[column] = freq_df[column].astype(float)
    except ValueError:
        print(f"Conversion failed for column: {column}")


aggregations = {
    'new_confirmed': 'sum',
    'new_deceased': 'sum',
    'cumulative_confirmed': 'max',
    'cumulative_deceased': 'max',
    'new_persons_fully_vaccinated': 'sum',
    'cumulative_persons_fully_vaccinated': 'max',
    'population': 'max',
    'population_male': 'max',
    'population_female': 'max',
    'population_age_00_09': 'max',
    'population_age_10_19': 'max',
    'population_age_20_29': 'max',
    'population_age_30_39': 'max',
    'population_age_40_49': 'max',
    'population_age_50_59': 'max',
    'population_age_60_69': 'max',
    'population_age_70_79': 'max',
    'population_age_80_and_older': 'max',
    'area_sq_km': 'max',
    'life_expectancy': 'max',
    'average_temperature_celsius': 'mean',
    'minimum_temperature_celsius': 'mean',
    'maximum_temperature_celsius': 'mean',
    'rainfall_mm': 'sum',
    'relative_humidity': 'mean'
}


# Group and aggregate for weekly, monthly, quarterly
weekly_df = freq_df.groupby(['county_name', 'week']).agg(aggregations).reset_index()
# monthly_df = freq_df.groupby(['county_name', 'month']).agg(aggregations).reset_index()
# quarterly_df = freq_df.groupby(['county_name', 'quarter']).agg(aggregations).reset_index()

# Format descriptive columns
weekly_df['description'] = weekly_df['county_name'] + " - Week of " + weekly_df['week'].dt.strftime('%Y-%m-%d')
# monthly_df['description'] = monthly_df['county_name'] + " - Month of " + monthly_df['month'].dt.strftime('%Y-%m')
# quarterly_df['description'] = quarterly_df['county_name'] + " - Quarter of " + quarterly_df['quarter'].dt.strftime('%Y-Q%q')





Conversion failed for column: county_name
Conversion failed for column: location_key


In [5]:
# Normalize the specified metrics by population to get per capita values
normalize_columns = [
    'new_confirmed',
    'new_deceased',
    'cumulative_confirmed',
    'cumulative_deceased',
    'new_persons_fully_vaccinated',
    'cumulative_persons_fully_vaccinated',
    'population_male', 'population_female',
    'population_age_00_09', 'population_age_10_19', 'population_age_20_29',
    'population_age_30_39', 'population_age_40_49', 'population_age_50_59',
    'population_age_60_69', 'population_age_70_79', 'population_age_80_and_older',
]

# Function to apply normalization
def normalize_by_population(df, columns):
    for col in columns:
        df[f'{col}_per_100k'] = df[col] / df['population'] * 100000
    return df

# Apply normalization
weekly_df = normalize_by_population(weekly_df, normalize_columns)
weekly_df.head()

Unnamed: 0,county_name,week,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,new_persons_fully_vaccinated,cumulative_persons_fully_vaccinated,population,population_male,population_female,population_age_00_09,population_age_10_19,population_age_20_29,population_age_30_39,population_age_40_49,population_age_50_59,population_age_60_69,population_age_70_79,population_age_80_and_older,area_sq_km,life_expectancy,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,relative_humidity,description,new_confirmed_per_100k,new_deceased_per_100k,cumulative_confirmed_per_100k,cumulative_deceased_per_100k,new_persons_fully_vaccinated_per_100k,cumulative_persons_fully_vaccinated_per_100k,population_male_per_100k,population_female_per_100k,population_age_00_09_per_100k,population_age_10_19_per_100k,population_age_20_29_per_100k,population_age_30_39_per_100k,population_age_40_49_per_100k,population_age_50_59_per_100k,population_age_60_69_per_100k,population_age_70_79_per_100k,population_age_80_and_older_per_100k
0,"Adams County, Colorado",2019-12-30,0.0,0.0,,,0.0,,503590.0,254232.0,249358.0,74850.0,73029.0,71893.0,81068.0,67359.0,59799.0,43573.0,21329.0,10690.0,3102.0,77.977528,2.713334,-4.903333,10.97679,0.0,43.75703,"Adams County, Colorado - Week of 2019-12-30",0.0,0.0,,,0.0,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593
1,"Adams County, Colorado",2020-01-06,0.0,0.0,,,0.0,,503590.0,254232.0,249358.0,74850.0,73029.0,71893.0,81068.0,67359.0,59799.0,43573.0,21329.0,10690.0,3102.0,77.977528,0.588095,-6.238889,8.563492,0.0,43.450591,"Adams County, Colorado - Week of 2020-01-06",0.0,0.0,,,0.0,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593
2,"Adams County, Colorado",2020-01-13,0.0,0.0,,,0.0,,503590.0,254232.0,249358.0,74850.0,73029.0,71893.0,81068.0,67359.0,59799.0,43573.0,21329.0,10690.0,3102.0,77.977528,-0.452381,-7.979365,8.264286,0.0,41.711824,"Adams County, Colorado - Week of 2020-01-13",0.0,0.0,,,0.0,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593
3,"Adams County, Colorado",2020-01-20,0.0,0.0,,,0.0,,503590.0,254232.0,249358.0,74850.0,73029.0,71893.0,81068.0,67359.0,59799.0,43573.0,21329.0,10690.0,3102.0,77.977528,2.561565,-4.45034,11.145692,0.0,42.763934,"Adams County, Colorado - Week of 2020-01-20",0.0,0.0,,,0.0,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593
4,"Adams County, Colorado",2020-01-27,0.0,0.0,,,0.0,,503590.0,254232.0,249358.0,74850.0,73029.0,71893.0,81068.0,67359.0,59799.0,43573.0,21329.0,10690.0,3102.0,77.977528,3.066667,-4.392857,11.653616,1.200453,48.735622,"Adams County, Colorado - Week of 2020-01-27",0.0,0.0,,,0.0,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593


In [6]:
change_columns = [
    'new_confirmed_per_100k',
    'new_deceased_per_100k',
    'cumulative_confirmed_per_100k',
    'cumulative_deceased_per_100k',
    'new_persons_fully_vaccinated_per_100k',
    'cumulative_persons_fully_vaccinated_per_100k',
    'average_temperature_celsius',
    'minimum_temperature_celsius',
    'maximum_temperature_celsius',
    'rainfall_mm',
    'relative_humidity'
]


# Calculate the percentage change within each group


# Ensure the DataFrame is sorted by date to correctly apply pct_change

weekly_df.sort_values(by=['county_name', 'week'], inplace=True)
weekly_df.set_index(['county_name', weekly_df.columns[1]], inplace=True)  # Set index as county_name and the time period
for col in change_columns:
    weekly_df[f'{col}_pct_change'] = weekly_df.groupby('county_name')[col].pct_change(fill_method=None).multiply(100)

# Reset index after calculations
weekly_df.reset_index(inplace=True)


In [7]:
for col in weekly_df.columns:
    print(col)

county_name
week
new_confirmed
new_deceased
cumulative_confirmed
cumulative_deceased
new_persons_fully_vaccinated
cumulative_persons_fully_vaccinated
population
population_male
population_female
population_age_00_09
population_age_10_19
population_age_20_29
population_age_30_39
population_age_40_49
population_age_50_59
population_age_60_69
population_age_70_79
population_age_80_and_older
area_sq_km
life_expectancy
average_temperature_celsius
minimum_temperature_celsius
maximum_temperature_celsius
rainfall_mm
relative_humidity
description
new_confirmed_per_100k
new_deceased_per_100k
cumulative_confirmed_per_100k
cumulative_deceased_per_100k
new_persons_fully_vaccinated_per_100k
cumulative_persons_fully_vaccinated_per_100k
population_male_per_100k
population_female_per_100k
population_age_00_09_per_100k
population_age_10_19_per_100k
population_age_20_29_per_100k
population_age_30_39_per_100k
population_age_40_49_per_100k
population_age_50_59_per_100k
population_age_60_69_per_100k
populat

In [8]:
weekly_df.tail(500)

Unnamed: 0,county_name,week,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,new_persons_fully_vaccinated,cumulative_persons_fully_vaccinated,population,population_male,population_female,population_age_00_09,population_age_10_19,population_age_20_29,population_age_30_39,population_age_40_49,population_age_50_59,population_age_60_69,population_age_70_79,population_age_80_and_older,area_sq_km,life_expectancy,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,relative_humidity,description,new_confirmed_per_100k,new_deceased_per_100k,cumulative_confirmed_per_100k,cumulative_deceased_per_100k,new_persons_fully_vaccinated_per_100k,cumulative_persons_fully_vaccinated_per_100k,population_male_per_100k,population_female_per_100k,population_age_00_09_per_100k,population_age_10_19_per_100k,population_age_20_29_per_100k,population_age_30_39_per_100k,population_age_40_49_per_100k,population_age_50_59_per_100k,population_age_60_69_per_100k,population_age_70_79_per_100k,population_age_80_and_older_per_100k,new_confirmed_per_100k_pct_change,new_deceased_per_100k_pct_change,cumulative_confirmed_per_100k_pct_change,cumulative_deceased_per_100k_pct_change,new_persons_fully_vaccinated_per_100k_pct_change,cumulative_persons_fully_vaccinated_per_100k_pct_change,average_temperature_celsius_pct_change,minimum_temperature_celsius_pct_change,maximum_temperature_celsius_pct_change,rainfall_mm_pct_change,relative_humidity_pct_change
58146,"Yates County, New York",2021-04-19,15.0,0.0,1125.0,26.0,0.0,,25002.0,12162.0,12840.0,3082.0,3308.0,3316.0,2485.0,2404.0,3455.0,3604.0,2153.0,1195.0,974.0,80.58,6.722222,1.000793,13.477778,6.451600,60.267959,"Yates County, New York - Week of 2021-04-19",59.995200,0.0,4499.640029,103.991681,0.000000,,48644.108471,51355.891529,12327.013839,13230.941525,13262.938965,9939.204864,9615.230782,13818.894488,14414.846812,8611.311095,4779.617631,-21.052632,,1.351351,0.0,,,-18.259025,-76.535175,-4.857415,-76.803653,-19.378277
58147,"Yates County, New York",2021-04-26,17.0,0.0,1142.0,26.0,432.0,8195.0,25002.0,12162.0,12840.0,3082.0,3308.0,3316.0,2485.0,2404.0,3455.0,3604.0,2153.0,1195.0,974.0,80.58,9.914286,4.113492,17.739683,31.312556,62.996033,"Yates County, New York - Week of 2021-04-26",67.994560,0.0,4567.634589,103.991681,1727.861771,32777.377810,48644.108471,51355.891529,12327.013839,13230.941525,13262.938965,9939.204864,9615.230782,13818.894488,14414.846812,8611.311095,4779.617631,13.333333,,1.511111,0.0,inf,,47.485244,311.023111,31.621717,385.345589,4.526573
58148,"Yates County, New York",2021-05-03,20.0,0.0,1162.0,26.0,547.0,8742.0,25002.0,12162.0,12840.0,3082.0,3308.0,3316.0,2485.0,2404.0,3455.0,3604.0,2153.0,1195.0,974.0,80.58,9.707936,4.988889,16.224603,20.940889,77.742800,"Yates County, New York - Week of 2021-05-03",79.993601,0.0,4647.628190,103.991681,2187.824974,34965.202784,48644.108471,51355.891529,12327.013839,13230.941525,13262.938965,9939.204864,9615.230782,13818.894488,14414.846812,8611.311095,4779.617631,17.647059,,1.751313,0.0,26.620370,6.674802,-2.081333,21.281104,-8.540622,-33.123029,23.409042
58149,"Yates County, New York",2021-05-10,6.0,0.0,1168.0,26.0,303.0,9045.0,25002.0,12162.0,12840.0,3082.0,3308.0,3316.0,2485.0,2404.0,3455.0,3604.0,2153.0,1195.0,974.0,80.58,10.869048,3.092857,17.934921,8.627533,52.186355,"Yates County, New York - Week of 2021-05-10",23.998080,0.0,4671.626270,103.991681,1211.903048,36177.105832,48644.108471,51355.891529,12327.013839,13230.941525,13262.938965,9939.204864,9615.230782,13818.894488,14414.846812,8611.311095,4779.617631,-70.000000,,0.516351,0.0,-44.606947,3.466026,11.960434,-38.005093,10.541505,-58.800541,-32.873071
58150,"Yates County, New York",2021-05-17,4.0,0.0,1172.0,26.0,274.0,9319.0,25002.0,12162.0,12840.0,3082.0,3308.0,3316.0,2485.0,2404.0,3455.0,3604.0,2153.0,1195.0,974.0,80.58,19.763492,10.791270,28.157143,0.191911,54.239643,"Yates County, New York - Week of 2021-05-17",15.998720,0.0,4687.624990,103.991681,1095.912327,37273.018159,48644.108471,51355.891529,12327.013839,13230.941525,13262.938965,9939.204864,9615.230782,13818.894488,14414.846812,8611.311095,4779.617631,-33.333333,,0.342466,0.0,-9.570957,3.029298,81.832785,248.909439,56.996196,-97.775598,3.934532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58641,"Yuma County, Colorado",2022-08-15,0.0,0.0,,,1.0,4476.0,9959.0,4956.0,5003.0,1520.0,1364.0,1113.0,1223.0,1117.0,1220.0,1142.0,769.0,491.0,6136.0,79.65,22.316667,14.610318,31.771428,4.080329,55.260483,"Yuma County, Colorado - Week of 2022-08-15",0.000000,0.0,,,10.041169,44944.271513,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,-50.000000,0.022346,-13.859020,-10.479478,-10.310526,307.263579,29.454447
58642,"Yuma County, Colorado",2022-08-22,0.0,0.0,,,0.0,4476.0,9959.0,4956.0,5003.0,1520.0,1364.0,1113.0,1223.0,1117.0,1220.0,1142.0,769.0,491.0,6136.0,79.65,23.825397,13.609524,33.813492,2.394857,38.153665,"Yuma County, Colorado - Week of 2022-08-22",0.000000,0.0,,,0.000000,44944.271513,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,-100.000000,0.000000,6.760553,-6.849910,6.427358,-41.307257,-30.956693
58643,"Yuma County, Colorado",2022-08-29,0.0,0.0,,,0.0,,9959.0,4956.0,5003.0,1520.0,1364.0,1113.0,1223.0,1117.0,1220.0,1142.0,769.0,491.0,6136.0,79.65,23.581129,13.655379,34.371605,1.320901,39.698171,"Yuma County, Colorado - Week of 2022-08-29",0.000000,0.0,,,0.000000,,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,,,-1.025242,0.336933,1.650563,-44.844264,4.048120
58644,"Yuma County, Colorado",2022-09-05,0.0,0.0,,,11.0,4487.0,9959.0,4956.0,5003.0,1520.0,1364.0,1113.0,1223.0,1117.0,1220.0,1142.0,769.0,491.0,6136.0,79.65,21.013404,11.675926,32.332716,0.464457,42.166975,"Yuma County, Colorado - Week of 2022-09-05",0.000000,0.0,,,110.452857,45054.724370,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,inf,,-10.888898,-14.495775,-5.931899,-64.837864,6.218938


In [9]:
columns_to_keep = [
    'county_name'
    , 'week'
    , 'description'
    , 'area_sq_km'
    , 'life_expectancy'
    , 'average_temperature_celsius'
    , 'minimum_temperature_celsius'
    , 'maximum_temperature_celsius'
    , 'rainfall_mm'
    , 'relative_humidity'
    , 'new_confirmed_per_100k'
    , 'new_deceased_per_100k'
    , 'cumulative_confirmed_per_100k'
    , 'cumulative_deceased_per_100k'
    , 'new_persons_fully_vaccinated_per_100k'
    , 'cumulative_persons_fully_vaccinated_per_100k'
    , 'population_male_per_100k'
    , 'population_female_per_100k'
    , 'population_age_00_09_per_100k'
    , 'population_age_10_19_per_100k'
    , 'population_age_20_29_per_100k'
    , 'population_age_30_39_per_100k'
    , 'population_age_40_49_per_100k'
    , 'population_age_50_59_per_100k'
    , 'population_age_60_69_per_100k'
    , 'population_age_70_79_per_100k'
    , 'population_age_80_and_older_per_100k'
    , 'new_confirmed_per_100k_pct_change'
    , 'new_deceased_per_100k_pct_change'
    , 'cumulative_confirmed_per_100k_pct_change'
    , 'cumulative_deceased_per_100k_pct_change'
    , 'new_persons_fully_vaccinated_per_100k_pct_change'
    , 'cumulative_persons_fully_vaccinated_per_100k_pct_change'
    , 'average_temperature_celsius_pct_change'
    , 'minimum_temperature_celsius_pct_change'
    , 'maximum_temperature_celsius_pct_change'
    , 'rainfall_mm_pct_change'
    , 'relative_humidity_pct_change'
]

weekly_df = weekly_df[columns_to_keep]
weekly_df

Unnamed: 0,county_name,week,description,area_sq_km,life_expectancy,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,relative_humidity,new_confirmed_per_100k,new_deceased_per_100k,cumulative_confirmed_per_100k,cumulative_deceased_per_100k,new_persons_fully_vaccinated_per_100k,cumulative_persons_fully_vaccinated_per_100k,population_male_per_100k,population_female_per_100k,population_age_00_09_per_100k,population_age_10_19_per_100k,population_age_20_29_per_100k,population_age_30_39_per_100k,population_age_40_49_per_100k,population_age_50_59_per_100k,population_age_60_69_per_100k,population_age_70_79_per_100k,population_age_80_and_older_per_100k,new_confirmed_per_100k_pct_change,new_deceased_per_100k_pct_change,cumulative_confirmed_per_100k_pct_change,cumulative_deceased_per_100k_pct_change,new_persons_fully_vaccinated_per_100k_pct_change,cumulative_persons_fully_vaccinated_per_100k_pct_change,average_temperature_celsius_pct_change,minimum_temperature_celsius_pct_change,maximum_temperature_celsius_pct_change,rainfall_mm_pct_change,relative_humidity_pct_change
0,"Adams County, Colorado",2019-12-30,"Adams County, Colorado - Week of 2019-12-30",3102.0,77.977528,2.713334,-4.903333,10.976790,0.000000,43.757030,0.0,0.0,,,0.000000,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593,,,,,,,,,,,
1,"Adams County, Colorado",2020-01-06,"Adams County, Colorado - Week of 2020-01-06",3102.0,77.977528,0.588095,-6.238889,8.563492,0.000000,43.450591,0.0,0.0,,,0.000000,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593,,,,,,,-78.325729,27.237710,-21.985462,,-0.700319
2,"Adams County, Colorado",2020-01-13,"Adams County, Colorado - Week of 2020-01-13",3102.0,77.977528,-0.452381,-7.979365,8.264286,0.000000,41.711824,0.0,0.0,,,0.000000,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593,,,,,,,-176.923055,27.897216,-3.493977,,-4.001713
3,"Adams County, Colorado",2020-01-20,"Adams County, Colorado - Week of 2020-01-20",3102.0,77.977528,2.561565,-4.450340,11.145692,0.000000,42.763934,0.0,0.0,,,0.000000,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593,,,,,,,-666.240740,-44.226888,34.865760,,2.522331
4,"Adams County, Colorado",2020-01-27,"Adams County, Colorado - Week of 2020-01-27",3102.0,77.977528,3.066667,-4.392857,11.653616,1.200453,48.735622,0.0,0.0,,,0.000000,,50483.925416,49516.074584,14863.281638,14501.677952,14276.097619,16098.016243,13375.762029,11874.540797,8652.475228,4235.389901,2122.758593,,,,,,,19.718495,-1.291657,4.557132,inf,13.964309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58641,"Yuma County, Colorado",2022-08-15,"Yuma County, Colorado - Week of 2022-08-15",6136.0,79.650000,22.316667,14.610318,31.771428,4.080329,55.260483,0.0,0.0,,,10.041169,44944.271513,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,-50.0,0.022346,-13.859020,-10.479478,-10.310526,307.263579,29.454447
58642,"Yuma County, Colorado",2022-08-22,"Yuma County, Colorado - Week of 2022-08-22",6136.0,79.650000,23.825397,13.609524,33.813492,2.394857,38.153665,0.0,0.0,,,0.000000,44944.271513,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,-100.0,0.000000,6.760553,-6.849910,6.427358,-41.307257,-30.956693
58643,"Yuma County, Colorado",2022-08-29,"Yuma County, Colorado - Week of 2022-08-29",6136.0,79.650000,23.581129,13.655379,34.371605,1.320901,39.698171,0.0,0.0,,,0.000000,,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,,,-1.025242,0.336933,1.650563,-44.844264,4.048120
58644,"Yuma County, Colorado",2022-09-05,"Yuma County, Colorado - Week of 2022-09-05",6136.0,79.650000,21.013404,11.675926,32.332716,0.464457,42.166975,0.0,0.0,,,110.452857,45054.724370,49764.032533,50235.967467,15262.576564,13696.154232,11175.820866,12280.349433,11215.985541,12250.225926,11467.014761,7721.658801,4930.213877,,,,,inf,,-10.888898,-14.495775,-5.931899,-64.837864,6.218938


In [11]:
# Function to apply binning
def apply_binning(df):
    # area_sq_km
    bins = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, float('inf')]
    labels = ['<1000', '1000-2000', '2000-3000', '3000-4000', '4000-5000', '5000-6000', '6000-7000', '7000-8000', '8000-9000', '9000-10000', '>10000']
    df['area_sq_km_bins'] = pd.cut(df['area_sq_km'], bins=bins, labels=labels, right=False)


    # life_expectancy
    bins = [0, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, float('inf')]
    labels = ['<70', '70-72', '72-74', '74-76', '76-78', '78-80', '80-82', '82-84', '84-86', '86-88', '>88']
    df['life_expectancy_bins'] = pd.cut(df['life_expectancy'], bins=bins, labels=labels, right=False)

    # average_temperature_celsius, minimum_temperature_celsius, maximum_temperature_celsius
    temp_bins = [df['average_temperature_celsius'].min(), 0, 15, 25, 35, df['average_temperature_celsius'].max()]
    temp_labels = ['<0°C', '0-15°C', '15-25°C', '25-35°C', '>35°C']
    df['average_temperature_celsius_bins'] = pd.cut(df['average_temperature_celsius'], bins=temp_bins, labels=temp_labels, right=False)
    df['minimum_temperature_celsius_bins'] = pd.cut(df['minimum_temperature_celsius'], bins=temp_bins, labels=temp_labels, right=False)
    df['maximum_temperature_celsius_bins'] = pd.cut(df['maximum_temperature_celsius'], bins=temp_bins, labels=temp_labels, right=False)

    # rainfall_mm
    bins = [0, 5, 10, 15, 20, float('inf')]
    labels = ['<5mm', '5-10mm', '10-15mm', '15-20mm', '>20mm']
    df['rainfall_mm_bins'] = pd.cut(df['rainfall_mm'], bins=bins, labels=labels, right=False)

    # relative_humidity
    bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    labels = ['<10%', '10-20%', '20-30%', '30-40%', '40-50%', '50-60%', '60-70%', '70-80%', '80-90%', '>90%']
    df['relative_humidity_bins'] = pd.cut(df['relative_humidity'], bins=bins, labels=labels, right=False)

    # new_confirmed_per_100k
    bins = [-100000, 50, 100, 150, 200, 250, 300, 350, 400, float('inf')]
    labels = ['<50', '50-100', '100-150', '150-200', '200-250', '250-300', '300-350', '350-400', '>400']
    df['new_confirmed_per_100k_bins'] = pd.cut(df['new_confirmed_per_100k'], bins=bins, labels=labels, right=False)

    # cumulative_confirmed_per_100k
    bins = [-100000, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, float('inf')]
    labels = ['<1000', '1000-2000', '2000-3000', '3000-4000', '4000-5000', '5000-6000', '6000-7000', '7000-8000', '8000-9000', '9000-10000', '>10000']
    df['cumulative_confirmed_per_100k_bins'] = pd.cut(df['cumulative_confirmed_per_100k'], bins=bins, labels=labels, right=False)

    # cumulative_deceased_per_100k
    bins = [-100000, 100, 200, 300, 400, 500, 600, 700, 800, float('inf')]
    labels = ['<100', '100-200', '200-300', '300-400', '400-500', '500-600', '600-700', '700-800', '>800']
    df['cumulative_deceased_per_100k_bins'] = pd.cut(df['cumulative_deceased_per_100k'], bins=bins, labels=labels, right=False)

    # new_persons_fully_vaccinated_per_100k
    bins = [-100000, 50, 100, 150, 200, 250, 300, 350, 400, float('inf')]
    labels = ['<50', '50-100', '100-150', '150-200', '200-250', '250-300', '300-350', '350-400', '>400']
    df['new_persons_fully_vaccinated_per_100k_bins'] = pd.cut(df['new_persons_fully_vaccinated_per_100k'], bins=bins, labels=labels, right=False)

    # cumulative_persons_fully_vaccinated_per_100k
    bins = [-100000, 10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000, float('inf')]
    labels = ['<10000', '10000-20000', '20000-30000', '30000-40000', '40000-50000', '50000-60000', '60000-70000', '70000-80000', '80000-90000', '90000-100000', '>100000']
    df['cumulative_persons_fully_vaccinated_per_100k_bins'] = pd.cut(df['cumulative_persons_fully_vaccinated_per_100k'], bins=bins, labels=labels, right=False)

    # population_male_per_100k population_female_per_100k
    bins = [40000, 42000, 44000, 46000, 48000, 50000, 52000, 54000, 56000, 58000, 60000, float('inf')]
    labels = ['<42000', '42000-44000', '44000-46000', '46000-48000', '48000-50000', '50000-52000', '52000-54000', '54000-56000', '56000-58000', '58000-60000', '>60000']
    df['population_male_per_100k_bins'] = pd.cut(df['population_male_per_100k'], bins=bins, labels=labels, right=False)
    df['population_female_per_100k_bins'] = pd.cut(df['population_female_per_100k'], bins=bins, labels=labels, right=False)

    # population_age_xxx
    bins = [0, 2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000, 20000, float('inf')]
    labels = ['<2000', '2000-4000', '4000-6000', '6000-8000', '8000-10000', '10000-12000', '12000-14000', '14000-16000', '16000-18000', '18000-20000', '>20000']
    df['population_age_00_09_per_100k_bins'] = pd.cut(df['population_age_00_09_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_10_19_per_100k_bins'] = pd.cut(df['population_age_10_19_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_20_29_per_100k_bins'] = pd.cut(df['population_age_20_29_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_30_39_per_100k_bins'] = pd.cut(df['population_age_30_39_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_40_49_per_100k_bins'] = pd.cut(df['population_age_40_49_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_50_59_per_100k_bins'] = pd.cut(df['population_age_50_59_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_60_69_per_100k_bins'] = pd.cut(df['population_age_60_69_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_70_79_per_100k_bins'] = pd.cut(df['population_age_70_79_per_100k'], bins=bins, labels=labels, right=False)
    df['population_age_80_and_older_per_100k_bins'] = pd.cut(df['population_age_80_and_older_per_100k'], bins=bins, labels=labels, right=False)


    # new_confirmed_per_100k_pct_change
    bins = [-100, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 100]
    labels = ['<-50%', '-50% - -40%', '-40% - -30%', '-30% - -20%', '-20% - -10%', '-10% - 0%', '0% - 10%', '10% - 20%', '20% - 30%', '30% - 40%', '40% - 50%', '>50%']
    df['new_confirmed_per_100k_pct_change_bins'] = pd.cut(df['new_confirmed_per_100k_pct_change'], bins=bins, labels=labels, right=False)
    df['new_deceased_per_100k_pct_change_bins'] = pd.cut(df['new_deceased_per_100k_pct_change'], bins=bins, labels=labels, right=False)
    df['cumulative_confirmed_per_100k_pct_change_bins'] = pd.cut(df['cumulative_confirmed_per_100k_pct_change'], bins=bins, labels=labels, right=False)
    df['cumulative_deceased_per_100k_pct_change_bins'] = pd.cut(df['cumulative_deceased_per_100k_pct_change'], bins=bins, labels=labels, right=False)
    df['cumulative_persons_fully_vaccinated_per_100k_pct_change_bins'] = pd.cut(df['cumulative_persons_fully_vaccinated_per_100k_pct_change'], bins=bins, labels=labels, right=False)

    df['average_temperature_celsius_pct_change_bins'] = pd.cut(df['average_temperature_celsius_pct_change'], bins=bins, labels=labels, right=False)
    df['minimum_temperature_celsius_pct_change_bins'] = pd.cut(df['minimum_temperature_celsius_pct_change'], bins=bins, labels=labels, right=False)
    df['maximum_temperature_celsius_pct_change_bins'] = pd.cut(df['maximum_temperature_celsius_pct_change'], bins=bins, labels=labels, right=False)

    df['rainfall_mm_pct_change_bins'] = pd.cut(df['rainfall_mm_pct_change'], bins=bins, labels=labels, right=False)
    df['relative_humidity_pct_change_bins'] = pd.cut(df['relative_humidity_pct_change'], bins=bins, labels=labels, right=False)

    
    
apply_binning(weekly_df)

In [12]:
sample_df = weekly_df.sample(n=10)
sample_df


Unnamed: 0,county_name,week,description,area_sq_km,life_expectancy,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,relative_humidity,new_confirmed_per_100k,new_deceased_per_100k,cumulative_confirmed_per_100k,cumulative_deceased_per_100k,new_persons_fully_vaccinated_per_100k,cumulative_persons_fully_vaccinated_per_100k,population_male_per_100k,population_female_per_100k,population_age_00_09_per_100k,population_age_10_19_per_100k,population_age_20_29_per_100k,population_age_30_39_per_100k,population_age_40_49_per_100k,population_age_50_59_per_100k,population_age_60_69_per_100k,population_age_70_79_per_100k,population_age_80_and_older_per_100k,new_confirmed_per_100k_pct_change,new_deceased_per_100k_pct_change,cumulative_confirmed_per_100k_pct_change,cumulative_deceased_per_100k_pct_change,new_persons_fully_vaccinated_per_100k_pct_change,cumulative_persons_fully_vaccinated_per_100k_pct_change,average_temperature_celsius_pct_change,minimum_temperature_celsius_pct_change,maximum_temperature_celsius_pct_change,rainfall_mm_pct_change,relative_humidity_pct_change,area_sq_km_bins,life_expectancy_bins,average_temperature_celsius_bins,minimum_temperature_celsius_bins,maximum_temperature_celsius_bins,rainfall_mm_bins,relative_humidity_bins,new_confirmed_per_100k_bins,cumulative_confirmed_per_100k_bins,cumulative_deceased_per_100k_bins,new_persons_fully_vaccinated_per_100k_bins,cumulative_persons_fully_vaccinated_per_100k_bins,population_male_per_100k_bins,population_female_per_100k_bins,population_age_00_09_per_100k_bins,population_age_10_19_per_100k_bins,population_age_20_29_per_100k_bins,population_age_30_39_per_100k_bins,population_age_40_49_per_100k_bins,population_age_50_59_per_100k_bins,population_age_60_69_per_100k_bins,population_age_70_79_per_100k_bins,population_age_80_and_older_per_100k_bins,new_confirmed_per_100k_pct_change_bins,new_deceased_per_100k_pct_change_bins,cumulative_confirmed_per_100k_pct_change_bins,cumulative_deceased_per_100k_pct_change_bins,cumulative_persons_fully_vaccinated_per_100k_pct_change_bins,average_temperature_celsius_pct_change_bins,minimum_temperature_celsius_pct_change_bins,maximum_temperature_celsius_pct_change_bins,rainfall_mm_pct_change_bins,relative_humidity_pct_change_bins
5079,"Burke County, North Carolina",2022-01-31,"Burke County, North Carolina - Week of 2022-01-31",1334.0,76.747059,3.572222,-2.121429,9.768254,31.29915,66.664848,1416.418026,4.436705,25447.829897,300.586754,222.944419,45833.379549,50008.318822,49991.681178,9828.41044,11903.679138,12393.935024,10759.009284,12836.496334,15541.777122,13419.923023,8611.644132,4705.125503,-3.110774,-20.0,5.894028,1.498127,43.571429,0.488801,203.301921,-53.022847,14.429154,2540.536457,29.449909,1000-2000,76-78,0-15°C,<0°C,0-15°C,>20mm,60-70%,>400,>10000,300-400,200-250,40000-50000,50000-52000,48000-50000,8000-10000,10000-12000,12000-14000,10000-12000,12000-14000,14000-16000,12000-14000,8000-10000,4000-6000,-10% - 0%,-20% - -10%,0% - 10%,0% - 10%,0% - 10%,,<-50%,10% - 20%,,20% - 30%
26217,"Lake County, California",2021-09-13,"Lake County, California - Week of 2021-09-13",3443.0,74.821429,17.74515,11.864197,25.998236,1.016,62.897869,266.724899,7.798974,9374.366333,135.702142,907.800533,49721.576641,49794.886993,50205.113007,11704.699662,11063.624027,10564.489713,10885.807428,10523.93505,14590.319914,16070.565114,9854.783111,4741.775982,-37.132353,25.0,2.928584,6.097561,-22.4,1.859722,-13.957393,-4.448567,-15.018484,115.384746,28.057117,3000-4000,74-76,15-25°C,0-15°C,25-35°C,<5mm,60-70%,250-300,9000-10000,100-200,>400,40000-50000,48000-50000,50000-52000,10000-12000,10000-12000,10000-12000,10000-12000,10000-12000,14000-16000,16000-18000,8000-10000,4000-6000,-40% - -30%,20% - 30%,0% - 10%,0% - 10%,0% - 10%,-20% - -10%,-10% - 0%,-20% - -10%,,20% - 30%
36489,"Onondaga County, New York",2022-08-15,"Onondaga County, New York - Week of 2022-08-15",2087.0,79.401538,20.495238,13.411905,28.74127,10.2616,70.817652,0.0,0.0,,,61.282604,74370.445761,48203.85669,51796.14331,11588.475406,13114.910296,13860.479217,12321.268095,11447.503762,14423.932697,12068.775106,6636.278002,4538.377419,,,,,-41.528926,0.08247,-0.200959,-6.361167,3.907954,53.030303,5.053707,2000-3000,78-80,15-25°C,0-15°C,25-35°C,10-15mm,70-80%,<50,,,50-100,70000-80000,48000-50000,50000-52000,10000-12000,12000-14000,12000-14000,12000-14000,10000-12000,14000-16000,12000-14000,6000-8000,4000-6000,,,,,0% - 10%,-10% - 0%,-10% - 0%,0% - 10%,>50%,0% - 10%
1003,"Alexander County, North Carolina",2020-03-02,"Alexander County, North Carolina - Week of 202...",682.0,77.171429,7.962698,2.196825,13.919841,8.264173,57.982207,0.0,0.0,,,0.0,,50970.57319,49029.42681,10400.344614,11924.184907,11636.109092,11291.495033,13520.717228,14573.405487,13329.564117,9035.349864,4288.829658,,,,,,,43.035691,301.936212,18.891457,-64.940881,-6.742031,<1000,76-78,0-15°C,0-15°C,0-15°C,5-10mm,50-60%,<50,,,<50,,50000-52000,48000-50000,10000-12000,10000-12000,10000-12000,10000-12000,12000-14000,14000-16000,12000-14000,8000-10000,4000-6000,,,,,,40% - 50%,,10% - 20%,<-50%,-10% - 0%
47949,"Seminole County, Florida",2021-10-25,"Seminole County, Florida - Week of 2021-10-25",893.0,79.789873,22.526191,18.338889,27.957936,26.691167,74.193552,47.320555,5.185814,13408.354779,185.176782,303.154055,56854.025812,48343.888626,51656.111374,11109.742632,12261.857688,13725.769823,14102.173504,13620.757086,13991.542801,11111.903388,6403.616241,3672.636835,-13.438735,166.666667,0.354168,2.881152,-25.37234,0.536073,-5.928992,-0.642593,-4.938909,5154.166732,9.723249,<1000,78-80,15-25°C,15-25°C,25-35°C,>20mm,70-80%,<50,>10000,100-200,300-350,50000-60000,48000-50000,50000-52000,10000-12000,12000-14000,12000-14000,14000-16000,12000-14000,12000-14000,10000-12000,6000-8000,2000-4000,-20% - -10%,,0% - 10%,0% - 10%,0% - 10%,-10% - 0%,-10% - 0%,-10% - 0%,,0% - 10%
1694,"Amador County, California",2022-07-11,"Amador County, California - Week of 2022-07-11",1566.0,79.2,26.610891,17.016645,36.261023,0.0,32.734134,0.0,0.0,,,38.931714,55874.795609,53839.964702,46160.035298,8167.87355,8650.626801,9592.774274,11111.111111,11990.967842,15448.104026,17607.516416,11806.691064,5624.334917,,,,,25.0,0.069725,22.659743,27.078535,17.920335,,-32.534331,1000-2000,78-80,25-35°C,15-25°C,>35°C,<5mm,30-40%,<50,,,<50,50000-60000,52000-54000,46000-48000,8000-10000,8000-10000,8000-10000,10000-12000,10000-12000,14000-16000,16000-18000,10000-12000,4000-6000,,,,,0% - 10%,20% - 30%,20% - 30%,10% - 20%,,-40% - -30%
10829,"Crowley County, Colorado",2020-09-14,"Crowley County, Colorado - Week of 2020-09-14",2073.0,76.4,18.905644,7.690476,29.963845,0.0,34.459267,0.0,0.0,1250.0,17.123288,0.0,,73236.30137,26763.69863,6250.0,7294.520548,19708.90411,19092.465753,17020.547945,12619.863014,9109.589041,5993.150685,2910.958904,,,0.0,0.0,,,61.815984,116.563203,36.704329,-100.0,-39.556545,2000-3000,76-78,15-25°C,0-15°C,25-35°C,<5mm,30-40%,<50,1000-2000,<100,<50,,>60000,,6000-8000,6000-8000,18000-20000,18000-20000,16000-18000,12000-14000,8000-10000,4000-6000,2000-4000,,,0% - 10%,0% - 10%,,>50%,,30% - 40%,<-50%,-40% - -30%
41294,"Price County, Wisconsin",2022-03-07,"Price County, Wisconsin - Week of 2022-03-07",3311.0,,-8.397619,-13.906349,-2.676984,1.365956,60.576486,14.947683,14.947683,24215.246637,239.16293,97.15994,61696.562033,50956.651719,49043.348281,8849.028401,11001.494768,8221.22571,8684.603886,11106.12855,17324.364723,18251.121076,10538.116592,6023.916293,-71.428571,inf,0.061767,6.666667,-38.095238,0.157729,45.824151,22.806282,-2072.516708,-88.790454,-2.486626,3000-4000,,<0°C,<0°C,<0°C,<5mm,60-70%,<50,>10000,200-300,50-100,60000-70000,50000-52000,48000-50000,8000-10000,10000-12000,8000-10000,8000-10000,10000-12000,16000-18000,18000-20000,10000-12000,6000-8000,<-50%,,0% - 10%,0% - 10%,0% - 10%,40% - 50%,20% - 30%,,<-50%,-10% - 0%
52518,"Trempealeau County, Wisconsin",2022-04-18,"Trempealeau County, Wisconsin - Week of 2022-0...",1922.0,,8.096825,3.76508,14.427778,25.583444,67.191696,85.115076,3.404603,28676.971265,217.894593,98.733488,66828.952744,50725.180444,49274.819556,13873.75732,13049.843388,10458.940488,11630.123928,11953.561215,14428.707613,12671.932453,7479.912842,4453.220754,127.272727,inf,0.29769,1.587302,0.0,0.147959,97.407115,-447.291376,50.925698,3.11389,11.18112,1000-2000,,0-15°C,0-15°C,0-15°C,>20mm,60-70%,50-100,>10000,200-300,50-100,60000-70000,50000-52000,48000-50000,12000-14000,12000-14000,10000-12000,10000-12000,10000-12000,14000-16000,12000-14000,6000-8000,4000-6000,,,0% - 10%,0% - 10%,0% - 10%,>50%,,>50%,0% - 10%,10% - 20%
36732,"Ontario County, New York",2021-11-01,"Ontario County, New York - Week of 2021-11-01",1716.0,80.244,4.413492,0.121429,10.29127,2.4638,70.035456,354.214976,0.0,9188.592087,101.334697,209.972795,62200.332305,48958.352353,51041.647647,10504.117293,12603.845241,11958.407128,11119.428874,11819.642498,15323.449397,13789.735069,8214.500904,4666.873596,60.995851,-100.0,4.009507,0.0,-27.444795,0.338718,-55.124275,-97.90181,-27.784585,-96.534477,-17.677414,1000-2000,80-82,0-15°C,0-15°C,0-15°C,<5mm,70-80%,350-400,9000-10000,100-200,200-250,60000-70000,48000-50000,50000-52000,10000-12000,12000-14000,10000-12000,10000-12000,10000-12000,14000-16000,12000-14000,8000-10000,4000-6000,>50%,<-50%,0% - 10%,0% - 10%,0% - 10%,<-50%,<-50%,-30% - -20%,<-50%,-20% - -10%


In [14]:
columns_to_keep = ['county_name'
    , 'week'
    , 'description'
    , 'area_sq_km_bins'
    , 'life_expectancy_bins'
    , 'average_temperature_celsius_bins'
    , 'minimum_temperature_celsius_bins'
    , 'maximum_temperature_celsius_bins'
    , 'rainfall_mm_bins'
    , 'relative_humidity_bins'
    , 'new_confirmed_per_100k_bins'
    , 'cumulative_confirmed_per_100k_bins'
    , 'cumulative_deceased_per_100k_bins'
    , 'new_persons_fully_vaccinated_per_100k_bins'
    , 'cumulative_persons_fully_vaccinated_per_100k_bins'
    , 'population_male_per_100k_bins'
    , 'population_female_per_100k_bins'
    , 'population_age_00_09_per_100k_bins'
    , 'population_age_10_19_per_100k_bins'
    , 'population_age_20_29_per_100k_bins'
    , 'population_age_30_39_per_100k_bins'
    , 'population_age_40_49_per_100k_bins'
    , 'population_age_50_59_per_100k_bins'
    , 'population_age_60_69_per_100k_bins'
    , 'population_age_70_79_per_100k_bins'
    , 'population_age_80_and_older_per_100k_bins'
    , 'new_confirmed_per_100k_pct_change_bins'
    , 'new_deceased_per_100k_pct_change_bins'
    , 'cumulative_confirmed_per_100k_pct_change_bins'
    , 'cumulative_deceased_per_100k_pct_change_bins'
    , 'cumulative_persons_fully_vaccinated_per_100k_pct_change_bins'
    , 'average_temperature_celsius_pct_change_bins'
    , 'minimum_temperature_celsius_pct_change_bins'
    , 'maximum_temperature_celsius_pct_change_bins'
    , 'rainfall_mm_pct_change_bins'
    , 'relative_humidity_pct_change_bins'
    ]

weekly_df_binned = weekly_df[columns_to_keep]   

In [15]:
for column in weekly_df_binned.columns:
    print(column)

county_name
week
description
area_sq_km_bins
life_expectancy_bins
average_temperature_celsius_bins
minimum_temperature_celsius_bins
maximum_temperature_celsius_bins
rainfall_mm_bins
relative_humidity_bins
new_confirmed_per_100k_bins
cumulative_confirmed_per_100k_bins
cumulative_deceased_per_100k_bins
new_persons_fully_vaccinated_per_100k_bins
cumulative_persons_fully_vaccinated_per_100k_bins
population_male_per_100k_bins
population_female_per_100k_bins
population_age_00_09_per_100k_bins
population_age_10_19_per_100k_bins
population_age_20_29_per_100k_bins
population_age_30_39_per_100k_bins
population_age_40_49_per_100k_bins
population_age_50_59_per_100k_bins
population_age_60_69_per_100k_bins
population_age_70_79_per_100k_bins
population_age_80_and_older_per_100k_bins
new_confirmed_per_100k_pct_change_bins
new_deceased_per_100k_pct_change_bins
cumulative_confirmed_per_100k_pct_change_bins
cumulative_deceased_per_100k_pct_change_bins
cumulative_persons_fully_vaccinated_per_100k_pct_chan

In [16]:
weekly_df_binned.head()

Unnamed: 0,county_name,week,description,area_sq_km_bins,life_expectancy_bins,average_temperature_celsius_bins,minimum_temperature_celsius_bins,maximum_temperature_celsius_bins,rainfall_mm_bins,relative_humidity_bins,new_confirmed_per_100k_bins,cumulative_confirmed_per_100k_bins,cumulative_deceased_per_100k_bins,new_persons_fully_vaccinated_per_100k_bins,cumulative_persons_fully_vaccinated_per_100k_bins,population_male_per_100k_bins,population_female_per_100k_bins,population_age_00_09_per_100k_bins,population_age_10_19_per_100k_bins,population_age_20_29_per_100k_bins,population_age_30_39_per_100k_bins,population_age_40_49_per_100k_bins,population_age_50_59_per_100k_bins,population_age_60_69_per_100k_bins,population_age_70_79_per_100k_bins,population_age_80_and_older_per_100k_bins,new_confirmed_per_100k_pct_change_bins,new_deceased_per_100k_pct_change_bins,cumulative_confirmed_per_100k_pct_change_bins,cumulative_deceased_per_100k_pct_change_bins,cumulative_persons_fully_vaccinated_per_100k_pct_change_bins,average_temperature_celsius_pct_change_bins,minimum_temperature_celsius_pct_change_bins,maximum_temperature_celsius_pct_change_bins,rainfall_mm_pct_change_bins,relative_humidity_pct_change_bins
0,"Adams County, Colorado",2019-12-30,"Adams County, Colorado - Week of 2019-12-30",3000-4000,76-78,0-15°C,<0°C,0-15°C,<5mm,40-50%,<50,,,<50,,50000-52000,48000-50000,14000-16000,14000-16000,14000-16000,16000-18000,12000-14000,10000-12000,8000-10000,4000-6000,2000-4000,,,,,,,,,,
1,"Adams County, Colorado",2020-01-06,"Adams County, Colorado - Week of 2020-01-06",3000-4000,76-78,0-15°C,<0°C,0-15°C,<5mm,40-50%,<50,,,<50,,50000-52000,48000-50000,14000-16000,14000-16000,14000-16000,16000-18000,12000-14000,10000-12000,8000-10000,4000-6000,2000-4000,,,,,,<-50%,20% - 30%,-30% - -20%,,-10% - 0%
2,"Adams County, Colorado",2020-01-13,"Adams County, Colorado - Week of 2020-01-13",3000-4000,76-78,<0°C,<0°C,0-15°C,<5mm,40-50%,<50,,,<50,,50000-52000,48000-50000,14000-16000,14000-16000,14000-16000,16000-18000,12000-14000,10000-12000,8000-10000,4000-6000,2000-4000,,,,,,,20% - 30%,-10% - 0%,,-10% - 0%
3,"Adams County, Colorado",2020-01-20,"Adams County, Colorado - Week of 2020-01-20",3000-4000,76-78,0-15°C,<0°C,0-15°C,<5mm,40-50%,<50,,,<50,,50000-52000,48000-50000,14000-16000,14000-16000,14000-16000,16000-18000,12000-14000,10000-12000,8000-10000,4000-6000,2000-4000,,,,,,,-50% - -40%,30% - 40%,,0% - 10%
4,"Adams County, Colorado",2020-01-27,"Adams County, Colorado - Week of 2020-01-27",3000-4000,76-78,0-15°C,<0°C,0-15°C,<5mm,40-50%,<50,,,<50,,50000-52000,48000-50000,14000-16000,14000-16000,14000-16000,16000-18000,12000-14000,10000-12000,8000-10000,4000-6000,2000-4000,,,,,,10% - 20%,-10% - 0%,0% - 10%,,10% - 20%
