In [1]:
import pandas as pd
import numpy as np
import requests #api
import json #json file
from datetime import datetime

import matplotlib.pyplot as plt
from scipy.stats import linregress #stastistical analysis

from pathlib import Path #reading csv - connecting to the csv merges mac and windows

## COVID Vaccine Data

In [2]:
# https://data.chhs.ca.gov/dataset/vaccine-progress-dashboard
CA_vax_path = Path("covid19vaccinesbycounty.csv")
CA_vax_df = pd.read_csv(CA_vax_path)
CA_vax_df

Unnamed: 0,county,administered_date,total_doses,cumulative_total_doses,partially_vaccinated,total_partially_vaccinated,fully_vaccinated,cumulative_fully_vaccinated,at_least_one_dose,cumulative_at_least_one_dose,california_flag,up_to_date_count,cumulative_up_to_date_count
0,Alameda,2020-01-05,0,0,0,0,0,0,0,0,,0,0
1,Alameda,2020-07-27,0,0,0,0,0,0,0,0,,0,0
2,Alameda,2020-07-29,0,1,0,1,0,0,0,1,,0,0
3,Alameda,2020-07-30,0,1,0,1,0,0,0,1,,0,0
4,Alameda,2020-07-31,0,1,0,1,0,0,0,1,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71791,Yuba,2023-09-09,2,110137,0,4194,0,40280,0,44480,California,2,7382
71792,Yuba,2023-09-11,2,110139,0,4194,0,40280,0,44480,California,2,7384
71793,Yuba,2023-09-12,1,110140,1,4195,0,40280,1,44481,California,1,7385
71794,Yuba,2023-09-13,1,110141,1,4196,0,40280,1,44482,California,1,7386


In [3]:
#county, admin date - month and year - group by, find max cumulative of take all of cumulative columns
cleaned_vaccine = CA_vax_df[['county','administered_date','cumulative_total_doses','cumulative_fully_vaccinated',\
                             'cumulative_at_least_one_dose','cumulative_up_to_date_count']]
cleaned_vaccine

Unnamed: 0,county,administered_date,cumulative_total_doses,cumulative_fully_vaccinated,cumulative_at_least_one_dose,cumulative_up_to_date_count
0,Alameda,2020-01-05,0,0,0,0
1,Alameda,2020-07-27,0,0,0,0
2,Alameda,2020-07-29,1,0,1,0
3,Alameda,2020-07-30,1,0,1,0
4,Alameda,2020-07-31,1,0,1,0
...,...,...,...,...,...,...
71791,Yuba,2023-09-09,110137,40280,44480,7382
71792,Yuba,2023-09-11,110139,40280,44480,7384
71793,Yuba,2023-09-12,110140,40280,44481,7385
71794,Yuba,2023-09-13,110141,40280,44482,7386


In [4]:
# cleaned_vaccine['year'] = cleaned_vaccine['administered_date'].dt.year
# cleaned_vaccine['month'] = cleaned_vaccine['administered_date'].dt.month
# cleaned_vaccine.head()
#defaults turning datetme to timestamp because more secure

In [5]:
cleaned_vaccine.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71796 entries, 0 to 71795
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   county                        71796 non-null  object
 1   administered_date             71796 non-null  object
 2   cumulative_total_doses        71796 non-null  int64 
 3   cumulative_fully_vaccinated   71796 non-null  int64 
 4   cumulative_at_least_one_dose  71796 non-null  int64 
 5   cumulative_up_to_date_count   71796 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 3.3+ MB


In [None]:
# cleaned_vaccine['administered_date'] = pd.to_datetime(cleaned_vaccine['administered_date'])
# cleaned_vaccine.info()
#not going to change into date time because i will  be using the breaking a string as their own with '-'

In [7]:
#line 14 and 15 CA case urv to break up the string so dates are 01-05-2020 to year and month as 2020 and 07
# Get rid of changing into datetime
cleaned_vaccine[['year', 'month', 'day']] = \
    cleaned_vaccine['administered_date'].str.split('-', n=2, expand = True)
#delimiter is for column info not title
#n: (int type_ number of splits, default is -1)
cleaned_vaccine.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_vaccine[['year', 'month', 'day']] = \
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_vaccine[['year', 'month', 'day']] = \
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_vaccine[['year', 'month', 'day']] = \


Unnamed: 0,county,administered_date,cumulative_total_doses,cumulative_fully_vaccinated,cumulative_at_least_one_dose,cumulative_up_to_date_count,year,month,day
0,Alameda,2020-01-05,0,0,0,0,2020,1,5
1,Alameda,2020-07-27,0,0,0,0,2020,7,27
2,Alameda,2020-07-29,1,0,1,0,2020,7,29
3,Alameda,2020-07-30,1,0,1,0,2020,7,30
4,Alameda,2020-07-31,1,0,1,0,2020,7,31


In [10]:
#deleting extraneous columns
del cleaned_vaccine['administered_date']
cleaned_vaccine.head()

Unnamed: 0,county,cumulative_total_doses,cumulative_fully_vaccinated,cumulative_at_least_one_dose,cumulative_up_to_date_count,year,month,day
0,Alameda,0,0,0,0,2020,1,5
1,Alameda,0,0,0,0,2020,7,27
2,Alameda,1,0,1,0,2020,7,29
3,Alameda,1,0,1,0,2020,7,30
4,Alameda,1,0,1,0,2020,7,31


In [11]:
#deleting extraneous columns
del cleaned_vaccine['day']
cleaned_vaccine.head()

Unnamed: 0,county,cumulative_total_doses,cumulative_fully_vaccinated,cumulative_at_least_one_dose,cumulative_up_to_date_count,year,month
0,Alameda,0,0,0,0,2020,1
1,Alameda,0,0,0,0,2020,7
2,Alameda,1,0,1,0,2020,7
3,Alameda,1,0,1,0,2020,7
4,Alameda,1,0,1,0,2020,7


In [14]:
#we want to look at data for each month in each county
cleaned_vaccine.groupby(['month','year'])
cleaned_vaccine

Unnamed: 0,county,cumulative_total_doses,cumulative_fully_vaccinated,cumulative_at_least_one_dose,cumulative_up_to_date_count,year,month
0,Alameda,0,0,0,0,2020,01
1,Alameda,0,0,0,0,2020,07
2,Alameda,1,0,1,0,2020,07
3,Alameda,1,0,1,0,2020,07
4,Alameda,1,0,1,0,2020,07
...,...,...,...,...,...,...,...
71791,Yuba,110137,40280,44480,7382,2023,09
71792,Yuba,110139,40280,44480,7384,2023,09
71793,Yuba,110140,40280,44481,7385,2023,09
71794,Yuba,110141,40280,44482,7386,2023,09


In [None]:
#finding the max in each county per month
max_month = cleaned_vaccine.groupby('cumulative_total_doses').max()['cumulative_total_doses']