## City of LA vax data to Socrata

In [None]:
"""
!pip install google-cloud-bigquery 
!pip install sqlalchemy<1.4.0  
!pip install ibis>=1.4.0, ibis-framework>=1.4.0, ibis-framework[bigquery], ibis-bigquery>=0.1.1
!pip install gcsfs


#select * from ita-electedoffice-vaccines.electedoffice_vaccines_dashboard.la_city_vaccines  limit 100
"""

In [1]:
import gcsfs
import os
import pandas as pd


# Path to the service-account key file, exported through the environment
# variable that the Google Cloud client libraries read for credentials.
CREDENTIAL = "../gcp-credential.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = CREDENTIAL

# Bucket the vaccine extract is read from, and a GCS filesystem handle bound
# to the project.
BUCKET_NAME = "electedoffice_vaccines_dashboard"
gs = gcsfs.GCSFileSystem(project="ita-datalakepoc")

In [2]:
# Load the appointment-level extract straight from the bucket. The ZIP code
# column is forced to text instead of being inferred as a number.
df = pd.read_csv(
    f"gcs://{BUCKET_NAME}/data/la_city_vaccines.csv",
    dtype={"patient_zipcode": "str"},
)
df.head()

Unnamed: 0,site_type,agegroup,race_ethnicity,patient_race,patient_ethnicity,patient_gender,vaccine_site_name,appointment_booked_time,appointment_date,appt_status,patient_zipcode,patient_age,vaccine_id,shot_number,brand,main_site,peh,apptdate
0,City Site,30-49,Asian,Asian,Not of Hispanic Origin,Male,University of Southern California (USC) - Mode...,2021-04-22 16:00:00,2021-04-22,Discharged,91206.0,45.0,2608786,Second Shot,Moderna,USC,False,2021-05-11
1,City Site,30-49,White,White,Not of Hispanic Origin,Male,University of Southern California (USC) - Pfiz...,2021-04-22 23:00:00,2021-04-22,Discharged,90049.0,44.0,2348374,Second Shot,Pfizer,USC,False,2021-04-23
2,Mobile Site,50-64,Hispanic_Latino,Other,Hispanic Origin,Female,Green Meadows - Pfizer (Age 12+) - LAFD,2021-03-16 09:00:00,2021-03-16,Discharged,,55.0,1953612,Second Shot,Pfizer,Green Meadows,,2021-03-30
3,Private Site,30-49,White,White,Not of Hispanic Origin,Male,Los Angeles World Airport (LAWA) - Pfizer - LA...,2021-03-29 10:00:00,2021-03-29,Discharged,90045.0,33.0,1924889,Second Shot,Pfizer,LAWA,False,2021-03-29
4,City Site,30-49,Asian,Asian,Not of Hispanic Origin,Male,Dodger Stadium (Academy Rd) - Moderna (Age 18+...,2021-04-09 18:00:00,2021-04-09,Upcoming,92833.0,45.0,2102223,Second Shot,,Dodger Stadium,False,1972-07-03


In [3]:
# Sanity-check the two date columns against each other. No date parsing is
# requested at load time, so this compares the raw column values; the preview
# shows them as YYYY-MM-DD text, for which string order matches date order.
check_dates = df[["appointment_date", "apptdate"]]

print("appointment_date <= apptdate -- CORRECT")
print(int(check_dates["appointment_date"].le(check_dates["apptdate"]).sum()))
print("appointment_date > apptdate -- INCORRECT")
print(int(check_dates["appointment_date"].gt(check_dates["apptdate"]).sum()))

appointment_date <= apptdate -- CORRECT
1323884
appointment_date > apptdate -- INCORRECT
283997


In [4]:
# For the rows flagged as INCORRECT (appointment_date later than apptdate),
# tally how often each apptdate value occurs.
check_dates.loc[
    check_dates["appointment_date"] > check_dates["apptdate"], "apptdate"
].value_counts()

1972-07-03    254608
2021-01-09      4241
2021-01-08      3984
2021-01-07      3587
2021-01-06      1994
               ...  
2021-05-17         1
2021-06-12         1
2021-05-20         1
2021-07-16         1
2021-06-19         1
Name: apptdate, Length: 128, dtype: int64

In [8]:
# Columns that define one row of the aggregated table; every other column in
# `df` is collapsed into a count.
group_cols = [
    "appointment_date",
    "site_type", "agegroup", "race_ethnicity",
    "patient_race", "patient_ethnicity", "patient_gender",
    "vaccine_site_name", "shot_number", "brand", "appt_status",
]

# Count vaccine records for each combination of the grouping columns.
#
# dropna=False is required here: by default groupby discards every row that
# has a missing value in ANY grouping column, and the extract does contain
# such rows (e.g. appointments with no `brand`). Without it those
# appointments silently vanish from the totals. Missing keys are kept as NaN
# and end up as empty cells in the exported CSV.
# (The `dropna` argument needs pandas >= 1.1.)
df2 = (
    df.groupby(group_cols, dropna=False)
    .agg({"vaccine_id": "count"})
    .reset_index()
    .rename(
        columns={
            "appointment_date": "date",
            "vaccine_id": "num_vaccines",
        }
    )
)

df2.head()

Unnamed: 0,appointment_date,site_type,agegroup,race_ethnicity,patient_race,patient_ethnicity,patient_gender,vaccine_site_name,shot_number,brand,appt_status,num_vaccines
0,2021-01-10,City Site,18-29,Asian,Asian,Not of Hispanic Origin,Female,Lincoln Park - Moderna (Age 18+) - LAFD,First Shot,Moderna,Discharged,11
1,2021-01-10,City Site,18-29,Asian,Asian,Not of Hispanic Origin,Male,Lincoln Park - Moderna (Age 18+) - LAFD,First Shot,Moderna,Discharged,5
2,2021-01-10,City Site,18-29,Asian,Asian,Not of Hispanic Origin,Male,Lincoln Park - Moderna (Age 18+) - LAFD,First Shot,Moderna,Monitoring,1
3,2021-01-10,City Site,18-29,Black,Black,Not of Hispanic Origin,Female,Lincoln Park - Moderna (Age 18+) - LAFD,First Shot,Moderna,Discharged,2
4,2021-01-10,City Site,18-29,Hispanic_Latino,Other,Hispanic Origin,Female,Lincoln Park - Moderna (Age 18+) - LAFD,First Shot,Moderna,Discharged,5


In [9]:
# Write the aggregated counts to a CSV in the working directory, leaving the
# DataFrame index out of the file.
df2.to_csv(
    path_or_buf="la-city-vaccines.csv",
    index=False,
)

In [10]:
# Delete the local CSV export. A missing file is tolerated so the cell can be
# re-run (or run before the export cell) without raising FileNotFoundError.
try:
    os.remove("la-city-vaccines.csv")
except FileNotFoundError:
    pass