In [80]:
import os
from datetime import datetime
import pandas as pd

# Directory containing the daily report CSVs; file names encode the report
# date as MM-DD-YYYY.csv.
REPORTS_DIR = "./COVID-19/csse_covid_19_data/csse_covid_19_daily_reports"
REPORT_NAME_FORMAT = '%m-%d-%Y.csv'

daily_reports = [name for name in os.listdir(REPORTS_DIR) if name.endswith('.csv')]

# Order the reports chronologically by the date encoded in each file name.
sorted_files = sorted(daily_reports, key=lambda file: datetime.strptime(file, REPORT_NAME_FORMAT))

# Collect only the rows for India from every daily report. The per-file frames
# are gathered in a list and concatenated once at the end: calling pd.concat
# inside the loop re-copies all previously accumulated rows on every iteration.
india_frames = []
for file in sorted_files:
    temp_df = pd.read_csv(os.path.join(REPORTS_DIR, file))

    # Tag every row with the report date taken from the file name.
    temp_df['Date'] = datetime.strptime(file, REPORT_NAME_FORMAT)

    # Some reports name the country column 'Country/Region'; normalise it to
    # 'Country_Region'. rename() ignores the mapping when the column is absent.
    temp_df = temp_df.rename(columns={'Country/Region': 'Country_Region'})

    india_frames.append(temp_df[temp_df['Country_Region'] == 'India'])

# pd.concat raises on an empty list, so fall back to an empty frame when no
# report files were found. ignore_index=True yields a fresh 0..n-1 index.
india_cases = pd.concat(india_frames, ignore_index=True) if india_frames else pd.DataFrame()

india_cases.head()

Unnamed: 0,Province/State,Country_Region,Last Update,Confirmed,Deaths,Recovered,Date,Latitude,Longitude,FIPS,...,Province_State,Last_Update,Lat,Long_,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio,Incident_Rate,Case_Fatality_Ratio
0,,India,1/30/20 16:00,1.0,,,2020-01-30,,,,...,,,,,,,,,,
1,,India,1/31/2020 23:59,1.0,,,2020-01-31,,,,...,,,,,,,,,,
2,,India,1/31/2020 8:15,1.0,0.0,0.0,2020-02-01,,,,...,,,,,,,,,,
3,,India,2020-02-02T06:03:08,2.0,0.0,0.0,2020-02-02,,,,...,,,,,,,,,,
4,,India,2020-02-03T21:43:02,3.0,0.0,0.0,2020-02-03,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18209,,India,,84285.0,816.0,,2021-10-12,,,,...,Tripura,2021-10-13 04:20:51,23.746783,91.743565,,"Tripura, India",,,2021.322876,0.968144
18210,,India,,0.0,0.0,,2021-10-12,,,,...,Unknown,2021-10-13 04:20:51,,,,"Unknown, India",,,,
18211,,India,,1709985.0,22896.0,,2021-10-12,,,,...,Uttar Pradesh,2021-10-13 04:20:51,26.925425,80.560982,,"Uttar Pradesh, India",,,718.835300,1.338959
18212,,India,,343695.0,7397.0,,2021-10-12,,,,...,Uttarakhand,2021-10-13 04:20:51,30.156447,79.197608,,"Uttarakhand, India",,,3054.833685,2.152199


In [81]:
# Name the export after the date of the newest daily report (DD-MM-YYYY).
newest_report_day = datetime.strptime(sorted_files[-1], '%m-%d-%Y.csv')
last_case_date = newest_report_day.strftime('%d-%m-%Y')

india_cases_path = './india_cases_%s.csv' % last_case_date
india_cases.to_csv(india_cases_path)

In [82]:
# Load the state-wise vaccination CSV and tidy it in a single pipeline.

csv_url = 'http://data.covid19india.org/csv/latest/cowin_vaccine_data_statewise.csv'

india_vaccines = (
    pd.read_csv(csv_url)
    # Drop rows that have no total dose count.
    .dropna(subset=['Total Doses Administered'])
    # 'Updated On' arrives as DD/MM/YYYY text; convert it to datetimes.
    .assign(**{
        'Updated On': lambda frame: pd.to_datetime(frame['Updated On'], format='%d/%m/%Y')
    })
    # Order by date, then state, and renumber the index from zero.
    .sort_values(by=['Updated On', 'State'], ignore_index=True)
)

india_vaccines.head()


Unnamed: 0,Updated On,State,Total Doses Administered,Sessions,Sites,First Dose Administered,Second Dose Administered,Male (Doses Administered),Female (Doses Administered),Transgender (Doses Administered),...,AEFI,18-44 Years (Doses Administered),45-60 Years (Doses Administered),60+ Years (Doses Administered),18-44 Years (Individuals Vaccinated),45-60 Years (Individuals Vaccinated),60+ Years (Individuals Vaccinated),Male (Individuals Vaccinated),Female (Individuals Vaccinated),Transgender (Individuals Vaccinated)
0,2021-01-16,Andaman and Nicobar Islands,23.0,2.0,2.0,23.0,0.0,,,,...,,,,,,,,12.0,11.0,0.0
1,2021-01-16,Andhra Pradesh,4216.0,541.0,339.0,4216.0,0.0,,,,...,,,,,,,,857.0,3359.0,0.0
2,2021-01-16,Arunachal Pradesh,81.0,10.0,9.0,81.0,0.0,,,,...,,,,,,,,40.0,41.0,0.0
3,2021-01-16,Assam,401.0,79.0,64.0,401.0,0.0,,,,...,,,,,,,,141.0,260.0,0.0
4,2021-01-16,Bihar,2967.0,222.0,219.0,2967.0,0.0,,,,...,,,,,,,,926.0,2041.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9985,2021-10-12,Telangana,28306599.0,3618596.0,3120.0,20393627.0,7912972.0,14370006.0,13931284.0,5309.0,...,426.0,15886178.0,7895479.0,4524942.0,,,,,,
9986,2021-10-12,Tripura,3994882.0,58166.0,115.0,2501022.0,1493860.0,2030599.0,1963681.0,602.0,...,78.0,2035501.0,1299565.0,659816.0,,,,,,
9987,2021-10-12,Uttar Pradesh,117159115.0,7765139.0,8111.0,91974814.0,25184301.0,62330170.0,54778681.0,50264.0,...,1851.0,68888424.0,30586541.0,17684150.0,,,,,,
9988,2021-10-12,Uttarakhand,10803381.0,286497.0,725.0,7421381.0,3382000.0,5560359.0,5238909.0,4113.0,...,494.0,6278209.0,2628031.0,1897141.0,,,,,,


In [84]:
# Name the export after the most recent 'Updated On' date (DD-MM-YYYY).
latest_update = india_vaccines['Updated On'].max()
last_date_india_vaccines = latest_update.strftime('%d-%m-%Y')

india_vaccines_path = './india_vaccines_%s.csv' % last_date_india_vaccines
india_vaccines.to_csv(india_vaccines_path)


In [54]:
# Download the US state vaccination CSV from the owid/covid-19-data repository.
usa_vaccine_url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/us_state_vaccinations.csv'
usa_vaccine = pd.read_csv(usa_vaccine_url)

usa_vaccine

Unnamed: 0,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used
0,2021-01-12,Alabama,78134.0,377025.0,70861.0,0.15,1.59,7270.0,1.45,7.69,,,,0.207
1,2021-01-13,Alabama,84040.0,378975.0,74792.0,0.19,1.71,9245.0,1.53,7.73,5906.0,5906.0,1205.0,0.222
2,2021-01-14,Alabama,92300.0,435350.0,80480.0,,1.88,,1.64,8.88,8260.0,7083.0,1445.0,0.212
3,2021-01-15,Alabama,100567.0,444650.0,86956.0,0.28,2.05,13488.0,1.77,9.07,8267.0,7478.0,1525.0,0.226
4,2021-01-16,Alabama,,,,,,,,,,7498.0,1529.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17496,2021-10-08,Wyoming,534368.0,690105.0,284600.0,42.32,92.33,244957.0,49.17,119.24,2359.0,1922.0,3321.0,0.774
17497,2021-10-09,Wyoming,536884.0,690305.0,285197.0,42.43,92.76,245545.0,49.28,119.27,2516.0,2254.0,3895.0,0.778
17498,2021-10-10,Wyoming,,,,,,,,,,2411.0,4166.0,
17499,2021-10-11,Wyoming,,,,,,,,,,2585.0,4466.0,
