In [1]:
import csv
import datetime

In [2]:
def process_csv(filename):
    """
    Read a UTF-8 encoded CSV file and return its rows.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list of rows, where each row is a list of strings
        (the header row, if any, is the first element).
    """
    # `with` guarantees the file is closed even if parsing raises;
    # newline="" lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields are preserved as written.
    with open(filename, encoding="utf-8", newline="") as csv_file:
        return list(csv.reader(csv_file))

In [3]:
# Load the dataset once, then split it into the header row and the data rows.
csv_rows = process_csv("vaccinations.csv")
csv_header, csv_data = csv_rows[0], csv_rows[1:]

# Display the column names.
csv_header

['country',
 'date',
 'daily_vaccinations',
 'total_vaccinations',
 'people_vaccinated',
 'people_fully_vaccinated',
 'total_boosters',
 'population']

In [4]:
# Using the cell function to access the data in a specific cell

def cell(row_idx, col_name):
    """
    Look up a single value in the loaded CSV data.

    Parameters:
        row_idx: index of the row within csv_data (the header row is excluded).
        col_name: name of the column, as it appears in csv_header.

    Returns:
        None when the cell is empty (a missing value), an int for the
        count/population columns, and the raw string for every other column.
    """
    numeric_columns = (
        'daily_vaccinations',
        'total_vaccinations',
        'people_vaccinated',
        'people_fully_vaccinated',
        'total_boosters',
        'population',
    )

    column_position = csv_header.index(col_name)
    raw_value = csv_data[row_idx][column_position]

    # An empty string is how a missing value shows up in the dataset.
    if raw_value == "":
        return None

    if col_name in numeric_columns:
        return int(raw_value)
    return raw_value

In [15]:
# Q1: What dates are mentioned in the dataset?

# Unique dates, kept in the order in which they first appear in the data.
date_list = []

for row_idx, _ in enumerate(csv_data):
    date = cell(row_idx, "date")

    if date in date_list:
        continue
    date_list.append(date)

dates = date_list
dates


['01/25/2022',
 '01/26/2022',
 '01/27/2022',
 '01/28/2022',
 '01/29/2022',
 '01/30/2022',
 '01/31/2022']

In [6]:
#print("Country: ", country, "\tDate: ", date, "\tDaily Vaccinations: ", daily_vacc)

In [18]:
# Q2: What countries are listed in the dataset?

# Unique country names in order of first appearance.
country_list = []

for row_idx, _ in enumerate(csv_data):
    country = cell(row_idx, "country")

    if country in country_list:
        continue
    country_list.append(country)

# Sort a copy so country_list keeps its first-appearance order.
countries = list(country_list)
countries.sort()
countries

['Afghanistan',
 'Albania',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Brazil',
 'British Virgin Islands',
 'Brunei',
 'Bulgaria',
 'Cambodia',
 'Canada',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Curacao',
 'Cyprus',
 'Czechia',
 'Democratic Republic of Congo',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'England',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Faeroe Islands',
 'Fiji',
 'Finland',
 'France',
 'Georgia',
 'Germany',
 'Gibraltar',
 'Greece',
 'Greenland',
 'Grenada',
 'Guatemala',
 'Guernsey',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Hong Kong',
 'Hungary',
 'Iceland',
 'India',
 '

In [17]:
# Q3: What is the population of each country in the dataset?

# Maps each country to the first non-missing population value found for it
# (None if every row for that country lacks a population).
pop_dict = {}

for row_idx, _ in enumerate(csv_data):
    country = cell(row_idx, "country")
    pop = cell(row_idx, "population")

    recorded = pop_dict.setdefault(country, None)

    if recorded is None and pop is not None:
        pop_dict[country] = pop

population = pop_dict
population

{'Afghanistan': 39822425,
 'Albania': 2873046,
 'Angola': 33934485,
 'Anguilla': 15126,
 'Antigua and Barbuda': 98726,
 'Argentina': 45605836,
 'Armenia': 2968277,
 'Aruba': 107194,
 'Austria': 9043061,
 'Azerbaijan': 10222990,
 'Bahamas': 396909,
 'Bahrain': 1748265,
 'Bangladesh': 166312179,
 'Barbados': 287709,
 'Belarus': 9442839,
 'Belgium': 11632221,
 'Belize': 404916,
 'Bermuda': 62094,
 'Bhutan': 779889,
 'Bolivia': 11832385,
 'Bosnia and Herzegovina': 3263734,
 'Brazil': 213992450,
 'British Virgin Islands': 30423,
 'Brunei': 441538,
 'Bulgaria': 6896487,
 'Cambodia': 16946049,
 'Canada': 38067582,
 'Cayman Islands': 66498,
 'Central African Republic': 4921767,
 'Chad': 16903319,
 'Chile': 19212518,
 'China': 1444229881,
 'Colombia': 51267219,
 'Comoros': 888427,
 'Costa Rica': 5138966,
 "Cote d'Ivoire": 27053133,
 'Croatia': 4081748,
 'Cuba': 11317506,
 'Curacao': 164798,
 'Cyprus': 895987,
 'Czechia': 10724424,
 'Democratic Republic of Congo': 91814314,
 'Denmark': 5813280,


In [7]:
# Function 1: daily_vaccinations_on(search_date)

def daily_vaccinations_on(search_date):
    '''Map every country in the dataset to its daily vaccination count
    on search_date.

    Every country that appears anywhere in the data gets a key; the value
    is None when no count is recorded for that country on search_date.
    If several rows match, the first non-missing count is kept.'''

    counts_by_country = {}

    for idx, _ in enumerate(csv_data):
        row_date = cell(idx, "date")
        name = cell(idx, "country")
        count = cell(idx, "daily_vaccinations")

        # Register the country even when this row is for another date.
        counts_by_country.setdefault(name, None)

        if row_date != search_date or count is None:
            continue

        # Only fill the slot once: the first recorded value wins.
        if counts_by_country[name] is None:
            counts_by_country[name] = count

    return counts_by_country


In [8]:
# Spot-check the helper: look up Bahrain's daily vaccinations on 01/27/2022.
daily_dict = daily_vaccinations_on("01/27/2022")
daily_dict['Bahrain']

2827

In [9]:
# Q6: For each country, what is the sum of the vaccinations given on all of the dates mentioned in the dataset?

# Build the per-date lookups once. Each call to daily_vaccinations_on scans
# the whole dataset, so calling it again for every row would repeat that scan
# (number of rows x number of dates) times for the same results.
daily_dicts = [daily_vaccinations_on(date) for date in date_list]

# key: country name (in order of first appearance), value: sum of vaccinations
sum_vacc_dict = {}

for row_idx in range(len(csv_data)):
    country = cell(row_idx, "country")

    # A country's total does not depend on the row, so compute it only once.
    if country in sum_vacc_dict:
        continue

    sum_vacc = sum(
        daily_dict_for_date[country]
        for daily_dict_for_date in daily_dicts
        if daily_dict_for_date[country] is not None
    )

    # A total of 0 is reported as None, i.e. it is treated the same as
    # "no vaccinations recorded on any date".
    sum_vacc_dict[country] = sum_vacc if sum_vacc != 0 else None

sum_vacc_dict

{'Afghanistan': 6868,
 'Albania': None,
 'Angola': None,
 'Anguilla': None,
 'Antigua and Barbuda': None,
 'Argentina': 1424935,
 'Armenia': None,
 'Aruba': 109,
 'Austria': None,
 'Azerbaijan': 186924,
 'Bahamas': None,
 'Bahrain': 24377,
 'Bangladesh': 2534125,
 'Barbados': 655,
 'Belarus': None,
 'Belgium': 271202,
 'Belize': None,
 'Bermuda': None,
 'Bhutan': None,
 'Bolivia': 330401,
 'Bosnia and Herzegovina': None,
 'Brazil': 7625078,
 'British Virgin Islands': None,
 'Brunei': None,
 'Bulgaria': 61239,
 'Cambodia': 461795,
 'Canada': 1754870,
 'Cayman Islands': None,
 'Central African Republic': None,
 'Chad': None,
 'Chile': 401911,
 'China': 27073000,
 'Colombia': 1082431,
 'Comoros': None,
 'Costa Rica': None,
 "Cote d'Ivoire": None,
 'Croatia': 48491,
 'Cuba': 523203,
 'Curacao': 1103,
 'Cyprus': 2596,
 'Czechia': 220159,
 'Democratic Republic of Congo': None,
 'Denmark': 76940,
 'Djibouti': None,
 'Dominica': None,
 'Dominican Republic': 287355,
 'Ecuador': 392492,
 'Egypt'

In [10]:
# Q8: How many vaccinations (across the world) were given on each date?

# key: date (in order of first appearance), value: sum of the daily
# vaccinations recorded for all countries on that date
world_dict = {}

for row_idx in range(len(csv_data)):
    date = cell(row_idx, "date")

    # Each date only needs to be totalled the first time it is seen.
    if date in world_dict:
        continue

    daily_dict = daily_vaccinations_on(date)

    # Countries with no value for this date (None) contribute nothing.
    world_dict[date] = sum(
        count for count in daily_dict.values() if count is not None
    )

world_dict


{'01/25/2022': 21919087,
 '01/26/2022': 22075204,
 '01/27/2022': 19551593,
 '01/28/2022': 19825492,
 '01/29/2022': 14844495,
 '01/30/2022': 13056961,
 '01/31/2022': 15251126}

In [11]:
# Maps each country to the date of the first row (in file order) that has a
# people_fully_vaccinated value; None if no row for that country has one.
fully_dict = {}

for row_idx, _ in enumerate(csv_data):
    date = cell(row_idx, "date")
    country = cell(row_idx, "country")
    fully_vacc = cell(row_idx, "people_fully_vaccinated")

    already_found = fully_dict.setdefault(country, None) is not None

    if not already_found and fully_vacc is not None:
        fully_dict[country] = date

fully_dict

{'Afghanistan': '01/26/2022',
 'Albania': '01/30/2022',
 'Angola': '01/28/2022',
 'Anguilla': '01/28/2022',
 'Antigua and Barbuda': '01/26/2022',
 'Argentina': '01/25/2022',
 'Armenia': '01/30/2022',
 'Aruba': '01/25/2022',
 'Austria': '01/28/2022',
 'Azerbaijan': '01/25/2022',
 'Bahamas': '01/26/2022',
 'Bahrain': '01/25/2022',
 'Bangladesh': '01/25/2022',
 'Barbados': '01/25/2022',
 'Belarus': '01/30/2022',
 'Belgium': '01/25/2022',
 'Belize': '01/28/2022',
 'Bermuda': '01/28/2022',
 'Bhutan': '01/30/2022',
 'Bolivia': '01/25/2022',
 'Bosnia and Herzegovina': '01/25/2022',
 'Brazil': '01/25/2022',
 'British Virgin Islands': '01/28/2022',
 'Brunei': '01/28/2022',
 'Bulgaria': '01/25/2022',
 'Cambodia': '01/25/2022',
 'Canada': '01/25/2022',
 'Cayman Islands': '01/28/2022',
 'Central African Republic': '01/31/2022',
 'Chad': '01/30/2022',
 'Chile': '01/25/2022',
 'China': '01/28/2022',
 'Colombia': '01/25/2022',
 'Comoros': '01/27/2022',
 'Costa Rica': '01/25/2022',
 "Cote d'Ivoire": '

In [12]:
def get_number_of_days(start_date, end_date):
    """
    Return the number of whole days from start_date to end_date.

    Both arguments are date strings in MM/DD/YYYY form. The result is
    negative when end_date falls before start_date.
    """
    # Format string telling strptime how to read the date text.
    date_format = '%m/%d/%Y'

    first_day, last_day = (
        datetime.datetime.strptime(text, date_format)
        for text in (start_date, end_date)
    )

    return (last_day - first_day).days


In [13]:
def most_recent_total(col_name, given_date):

    '''return a dictionary mapping each country to the most recent column value in the data
    available by the given date; if no data is available, the value is None.

    col_name: name of the column to read (passed to cell).
    given_date: cut-off date as an MM/DD/YYYY string; rows dated after it are ignored.

    Every country that appears in the data gets a key. Rows may appear in any
    order; when several rows share the most recent date, the last one wins.'''

    country_info = {}
    # country -> days between the currently selected row and given_date
    # (smaller means more recent)
    closest_gap = {}

    for row_idx in range(len(csv_data)):
        country = cell(row_idx, "country")
        date = cell(row_idx, "date")
        col_value = cell(row_idx, col_name)

        # Start every country at None so that a row dated after given_date
        # can never leak its value into the result.
        country_info.setdefault(country, None)

        # Rows without a value or without a date cannot contribute.
        if col_value is None or date is None:
            continue

        date_diff = get_number_of_days(date, given_date)
        if date_diff < 0:
            continue  # this row is later than given_date

        # Keep the row closest to given_date instead of relying on file order.
        if country not in closest_gap or date_diff <= closest_gap[country]:
            closest_gap[country] = date_diff
            country_info[country] = col_value

    return country_info

In [19]:
# Nested lookup: vaccination_stats[country][date] -> dict of that country's
# figures as of that date.
vaccination_stats = {}

for date in dates:
    # Per-date lookups, each mapping a country to a value:
    daily_vax_dict = daily_vaccinations_on(date)                              # daily_vaccinations on date
    total_vax_dict = most_recent_total('total_vaccinations', date)            # most recent total_vaccinations by date
    people_vax_dict = most_recent_total('people_vaccinated', date)            # most recent people_vaccinated by date
    fully_vax_dict = most_recent_total('people_fully_vaccinated', date)       # most recent people_fully_vaccinated by date
    total_booster_dict = most_recent_total('total_boosters', date)            # most recent total_boosters by date

    for country in countries:
        # Create the country's inner dict on first use, then store this date's record.
        vaccination_stats.setdefault(country, {})[date] = {
            'country': country,
            'date': date,
            'daily_vaccinations': daily_vax_dict[country],
            'total_vaccinations': total_vax_dict[country],
            'people_vaccinated': people_vax_dict[country],
            'people_fully_vaccinated': fully_vax_dict[country],
            'total_boosters': total_booster_dict[country],
            'population': population[country],
        }

vaccination_stats


{'Afghanistan': {'01/25/2022': {'country': 'Afghanistan',
   'date': '01/25/2022',
   'daily_vaccinations': None,
   'total_vaccinations': None,
   'people_vaccinated': None,
   'people_fully_vaccinated': None,
   'total_boosters': None,
   'population': 39822425},
  '01/26/2022': {'country': 'Afghanistan',
   'date': '01/26/2022',
   'daily_vaccinations': None,
   'total_vaccinations': 5074196,
   'people_vaccinated': 4511372,
   'people_fully_vaccinated': 3866279,
   'total_boosters': None,
   'population': 39822425},
  '01/27/2022': {'country': 'Afghanistan',
   'date': '01/27/2022',
   'daily_vaccinations': 6868,
   'total_vaccinations': 5081064,
   'people_vaccinated': 4517380,
   'people_fully_vaccinated': 3868832,
   'total_boosters': None,
   'population': 39822425},
  '01/28/2022': {'country': 'Afghanistan',
   'date': '01/28/2022',
   'daily_vaccinations': None,
   'total_vaccinations': 5081064,
   'people_vaccinated': 4517380,
   'people_fully_vaccinated': 3868832,
   'total

In [None]:
#print("Country: ", country, "\tPopulation: ", pop, "\tFully Vaccinated: ", fully)


In [None]:
#Q6 Another way to do it

# key: country name, value: sum of vaccinations (None if the country has no
# daily value on any date)
sum_vacc_dict = {}

# `dates` is the list of unique dates built for Q1.
for date in dates:
    daily_vacc_dict = daily_vaccinations_on(date)

    for country in daily_vacc_dict:
        if country not in sum_vacc_dict:
            sum_vacc_dict[country] = None

        if daily_vacc_dict[country] != None:
            # Switch from "no data" to a running total on the first real value.
            if sum_vacc_dict[country] == None:
                sum_vacc_dict[country] = 0

            # Only add when there is a value for this date; adding None
            # would raise a TypeError.
            sum_vacc_dict[country] = sum_vacc_dict[country] + daily_vacc_dict[country]

sum_vacc_dict

In [None]:
# Sum of daily vaccinations per country over all dates in the dataset.

# Build the per-date lookups once. Each call to daily_vaccinations_on scans
# the whole dataset, so calling it inside the country loop would repeat the
# same scans once per country.
daily_dicts = [daily_vaccinations_on(date) for date in dates]

# key: country name (sorted, following `countries`), value: sum of vaccinations
sum_vacc_dict = {}

for country in countries:
    sum_vacc = sum(
        daily_dict_for_date[country]
        for daily_dict_for_date in daily_dicts
        if daily_dict_for_date[country] is not None
    )

    # A total of 0 is reported as None, i.e. it is treated the same as
    # "no vaccinations recorded on any date".
    sum_vacc_dict[country] = sum_vacc if sum_vacc != 0 else None

vaccination_sum = sum_vacc_dict
vaccination_sum