In [None]:
import pandas as pd
from datetime import datetime
import pycountry
import re
from functools import reduce

In [None]:
# papermill parameters
# Directory prefix for the exported CSV. The export step concatenates this
# string directly with the file name, so the trailing slash is required.
output_folder = "../output/"

### Read data with pandas CSV Reader function

In [None]:
# Download Google's global mobility report straight from the published URL.
# The URL literal previously ended with a stray space; it is removed so the
# request no longer depends on the HTTP client trimming whitespace.
df = pd.read_csv("https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv")

In [None]:
# Turn the date column into real datetimes: stringify first, then parse with
# the fixed year-month-day layout.
df['date'] = df['date'].astype(str).pipe(pd.to_datetime, format='%Y-%m-%d')

In [None]:
# Lookup table filled in by later cells:
#   country code -> {lower-cased subdivision name -> subdivision code}
subdivisions = dict()

# declare helper functions

def resolve_iso3166_1_by_name(name):
    """Return the ISO 3166-1 alpha-2 code for an exact country name.

    Args:
        name: Country name, matched against the ``name`` field of
            ``pycountry.countries``.

    Returns:
        The ``alpha_2`` attribute of the matching pycountry record.

    Raises:
        LookupError: If pycountry has no record with that name.
    """
    country = pycountry.countries.get(name=name)
    if country is None:
        # An unknown name would otherwise surface as an AttributeError on None,
        # which hides the value that could not be resolved.
        raise LookupError(f"no ISO 3166-1 country found for name {name!r}")
    return country.alpha_2
    
        
def resolve_name(row):
    """Return the pycountry country name for an ISO 3166-1 alpha-2 code.

    Args:
        row: Despite its name, this is a single alpha-2 country code (the
            notebook applies the function element-wise to the code column).

    Returns:
        The ``name`` attribute of the matching pycountry record.

    Raises:
        LookupError: If pycountry has no record for the code.
    """
    # get name from iso_3166_1
    lookup = pycountry.countries.get(alpha_2=row)
    if lookup is None:
        # Report the unresolved code instead of failing with an
        # AttributeError on None.
        raise LookupError(f"no ISO 3166-1 country found for code {row!r}")
    return lookup.name

def resolve_iso3166_2(row):
    """Attach the subdivision code to ``row`` when its sub-region is known.

    Args:
        row: Mapping-like record (a DataFrame row in this notebook) providing
            ``'country_region_code'`` and ``'sub_region_helper'``.

    Returns:
        The same ``row`` object. ``row['ISO_3166_2']`` is set only when the
        lower-cased helper name is a key of ``subdivisions[region_code]``;
        otherwise the row is returned untouched.
    """
    region_code = row['country_region_code']
    sub_region_name = row['sub_region_helper']

    # Missing values arrive as float NaN; only non-empty strings can match.
    if not isinstance(sub_region_name, str) or not sub_region_name:
        return row

    # A region without a lookup table is treated as "no match" rather than
    # raising KeyError; the dict is probed directly instead of copying its
    # keys into a list for every row.
    known_codes = subdivisions.get(region_code, {})
    key = sub_region_name.lower()
    if key in known_codes:
        row['ISO_3166_2'] = known_codes[key]
    return row

In [None]:
# Stamp every row with the load time as a naive UTC timestamp.
# datetime.utcnow() is deprecated since Python 3.12, so take a tz-aware "now"
# in UTC and drop the tz info to keep the same naive-UTC value in the export.
df["Last_Update_Date"] = pd.Timestamp.now(tz="UTC").tz_localize(None)
# True only for rows dated on the most recent day present in the report.
df['Last_Reported_Flag'] = df['date'].max() == df['date']

In [None]:
# Work on a copy of the province/state names so 'sub_region_1' stays as published.
df['sub_region_helper'] = df['sub_region_1']

# Country-specific rewrites as (country code, regex, replacement), applied in
# this order. Order matters where a country has several rules (GB, KW, LV).
sub_region_rewrites = [
    ('AR', " Province", ""),
    ('BG', " Province", ""),
    ('BR', "State of ", ""),
    ('CZ', " Region", ""),
    ('CO', " Department", ""),
    ('EE', " County", "maa"),
    ('GB', "(?i) Council", ""),
    ('GB', "Greater ", ""),
    ('GB', ";.*", ""),
    ('HU', " County", ""),
    ('IE', "County ", ""),
    ('KW', " Governorate", ""),
    ('KW', " Governate", ""),
    ('LV', " Municipality", "s novads"),
    ('LV', "ss novads", "s novads"),
    ('MU', " District", ""),
    ('NL', "North ", "noord-"),
    ('PL', " Voivodeship", ""),
    ('PT', " District", ""),
    ('RO', " County", ""),
    ('SA', " Province", ""),
    ('SE', " County", "s lan"),
    ('UY', " Department", ""),
]

# Names that must survive their country's rewrite unchanged.
keep_verbatim = {'AR': 'Buenos Aires Province'}

for country_code, pattern, replacement in sub_region_rewrites:
    selected = df['sub_region_helper'].notna() & (df['country_region_code'] == country_code)
    if country_code in keep_verbatim:
        selected &= df['sub_region_helper'] != keep_verbatim[country_code]
    # Assign through df.loc in a single step. The former chained form
    # (df[col].loc[mask] = ...) writes to an intermediate object and is not
    # guaranteed to reach df (it never does under pandas copy-on-write).
    df.loc[selected, 'sub_region_helper'] = (
        df.loc[selected, 'sub_region_helper'].str.replace(pattern, replacement, regex=True)
    )

# Fold accented characters to plain ASCII letters. "(?i)" makes every
# alternation case-insensitive.
accent_folding = [
    ("(?i)á|ã|à|â|ä|æ|ã|å|ā|Ā|ă", "a"),  # a
    ("(?i)é|è|ê|ë|ē|ė|ę|ė|ě", "e"),  # e
    ("(?i)í|î|ï|í|ī|į|ì", "i"),  # i
    ("(?i)ô|õ|ö|ò|ó|ó|œ|ø|ō|õ|ő", "o"),  # o
    ("(?i)Ñ|ň|ń|ņ", "n"),  # n
    # "ų" added so this side folds the same characters as char_replace_map,
    # which is used to normalise the pycountry names these values are matched against.
    ("(?i)Ú|ü|û|ù|ū|ů|ų", "u"),  # u
    ("(?i)ß|ś|š|ș|ş", "s"),  # s
    ("(?i)ž|ź|ż|Ȥ", "z"),  # z
    ("(?i)ļ|Ł", "l"),  # l
    ("(?i)ț", "t"),  # t
    ("(?i)č|ç|ć", "c"),  # c
    ("(?i)ý", "y"),  # y
    ("(?i)ř", "r"),  # r
    ("(?i)ď|ḍ|đ", "d"),  # d
    ("(?i)Ḩ|Ḥ", "h"),  # h
    ("(?i)ķ", "k"),  # k
]

for pattern, replacement in accent_folding:
    df['sub_region_helper'] = df['sub_region_helper'].str.replace(pattern, replacement, regex=True)


In [None]:
# Regex alternations of accented characters mapped to their ASCII stand-in.
# The ";.*" entry drops everything from the first semicolon onwards.
char_replace_map = {
    "á|ã|à|â|ä|æ|ã|å|ā|Ā|ă": "a",
    "é|è|ê|ë|ē|ė|ę|ė|ě": "e",
    "í|î|ï|í|ī|į|ì": "i",
    "ô|õ|ö|ò|ó|ó|œ|ø|ō|õ|ő": "o",
    "Ñ|ň|ń|ņ": "n",
    "Ú|ü|û|ù|ū|ů|ų": "u",
    # "Ȥ" added so this map folds the same characters as the DataFrame-side
    # normalisation of 'sub_region_helper'.
    "ž|ź|ż|Ȥ": "z",
    "ß|ś|š|ș|ş": "s",
    "ļ|Ł": "l",
    "ț": "t",
    "č|ç|ć": "c",
    "ý": "y",
    "ř": "r",
    "ď|ḍ|đ": "d",
    "Ḩ|Ḥ": "h",
    ";.*": "",
    "ķ": "k"
}


def _normalize_subdivision_name(name):
    """Lower-case ``name`` and apply every ``char_replace_map`` rule in order."""
    normalized = name.lower()
    for pattern, replacement in char_replace_map.items():
        normalized = re.sub(pattern, replacement, normalized, flags=re.IGNORECASE)
    return normalized


# iterate through country_regions that have at least one named sub-region
regions_with_sub_regions = (
    df.loc[df['sub_region_1'].notna(), ['country_region_code', 'country_region']]
    .drop_duplicates()
)
for region, country_name in regions_with_sub_regions.itertuples(index=False, name=None):
    if pd.isna(region):
        # The code column can hold NaN (pandas' default parsing reads the
        # literal text "NA" as missing). Resolve such rows by country name,
        # as the later back-fill cell does, so the table is keyed by the
        # code those rows end up with.
        region = resolve_iso3166_1_by_name(country_name)
    # NOTE(review): pycountry may return None instead of an empty list when it
    # has nothing for a code; treat that as "no subdivisions" - confirm
    # against the installed pycountry version.
    known_subdivisions = pycountry.subdivisions.get(country_code=region) or []
    subdivisions[region] = {
        # key: normalised name, value: subdivision code without the "<country>-" prefix
        _normalize_subdivision_name(subd.name): subd.code.replace(f"{region}-", "")
        for subd in known_subdivisions
    }


In [None]:
# Back-fill missing ISO 3166-1 codes from the country name.
# Assign through df.loc in one step: the former chained form
# (df[col].loc[mask] = ...) writes to an intermediate object and is not
# guaranteed to reach df (it never does under pandas copy-on-write).
missing_code = df['country_region_code'].isna()
df.loc[missing_code, 'country_region_code'] = (
    df.loc[missing_code, 'country_region'].apply(resolve_iso3166_1_by_name)
)

### Resolve subdivision codes manually

In [None]:
# Hand-maintained overrides: country code -> {normalised sub-region name -> code}.
# Keys must be lower-case ASCII because resolve_iso3166_2 lower-cases the
# helper name before looking it up.
manual_subdivision_codes = {
    'AE': {
        'dubai': 'DU',
        'umm al quawain': 'UQ',
        'fujairah': 'FU',
        'ras al khaimah': 'RK',
        'ajman': 'AJ',
        'sharjah': 'SH',
        'abu dhabi': 'AZ',
    },
    'AR': {
        'la rioja': 'F',
        'buenos aires province': 'B',
        'buenos aires': 'C',
    },
    'AT': {
        'vienna': '9',
        'carinthia': '2',
        'lower austria': '3',
        'tyrol': '7',
        'upper austria': '4',
        'styria': '6',
    },
    'BG': {
        'smoljan': '21',
        'sofia city': '22',
        'vraca': '06',
    },
    'BE': {
        'wallonia': 'WAL',
        'brussels': 'BRU',
        'flanders': 'VLG',
    },
    'BR': {
        'federal district': 'DF',
    },
    'CA': {
        'yukon': 'YT',
    },
    'CH': {
        'canton of zug': 'ZG',
        'canton of bern': 'BE',
        'basel city': 'BS',
        'geneva': 'GE',
        'st. gallen': 'SG',
        'grisons': 'GR',
        'lucerne': 'LU',
    },
    'CL': {
        'bio bio': 'BI',
        'magallanes and chilean antarctica': 'MA',
        'santiago metropolitan region': 'RM',
        'aysen': 'AI',
        'nuble': 'NB',
        "o'higgins": 'LI',
    },
    'CO': {
        'bogota': 'DC',
        'san andres and providencia': 'SAP',
        'north santander': 'NSA',
    },
    'CZ': {
        'hradec kralove region': '52',
        # " Region" is stripped from CZ names during normalisation, so the
        # suffixed key above cannot match on its own; add the stripped form.
        'hradec kralove': '52',
        'central bohemian': '20',
        'moravian-silesian': '80',
        'plzen': '32',
        'prague': '10',
        'south bohemian': '31',
        'south moravian': '64',
        'vysocina': '63',
    },
    'DE': {
        'hesse': 'HE',
        'north rhine-westphalia': 'NW',
        'saxony-anhalt': 'ST',
        'saxony': 'SN',
        'thuringia': 'TH',
        'rhineland-palatinate': 'RP',
        'lower saxony': 'NI',
        'bavaria': 'BY',
    },
    'DK': {
        'north denmark region': '81',
        'region zealand': '85',
        'region of southern denmark': '83',
        'central denmark region': '82',
        'capital region of denmark': '84',
    },
    'ES': {
        'andalusia': 'AN',
        'balearic islands': 'IB',
        'basque country': 'PV',
        'canary islands': 'CN',
        'castile and leon': 'CL',
        'castile-la mancha': 'CM',
        'catalonia': 'CT',
        'community of madrid': 'MD',
        'navarre': 'NA',
        'region of murcia': 'MC',
        'valencian community': 'VC',
    },
    'FI': {
        'central finland': '08',
        'central ostrobothnia': '07',
        'lapland': '10',
        'north karelia': '13',
        'northern ostrobothnia': '14',
        'northern savonia': '15',
        'ostrobothnia': '12',
        'paijanne tavastia': '16',
        'south karelia': '02',
        'southern ostrobothnia': '03',
        'southern savonia': '04',
        'southwest finland': '19',
        'tavastia proper': '06',
    },
    'FR': {
        'brittany': 'BRE',
        'corsica': 'COR',
        'grand est': 'GES',
        'normandy': 'NOR',
        'pays de la loire': 'PDL',
        "provence-alpes-cote d'azur": 'PAC',
    },
    'GB': {
        'armagh city, banbridge and craigavon': 'ABC',
        'borough of halton': 'HAL',
        'bridgend county borough': 'BGE',
        'caerphilly county borough': 'CAY',
        'cardiff': 'CRF',
        'rhondda cynon taff': 'RCT',
        'scottish borders': 'SCB',
        'city of bristol': 'BST',
        'conwy principal area': 'CWY',
        'county durham': 'DUR',
        'edinburgh': 'EDH',
        'anglesey': 'AGY',
        'london': 'LND',
        'merthyr tydfil county borough': 'MTY',
        'neath port talbot principle area': 'NTL',
        'orkney': 'ORK',
        'torfaen principal area': 'TOF',
        'vale of glamorgan': 'VGL',
        'wrexham principal area': 'WRX',
    },
    'GR': {
        'crete region': 'M',
        'decentralized administration of attica': 'A1',
        'decentralized administration of epirus and western macedonia': 'C',
        'decentralized administration of macedonia and thrace': 'A',
        'decentralized administration of the aegean': 'K',
        'decentralized administration of thessaly and central greece': 'E',
    },
    'HR': {
        'bjelovar-bilogora county': '07',
        'brod-posavina county': '12',
        'city of zagreb': '21',
        'dubrovnik-neretva county': '19',
        'istria county': '18',
        'karlovac county': '04',
        'koprivnica-krizevci county': '06',
        'krapina-zagorje county': '02',
        'lika-senj county': '09',
        'medimurje county': '20',
        'osijek-baranja county': '14',
        'pozega-slavonia county': '11',
        'primorje-gorski kotar county': '08',
        'sibenik-knin county': '15',
        'sisak-moslavina county': '03',
        'split-dalmatia county': '17',
        'varazdin county': '05',
        'virovitica-podravina county': '10',
        'vukovar-srijem county': '16',
        'zadar county': '13',
        'zagreb county': '01',
    },
    'ID': {
        # Was 'ID' (the country code itself); ISO 3166-2 lists Jawa Tengah as ID-JT.
        'central java': 'JT',
        'central kalimantan': 'KT',
        'central sulawesi': 'ST',
        'east java': 'JI',
        'east kalimantan': 'KI',
        # Key lower-cased: the mixed-case spelling could never match.
        'east nusa tenggara': 'NT',
        'jakarta': 'JK',
        'west kalimantan': 'KB',
        # Was 'KA' (the Kalimantan island group); Kalimantan Utara is ID-KU.
        'north kalimantan': 'KU',
        'north maluku': 'MU',
        'north sulawesi': 'SA',
        'north sumatra': 'SU',
        'bangka belitung islands': 'BB',
        'riau islands': 'KR',
        'south east sulawesi': 'SG',
        'south kalimantan': 'KS',
        'south sulawesi': 'SN',
        # Key lower-cased: the mixed-case spelling could never match.
        'south sumatra': 'SS',
        'special region of yogyakarta': 'YO',
        'west java': 'JB',
        'west nusa tenggara': 'NB',
        'west papua': 'PB',
        'west sulawesi': 'SR',
        'west sumatra': 'SB',
    },
    'IT': {
        'apulia': '75',
        'sicily': '82',
        'tuscany': '52',
        'trentino-south tyrol': '32',
        'sardinia': '88',
        'piedmont': '21',
        'lombardy': '25',
    },
    'KW': {
        # Was 'KW' (the country code itself); ISO 3166-2 lists Al Asimah as KW-KU.
        'al asimah': 'KU',
        'al jahra': 'JA',
        'mubarak al-kabeer': 'MU',
    },
    'LT': {
        'alytus county': 'AL',
        'kaunas county': 'KU',
        'marijampole county': 'MR',
        'panevezys county': 'PN',
        'siauliai county': 'SA',
        'taurage county': 'TA',
        'klaipeda county': 'KL',
        'telsiai county': 'TE',
        'utena county': 'UT',
        'vilnius county': 'VL',
    },
    'LV': {
        'cesis novads': '022',
        'adazi novads': '011',
        'balvus novads': '015',
        # Key lower-cased: the mixed-case spelling could never match.
        'burtniekis novads': '019',
        'city of liepaja': 'LPX',
        'incukalns novads': '037',
        'kegums novads': '051',
        'kocenis novads': '045',
        'limbazis novads': '054',
        'ozolniekis novads': '069',
        'preilis novads': '073',
        'priekulis novads': '075',
        'ropazis novads': '080',
        'saulkrastis novads': '089',
        'aizkraukle novads': '002',
        'stopinis novads': '095',
        'talsis novads': '097',
        'tukums novads': '099',
    },
    'MU': {
        'riviere noire': 'BL',
    },
    'MX': {
        'coahuila': 'COA',
        'veracruz': 'VER',
        'mexico city': 'CMX',
        'state of mexico': 'MEX',
        'michoacan': 'MIC',
    },
    'NG': {
        'federal capital territory': 'FC',
        'nassarawa': 'NA',
        'ogun state': 'OG',
    },
    'NL': {
        'south holland': 'ZH',
    },
    'NO': {
        'viken': '30',
        'vestland': '46',
        'vestfold og telemark': '38',
        'trondelag': '50',
        'troms og finnmark': '54',
        'agder': '42',
        'innlandet': '34',
    },
    'NZ': {
        'gisborne': 'GIS',
        'marlborough': 'MBH',
        'nelson': 'NSN',
        'tasman': 'TAS',
    },
    'PE': {
        'callao region': 'CAL',
        'cusco': 'CUS',
        'lima region': 'LIM',
        'metropolitan municipality of lima': 'LMA',
    },
    'PL': {
        'greater poland': 'WP',
        'kuyavian-pomeranian': 'KP',
        'lesser poland': 'MA',
        'lodz': 'LD',
        'lower silesian': 'DS',
        'lublin': 'LU',
        'lubusz': 'LB',
        'masovian': 'MZ',
        'opole': 'OP',
        'pomeranian': 'PM',
        'silesian': 'SL',
        'warmian-masurian': 'WN',
        'west pomeranian': 'ZP',
    },
    'PT': {
        'azores': '20',
        'madeira': '30',
        'lisbon': '11',
    },
    'RO': {
        'bucharest': 'B',
    },
    'SA': {
        'al jowf': '12',
        'al qasim': '05',
        'aseer': '14',
        'hail': '06',
        'jazan': '09',
        'riyadh': '01',
        'eastern': '04',
        'northern borders': '08',
    },
    'SE': {
        'blekinges lan': 'K',
        'kalmars lan': 'H',
        'orebros lan': 'T',
        'skanes lan': 'M',
        'uppsalas lan': 'C',
    },
    'SI': {
        'administrative unit maribor': '070',
        'izola': '040',
        'koper': '050',
        'lendava': '059',
        'sobota': '080',
        'municipality of hrastnik': '034',
        'piran': '090',
        'postojna': '094',
    },
    'SK': {
        'bratislava region': 'BL',
        'kosice region': 'KI',
        'nitra region': 'NI',
        'presov region': 'PV',
        'trencin region': 'TC',
        'trnava region': 'TA',
        'zilina region': 'ZI',
        'banska bystrica region': 'BC',
    },
    'ZA': {
        'north west': 'NW',
    },
}

# Merge the overrides into the generated tables. setdefault keeps the cell from
# raising KeyError when a country has no table yet (for example because the
# downloaded report contains no sub-regions for it).
for country_code, overrides in manual_subdivision_codes.items():
    subdivisions.setdefault(country_code, {}).update(overrides)

In [None]:
# Replace the country names with the name pycountry holds for each ISO code,
# then run the row-wise helper that attaches ISO_3166_2 where a match exists.
df['country_region'] = df['country_region_code'].map(resolve_name)
df = df.apply(resolve_iso3166_2, axis="columns")

In [None]:
# The normalised helper names were only needed for the lookup above.
df = df.drop(columns='sub_region_helper')

In [None]:
# (source column, exported column) pairs; columns not listed keep their name.
rename_pairs = [
    ("sub_region_1", "PROVINCE_STATE"),
    ("country_region_code", "ISO_3166_1"),
    ("grocery_and_pharmacy_percent_change_from_baseline", "grocery_and_pharmacy_change_perc"),
    ("parks_percent_change_from_baseline", "parks_change_perc"),
    ("residential_percent_change_from_baseline", "residential_change_perc"),
    ("retail_and_recreation_percent_change_from_baseline", "retail_and_recreation_change_perc"),
    ("transit_stations_percent_change_from_baseline", "transit_stations_change_perc"),
    ("workplaces_percent_change_from_baseline", "workplaces_change_perc"),
]
column_map = dict(rename_pairs)
df = df.rename(columns=column_map)

In [None]:
# Preview the first rows of the transformed frame.
df.head()

In [None]:
# Show the dtype of every column before exporting.
df.dtypes

In [None]:
# Columns written to the export, in file order; the index is left out.
export_columns = [
    "country_region",
    "PROVINCE_STATE",
    "ISO_3166_1",
    "ISO_3166_2",
    "date",
    "grocery_and_pharmacy_change_perc",
    "parks_change_perc",
    "residential_change_perc",
    "retail_and_recreation_change_perc",
    "transit_stations_change_perc",
    "workplaces_change_perc",
    "Last_Update_Date",
    "Last_Reported_Flag",
    "sub_region_2",
]
export_path = output_folder + "GOOG_GLOBAL_MOBILITY_REPORT.csv"
df.to_csv(export_path, columns=export_columns, index=False)