In [1]:
# Data manipulation libraries
import numpy as np
import pandas as pd
from collections import defaultdict

import matplotlib.pyplot as plt
# System libraries
import glob
import os
import sys

# Library for reading through pdf
# pip install pymupdf
# pip install fitz
import fitz

**Data Collection**  
Using a bash script to save all the PDF files from the [google mobility site](https://www.google.com/covid19/mobility/) and store them in a folder.

In [2]:
%%bash

mkdir -p mobility_pdfs/
cd mobility_pdfs/
# hacky way to create a list of states in USA
states_list="Alabama Alaska Arizona Arkansas California Colorado Connecticut Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota Mississippi Missouri Montana Nebraska Nevada New_Hampshire New_Jersey New_Mexico New_York North_Carolina North_Dakota Ohio Oklahoma Oregon Pennsylvania Rhode_Island South_Carolina South_Dakota Tennessee Texas Utah Vermont Virginia Washington West_Virginia Wisconsin Wyoming"
country_list = "US Spain Italy France Germany"
country_list_short = "US ES IT FR DE"
date="2020-04-05"

# Get the pdfs for the mobility information of the states
for state in $states_list ; do
    curl -s -O https://www.gstatic.com/covid19/mobility/${date}_US_${state}_Mobility_Report_en.pdf
done
# Get the pdf for the US country
curl -s -o ${date}_US_US_Mobility_Report_en.pdf https://www.gstatic.com/covid19/mobility/${date}_US_Mobility_Report_en.pdf

for country in $country_list_short ; do
    curl -s -O https://www.gstatic.com/covid19/mobility/${date}_${country}_Mobility_Report_en.pdf
done

bash: line 6: country_list: command not found
bash: line 7: country_list_short: command not found


**PDF Parser**  
Obtained this script online for parsing from an incoming stream of data and reading the plots in the page. 

In [3]:
def parse_streaming_data(stream):
    data_raw = []
    data_transformed = []
    rotparams = None
    npatches = 0
    for line in stream.splitlines():
        if line.endswith(" cm"):
            # page 146 of https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/pdf_reference_archives/PDFReference.pdf
            rotparams = list(map(float,line.split()[:-1]))
        elif line.endswith(" l"):
            x,y = list(map(float,line.split()[:2]))
            a,b,c,d,e,f = rotparams
            xp = a*x+c*y+e
            yp = b*x+d*y+f
            data_transformed.append([xp,yp])
            data_raw.append([x,y])
        elif line.endswith(" m"):
            npatches += 1
        else:
            pass
    data_raw = np.array(data_raw)
    basex, basey = data_raw[-1]
    good = False
    if basex == 0.:
        data_raw[:,1] = basey - data_raw[:,1]
        data_raw[:,1] *= 100/60.
        data_raw = data_raw[data_raw[:,1]!=0.]
        if npatches == 1: good = True
    return dict(data=np.array(data_raw), npatches=npatches, good=good)

**PDF Parser**  
The method below parses the pdf by reading lines from it iteratviely and based on specified conditions, stores the information in arrays.

In [4]:
def parse_page(doc, ipage, verbose=False):
    # Set of the categories required
    categories_list = [
        "Retail & recreation",
        "Grocery & pharmacy",
        "Parks",
        "Transit stations",
        "Workplace",
        "Residential",
    ]
    
    counties = []
    curr_county = None
    curr_category = None
    data = defaultdict(lambda: defaultdict(list))
    pagetext = doc.getPageText(ipage)
    lines = pagetext.splitlines()
    tickdates = list(filter(lambda x:len(x.split())==3, set(lines[-10:])))
    #print (tickdates)
    count  =0
    for line in lines:
        # Removing unwanted data from the page
        if ("* Not enough data") in line: continue
        if ("needs a significant volume of data") in line: continue

        # If found the category line, add it to the dictionary, else keep iterating over.
        if any(line.startswith(category) for category in categories_list):
            curr_category = line
        elif curr_category:
            data[curr_county][curr_category].append(line)

        # Filtering data to find the county information
        if (all(category not in line for category in categories_list)
            and ("compared to baseline" not in line)
            and ("Not enough data" not in line)
           ):
            # Only two counties per page
            if len(data.keys()) == 2: break
            count +=1
            #print (line, count)    
            counties.append(line)
            curr_county = line
            
    # Debugging entry skipping 
    if (ipage==5):
        print (ipage,counties, "\n")
    for county in data :
        print (county)
        newdata = {}
    for county in data:
        newdata[county] = {}
        
        for category in data[county]:
            # Skipping the ones with no data. We get to know that based on the space and * in the Pdf
            if category.endswith(" "): continue
            temp = [x for x in data[county][category] if "compared to baseline" in x]
            if not temp: continue
            percent = int(temp[0].split()[0].replace("%",""))
            newdata[county][category.strip()] = percent
    data = newdata
    for county in data :
        print (county, data[county])
    tomatch = []
    #Create a list of counties and the available categories for the given county
    for county in counties:
        for category in categories_list:
            if category in data[county]:
                tomatch.append([county,category,data[county][category]])
                

    print(len(tomatch))
    print(data)

    # Get the readable plots from the page ( Since there are broken and empty plots in the page)
    readableplots = []
    xrefs = sorted(doc.getPageXObjectList(ipage), key=lambda x:int(x[1].replace("X","")))
    for i,xref in enumerate(xrefs):
        stream = doc.xrefStream(xref[0]).decode()
        info = parse_streaming_data(stream)
        if not info["good"]: continue
        readableplots.append(info)
    
    print(len(readableplots))
    
    ret = []
    

    
    for m,g in zip(tomatch,readableplots):
        xs = g["data"][:,0]
        ys = g["data"][:,1]
        maxys = ys[np.where(xs==xs.max())[0]]
        maxy = maxys[np.argmax(np.abs(maxys))]
        
        
        # Parse the dates as text and then based on min to max value, create a range of dates and store it in the dictionary
        ts = list(map(lambda x: pd.Timestamp(x.split(None,1)[-1] + ", 2020"), tickdates))
        low, high = min(ts), max(ts)
        dr = list(map(lambda x:str(x).split()[0], pd.date_range(low, high, freq="D")))
        lutpairs = list(zip(np.linspace(0,200,len(dr)),dr))

        dates = []
        values = []
        asort = xs.argsort()
        xs = xs[asort]
        ys = ys[asort]
        for x,y in zip(xs,ys):
            date = min(lutpairs, key=lambda v:abs(v[0]-x))[1]
            dates.append(date)
            values.append(round(y,3))

        ret.append(dict(
            county=m[0],category=m[1],change=m[2],
            values=values,
            dates=dates,
            changecalc=maxy,
        ))
    return ret

In [5]:
# Create the dataframe for the county and state data.
def parse_state(state):
    doc = fitz.Document(f"mobility_pdfs/2020-04-05_US_{state}_Mobility_Report_en.pdf")
    data = []
    # 2 because we are skipping the first 2 pages from the PDF.
    for i in range(2,doc.pageCount-1):
        #print (i, "The actual page")
        for entry in parse_page(doc, i):
            entry["state"] = state
            entry["page"] = i
            print (i, "the page numbers")
            data.append(entry)
    outname = f"data/{state}.json.gz"
    df = pd.DataFrame(data)
    #ncounties = df['county'].nunique()
    ncounties =df['county'].nunique()
    print(f"Parsed {len(df)} plots for {ncounties} counties in {state}")
    df = df[["state","county","category","change","changecalc","dates", "values","page"]]
    return df

In [6]:
# Create the dataframe for the specific state
string = "Texas"
df = parse_state(string)
csv_name = string +"_mobility.csv"
df.to_csv(csv_name)
parse_state(string).head(100)

Anderson County
Andrews County
Anderson County {'Retail & recreation': -39, 'Grocery & pharmacy': -6, 'Workplace': -26}
Andrews County {'Retail & recreation': -45, 'Grocery & pharmacy': -40, 'Workplace': -31}
6
{'Anderson County': {'Retail & recreation': -39, 'Grocery & pharmacy': -6, 'Workplace': -26}, 'Andrews County': {'Retail & recreation': -45, 'Grocery & pharmacy': -40, 'Workplace': -31}}
6
2 the page numbers
2 the page numbers
2 the page numbers
2 the page numbers
2 the page numbers
2 the page numbers
Angelina County
Aransas County
Angelina County {'Retail & recreation': -48, 'Grocery & pharmacy': -17, 'Transit stations': -40, 'Workplace': -27, 'Residential': 15}
Aransas County {'Retail & recreation': -46, 'Grocery & pharmacy': -35, 'Workplace': -34}
8
{'Angelina County': {'Retail & recreation': -48, 'Grocery & pharmacy': -17, 'Transit stations': -40, 'Workplace': -27, 'Residential': 15}, 'Aransas County': {'Retail & recreation': -46, 'Grocery & pharmacy': -35, 'Workplace': -34}

Culberson County
Dallam County
Culberson County {'Retail & recreation': -54, 'Transit stations': -40}
Dallam County {'Retail & recreation': -38, 'Grocery & pharmacy': -21}
4
{'Culberson County': {'Retail & recreation': -54, 'Transit stations': -40}, 'Dallam County': {'Retail & recreation': -38, 'Grocery & pharmacy': -21}}
4
25 the page numbers
25 the page numbers
25 the page numbers
25 the page numbers
Dallas County
Dawson County
Dallas County {'Retail & recreation': -49, 'Grocery & pharmacy': -20, 'Parks': -46, 'Transit stations': -51, 'Workplace': -42, 'Residential': 15}
Dawson County {'Retail & recreation': -35, 'Grocery & pharmacy': -9}
8
{'Dallas County': {'Retail & recreation': -49, 'Grocery & pharmacy': -20, 'Parks': -46, 'Transit stations': -51, 'Workplace': -42, 'Residential': 15}, 'Dawson County': {'Retail & recreation': -35, 'Grocery & pharmacy': -9}}
8
26 the page numbers
26 the page numbers
26 the page numbers
26 the page numbers
26 the page numbers
26 the page numbers
26 

42 the page numbers
42 the page numbers
42 the page numbers
42 the page numbers
42 the page numbers
42 the page numbers
42 the page numbers
42 the page numbers
Hall County
Hamilton County
Hall County {}
Hamilton County {'Retail & recreation': -54, 'Grocery & pharmacy': -35}
2
{'Hall County': {}, 'Hamilton County': {'Retail & recreation': -54, 'Grocery & pharmacy': -35}}
2
43 the page numbers
43 the page numbers
Hardeman County
Hardin County
Hardeman County {'Transit stations': -25}
Hardin County {'Retail & recreation': -39, 'Grocery & pharmacy': -10, 'Workplace': -36}
4
{'Hardeman County': {'Transit stations': -25}, 'Hardin County': {'Retail & recreation': -39, 'Grocery & pharmacy': -10, 'Workplace': -36}}
4
44 the page numbers
44 the page numbers
44 the page numbers
44 the page numbers
Harris County
Harrison County
Harris County {'Retail & recreation': -46, 'Grocery & pharmacy': -20, 'Parks': -38, 'Transit stations': -62, 'Workplace': -40, 'Residential': 14}
Harrison County {'Retail &

67 the page numbers
67 the page numbers
Matagorda County
Maverick County
Matagorda County {'Retail & recreation': -46, 'Grocery & pharmacy': -26, 'Workplace': -13}
Maverick County {'Retail & recreation': -61, 'Grocery & pharmacy': -41, 'Transit stations': -68, 'Workplace': -41}
7
{'Matagorda County': {'Retail & recreation': -46, 'Grocery & pharmacy': -26, 'Workplace': -13}, 'Maverick County': {'Retail & recreation': -61, 'Grocery & pharmacy': -41, 'Transit stations': -68, 'Workplace': -41}}
7
68 the page numbers
68 the page numbers
68 the page numbers
68 the page numbers
68 the page numbers
68 the page numbers
68 the page numbers
McCulloch County
McLennan County
McCulloch County {'Retail & recreation': -37, 'Grocery & pharmacy': -22}
McLennan County {'Retail & recreation': -49, 'Grocery & pharmacy': -23, 'Parks': -33, 'Transit stations': -36, 'Workplace': -36, 'Residential': 12}
8
{'McCulloch County': {'Retail & recreation': -37, 'Grocery & pharmacy': -22}, 'McLennan County': {'Retail 

85 the page numbers
85 the page numbers
85 the page numbers
85 the page numbers
85 the page numbers
85 the page numbers
85 the page numbers
Robertson County
Rockwall County
Robertson County {'Retail & recreation': -52, 'Grocery & pharmacy': -25, 'Transit stations': -37}
Rockwall County {'Retail & recreation': -47, 'Grocery & pharmacy': -27, 'Transit stations': -26, 'Workplace': -40}
7
{'Robertson County': {'Retail & recreation': -52, 'Grocery & pharmacy': -25, 'Transit stations': -37}, 'Rockwall County': {'Retail & recreation': -47, 'Grocery & pharmacy': -27, 'Transit stations': -26, 'Workplace': -40}}
7
86 the page numbers
86 the page numbers
86 the page numbers
86 the page numbers
86 the page numbers
86 the page numbers
86 the page numbers
Runnels County
Rusk County
Runnels County {'Retail & recreation': -25, 'Grocery & pharmacy': -17}
Rusk County {'Retail & recreation': -30, 'Grocery & pharmacy': -7, 'Workplace': -32}
5
{'Runnels County': {'Retail & recreation': -25, 'Grocery & phar

107 the page numbers
107 the page numbers
107 the page numbers
107 the page numbers
107 the page numbers
107 the page numbers
107 the page numbers
107 the page numbers
107 the page numbers
Winkler County
Wise County
Winkler County {'Retail & recreation': -30, 'Grocery & pharmacy': -25}
Wise County {'Retail & recreation': -39, 'Grocery & pharmacy': -17, 'Transit stations': -24, 'Workplace': -37}
6
{'Winkler County': {'Retail & recreation': -30, 'Grocery & pharmacy': -25}, 'Wise County': {'Retail & recreation': -39, 'Grocery & pharmacy': -17, 'Transit stations': -24, 'Workplace': -37}}
6
108 the page numbers
108 the page numbers
108 the page numbers
108 the page numbers
108 the page numbers
108 the page numbers
Wood County
Yoakum County
Wood County {'Retail & recreation': -38, 'Grocery & pharmacy': -11, 'Workplace': -37}
Yoakum County {'Grocery & pharmacy': -12}
4
{'Wood County': {'Retail & recreation': -38, 'Grocery & pharmacy': -11, 'Workplace': -37}, 'Yoakum County': {'Grocery & pharm

7
13 the page numbers
13 the page numbers
13 the page numbers
13 the page numbers
13 the page numbers
13 the page numbers
13 the page numbers
Calhoun County
Callahan County
Calhoun County {'Retail & recreation': -43, 'Grocery & pharmacy': -17, 'Workplace': -21}
Callahan County {'Retail & recreation': -30, 'Grocery & pharmacy': -15, 'Transit stations': -28}
6
{'Calhoun County': {'Retail & recreation': -43, 'Grocery & pharmacy': -17, 'Workplace': -21}, 'Callahan County': {'Retail & recreation': -30, 'Grocery & pharmacy': -15, 'Transit stations': -28}}
6
14 the page numbers
14 the page numbers
14 the page numbers
14 the page numbers
14 the page numbers
14 the page numbers
Cameron County
Camp County
Cameron County {'Retail & recreation': -68, 'Grocery & pharmacy': -46, 'Parks': -78, 'Transit stations': -71, 'Workplace': -43, 'Residential': 19}
Camp County {'Grocery & pharmacy': -9}
7
{'Cameron County': {'Retail & recreation': -68, 'Grocery & pharmacy': -46, 'Parks': -78, 'Transit stations'

8
40 the page numbers
40 the page numbers
40 the page numbers
40 the page numbers
40 the page numbers
40 the page numbers
40 the page numbers
40 the page numbers
Gregg County
Grimes County
Gregg County {'Retail & recreation': -46, 'Grocery & pharmacy': -15, 'Parks': -25, 'Workplace': -34, 'Residential': 11}
Grimes County {'Retail & recreation': -28, 'Grocery & pharmacy': -5, 'Workplace': -29}
8
{'Gregg County': {'Retail & recreation': -46, 'Grocery & pharmacy': -15, 'Parks': -25, 'Workplace': -34, 'Residential': 11}, 'Grimes County': {'Retail & recreation': -28, 'Grocery & pharmacy': -5, 'Workplace': -29}}
8
41 the page numbers
41 the page numbers
41 the page numbers
41 the page numbers
41 the page numbers
41 the page numbers
41 the page numbers
41 the page numbers
Guadalupe County
Hale County
Guadalupe County {'Retail & recreation': -41, 'Grocery & pharmacy': -24, 'Transit stations': -32, 'Workplace': -37, 'Residential': 13}
Hale County {'Retail & recreation': -39, 'Grocery & pharmacy

59 the page numbers
59 the page numbers
59 the page numbers
59 the page numbers
59 the page numbers
59 the page numbers
Lamar County
Lamb County
Lamar County {'Retail & recreation': -39, 'Grocery & pharmacy': -7, 'Workplace': -17}
Lamb County {'Retail & recreation': -19, 'Grocery & pharmacy': -11, 'Workplace': -24}
6
{'Lamar County': {'Retail & recreation': -39, 'Grocery & pharmacy': -7, 'Workplace': -17}, 'Lamb County': {'Retail & recreation': -19, 'Grocery & pharmacy': -11, 'Workplace': -24}}
4
60 the page numbers
60 the page numbers
60 the page numbers
60 the page numbers
Lampasas County
Lavaca County
Lampasas County {'Retail & recreation': -42, 'Grocery & pharmacy': -31, 'Workplace': -38}
Lavaca County {'Retail & recreation': -35, 'Grocery & pharmacy': -16}
5
{'Lampasas County': {'Retail & recreation': -42, 'Grocery & pharmacy': -31, 'Workplace': -38}, 'Lavaca County': {'Retail & recreation': -35, 'Grocery & pharmacy': -16}}
4
61 the page numbers
61 the page numbers
61 the page num

82 the page numbers
82 the page numbers
82 the page numbers
Randall County
Reagan County
Randall County {'Retail & recreation': -37, 'Grocery & pharmacy': -19, 'Workplace': -36, 'Residential': 11}
Reagan County {'Grocery & pharmacy': -22}
5
{'Randall County': {'Retail & recreation': -37, 'Grocery & pharmacy': -19, 'Workplace': -36, 'Residential': 11}, 'Reagan County': {'Grocery & pharmacy': -22}}
5
83 the page numbers
83 the page numbers
83 the page numbers
83 the page numbers
83 the page numbers
Real County
Red River County
Real County {}
Red River County {}
0
{'Real County': {}, 'Red River County': {}}
0
Reeves County
Refugio County
Reeves County {'Retail & recreation': -39, 'Grocery & pharmacy': -16, 'Transit stations': -20, 'Workplace': -10}
Refugio County {'Retail & recreation': -69, 'Grocery & pharmacy': -44, 'Transit stations': -52}
7
{'Reeves County': {'Retail & recreation': -39, 'Grocery & pharmacy': -16, 'Transit stations': -20, 'Workplace': -10}, 'Refugio County': {'Retail &

101 the page numbers
101 the page numbers
101 the page numbers
101 the page numbers
101 the page numbers
101 the page numbers
101 the page numbers
101 the page numbers
101 the page numbers
Walker County
Waller County
Walker County {'Retail & recreation': -44, 'Grocery & pharmacy': -19, 'Parks': -33, 'Transit stations': -47, 'Workplace': -34}
Waller County {'Retail & recreation': -49, 'Grocery & pharmacy': -16, 'Transit stations': -28, 'Workplace': -37}
9
{'Walker County': {'Retail & recreation': -44, 'Grocery & pharmacy': -19, 'Parks': -33, 'Transit stations': -47, 'Workplace': -34}, 'Waller County': {'Retail & recreation': -49, 'Grocery & pharmacy': -16, 'Transit stations': -28, 'Workplace': -37}}
9
102 the page numbers
102 the page numbers
102 the page numbers
102 the page numbers
102 the page numbers
102 the page numbers
102 the page numbers
102 the page numbers
102 the page numbers
Ward County
Washington County
Ward County {'Retail & recreation': -33, 'Grocery & pharmacy': -17, 'Wo

Unnamed: 0,state,county,category,change,changecalc,dates,values,page
0,Texas,Anderson County,Retail & recreation,-39,-40.993347,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[-4.357, 9.533, 11.36, 22.25, 17.169, 15.157, ...",2
1,Texas,Anderson County,Grocery & pharmacy,-6,-5.767015,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[3.604, -0.947, -0.629, 7.034, 5.682, 14.153, ...",2
2,Texas,Anderson County,Workplace,-26,-27.372258,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[0.76, 2.06, 3.072, 5.378, 2.52, 4.191, 2.741,...",2
3,Texas,Andrews County,Retail & recreation,-45,-47.290382,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[-1.917, 6.448, 1.82, 18.136, 17.054, 12.731, ...",2
4,Texas,Andrews County,Grocery & pharmacy,-40,-41.525905,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[-9.15, 0.62, -4.315, -1.19, 7.485, 2.943, 0.6...",2
...,...,...,...,...,...,...,...,...
95,Texas,Cass County,Grocery & pharmacy,-3,-2.834467,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[2.126, 6.623, -3.492, 12.112, 6.401, 11.018, ...",16
96,Texas,Cass County,Transit stations,-25,-26.041667,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[1.736, -1.628, -3.906, 5.274, 13.355, -4.112,...",16
97,Texas,Cass County,Workplace,-36,-37.037035,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[-3.472, 3.09, -0.425, 0.837, 0.85, 5.556, 7.5...",16
98,Texas,Chambers County,Retail & recreation,-47,-49.424032,"[2020-02-23, 2020-02-24, 2020-02-25, 2020-02-2...","[10.378, -0.136, 8.625, 15.07, 13.19, 11.161, ...",17


In [7]:
#Script for scraping the initial two pages of the pdf file
def parse_page_total(doc, ipage, verbose=False):
    """
    First two pages
    """
    category_list = [
        "Retail & recreation",
        "Grocery & pharmacy",
        "Parks",
        "Transit stations",
        "Workplaces",  # They have workplaces there instead of workplace
        "Residential",
    ]

    curr_category = None
    data = defaultdict(lambda: defaultdict(list))
    pagetext = doc.getPageText(ipage)
    lines = pagetext.splitlines()
    tickdates = []
    for line in lines:

        if ("* Not enough data") in line: continue
        if ("needs a significant volume of data") in line: continue
        # Added this condition to check for the extra text in the right of the page
        if 'Mobility trends ' in line or 'hubs' in line: continue
        # If found the category line, add it to the dictionary, else keep iterating over.
        if any(line.startswith(category) for category in categoriy_list):
            curr_category = line
        # Checking for x axis in the details
        elif line[:3] in ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'):
            tickdates.append(line)
        elif line[0] not in ('+', '-'):
            continue
        elif curr_category:
            data[curr_category] = data.get(curr_category, []) + [line]

    newdata = {}
    for category in data:
        # Skipping the ones with no data. We get to know that based on the space and * in the Pdf
        if category.endswith(" "): continue
        temp = data[category][0]
        percent = int(temp.split()[0].replace("%",""))
        newdata[category.strip()] = percent
    data = newdata

    tomatch = []
    # Create a list of counties and the available categories for the given county
    for category in categories:
        if category in data:
            tomatch.append([category,data[category]])

    # Get the readable plots from the page ( Since there are broken and empty plots in the page)
    readableplots = []
    xrefs = sorted(doc.getPageXObjectList(ipage), key=lambda x:int(x[1].replace("X","")))
    for _, xref in enumerate(xrefs):
        stream = doc.xrefStream(xref[0]).decode()
        info = parse_streaming_data(stream)
        if not info["good"]:
            continue
        readableplots.append(info)
    
    print(len(readableplots))
    
    ret = []
    
    if len(tomatch) != len(readableplots):
        return ret
    
    for m,g in zip(tomatch,plots):
        xs = g["data"][:,0]
        ys = g["data"][:,1]
        maxys = ys[np.where(xs==xs.max())[0]]
        maxy = maxys[np.argmax(np.abs(maxys))]
        
        
        # Parse the dates as text and then based on min to max value, create a range of dates and store it in the dictionary
        ts = list(map(lambda x: pd.Timestamp(x.split(None,1)[-1] + ", 2020"), tickdates))
        low, high = min(ts), max(ts)
        dr = list(map(lambda x:str(x).split()[0], pd.date_range(low, high, freq="D")))
        lutpairs = list(zip(np.linspace(0,200,len(dr)),dr))

        dates = []
        values = []
        asort = xs.argsort()
        xs = xs[asort]
        ys = ys[asort]
        for x,y in zip(xs,ys):
            date = min(lutpairs, key=lambda v:abs(v[0]-x))[1]
            dates.append(date)
            values.append(round(y,3))

        ret.append(dict(
            category=m[0],change=m[1],
            values=values,
            dates=dates,
            changecalc=maxy,
        ))
    return ret


# Create the dataframe for the country data.

def parse_country(country):
    doc = fitz.Document(f"mobility_pdfs/2020-04-05_{country}_Mobility_Report_en.pdf")
    data = []
    for i in range(2):
        for entry in parse_page_total(doc, i):
            entry['state'] = state
            entry['page'] = i
            entry['county'] = 'total'
            data.append(entry)
    df = pd.DataFrame(data)
    return df

In [8]:
# Will replace this with a loop once a list for the codes for all the other countries
df_ES = parse_state_total("ES")
df_IT = parse_state_total("IT")
df_FR = parse_state_total("FR")
df_DE = parse_state_total("DE")

combined = [df_US, df_ES, df_IT, df_FR, df_DE]
df_comb = pd.concat(combined)
df_comb.head(100)

NameError: name 'parse_state_total' is not defined