<a href="https://colab.research.google.com/github/Rohan-Dawar/covid-gifs/blob/main/usa_covid_gif_maker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **COVID-19 in USA GIF Maker:**

### **What this script does**:
#### This script creates .gif files from a series of individual .png files
#### These .png files correspond to daily statistics sourced from the [NYTimes GitHub bot](https://github.com/nytimes/covid-19-data)
#### Functionality is implemented to visualize the USA by state or county, and by raw numbers or rates

### **What is being actively worked on**:
#### 1. Standardizing image size in the png loop
#### 2. NYC borough dispersion
#### 3. Kansas City and Joplin, MO dispersion
#### 4. Rates vs. Raw functionality in main()
#### 5. Alaska & Hawaii insets
#### 6. sub=0 (state) visualization in main()
#### 7. Unknown county to average statewide dispersion


## Requirements

In [None]:
!pip install geopandas
!pip install descartes
!pip install pygifsicle
!sudo apt-get install gifsicle

Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/f7/a4/e66aafbefcbb717813bf3a355c8c4fc3ed04ea1dd7feb2920f2f4f868921/geopandas-0.8.1-py2.py3-none-any.whl (962kB)
[K     |████████████████████████████████| 972kB 6.1MB/s 
[?25hCollecting pyproj>=2.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/e4/ab/280e80a67cfc109d15428c0ec56391fc03a65857b7727cf4e6e6f99a4204/pyproj-3.0.0.post1-cp36-cp36m-manylinux2010_x86_64.whl (6.4MB)
[K     |████████████████████████████████| 6.5MB 30.2MB/s 
[?25hCollecting fiona
[?25l  Downloading https://files.pythonhosted.org/packages/37/94/4910fd55246c1d963727b03885ead6ef1cd3748a465f7b0239ab25dfc9a3/Fiona-1.8.18-cp36-cp36m-manylinux1_x86_64.whl (14.8MB)
[K     |████████████████████████████████| 14.8MB 317kB/s 
Collecting munch
  Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Collecting cligj>=0.5
  Downloadin

## Dependencies

In [None]:
import geopandas as gpd
import pandas as pd
import descartes
import matplotlib.pyplot as plt
import requests, io, os

from IPython.display import clear_output

from zipfile import ZipFile as zzip
from timeit import default_timer as timer

import imageio
from pygifsicle import optimize

from PIL import Image

## Auto Grab Shapefile

In [None]:
def grabSF(SFURL):
  """ Takes the URL to the relevant shapefile (SFURL) and returns a geodataframe to be used in the main function

  The zip archive is downloaded into "shapefile_data/" and unpacked into "shapefile_data/contents/".
  The .shp file inside the archive is expected to share the archive's base name.

  Args:
    SFURL: URL of a zipped shapefile
  Returns:
    geodataframe without the bookkeeping columns, with GEOID converted to int and renamed to "fips"
  Raises:
    requests.HTTPError: if the download does not return a successful status
  """

  r = requests.get(SFURL)
  # Stop here on a failed download instead of saving an error page as a .zip
  r.raise_for_status()

  filename = SFURL.split('/')[-1]
  basename = filename.split('.')[0]

  fp = "shapefile_data"
  os.makedirs(fp, exist_ok=True)

  archive_path = os.path.join(fp, filename)
  with open(archive_path, "wb") as file:
    file.write(r.content)

  contents_dir = os.path.join(fp, "contents")
  shp_path = os.path.join(contents_dir, basename + '.shp')

  # Check for this archive's own .shp rather than for the "contents" folder:
  # the folder already exists after the first download, which previously
  # prevented any other shapefile from ever being extracted
  if not os.path.exists(shp_path):
    os.makedirs(contents_dir, exist_ok=True)
    with zzip(archive_path, 'r') as archive:
      archive.extractall(contents_dir)

  geo_df = gpd.read_file(shp_path)

  # errors='ignore' lets shapefiles that lack some of these columns through
  # (presumably the state-level file has no county columns - TODO confirm)
  unused_cols = ['STATEFP', 'COUNTYFP', 'COUNTYNS', 'AFFGEOID', 'LSAD', 'ALAND', 'AWATER']
  covid_geodf = geo_df.drop(columns=unused_cols, errors='ignore')
  covid_geodf['GEOID'] = covid_geodf['GEOID'].astype(int)
  covid_geodf = covid_geodf.rename(columns={"GEOID": "fips"})

  return covid_geodf

## State/County Population Data

Source: 

https://www.census.gov/data/datasets/time-series/demo/popest/2010s-counties-total.html

https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv

In [None]:
def popCsvToDf(sub, censuscsv):
  """ Takes a relevant .csv census data file and returns a population dataframe to be used in the main function

  Both subdivisions currently receive the same processing: rows with SUMLEV == 40 are removed and
  a numeric "fips" key is built from the STATE code followed by the COUNTY code zero-padded to 3 digits.

  Args:
    sub: subdivision, 0 (states) or 1 (counties)
    censuscsv: path, URL or file-like object of the census .csv; must contain the
      SUMLEV, STATE, COUNTY and POPESTIMATE2015 columns
  Returns:
    dataframe with the columns SUMLEV, STATE, COUNTY, POPESTIMATE2015 and fips
  Raises:
    ValueError: if sub is neither 0 nor 1 (previously None was returned silently)
  """

  if sub not in (0, 1):
    raise ValueError("sub must be either 0 (states) or 1 (counties)")

  pop_df = pd.read_csv(censuscsv, encoding='latin-1')

  # .copy() gives an independent frame, so adding the fips column below
  # is not an assignment on a slice of the raw data
  keep_cols = ['SUMLEV', 'STATE', 'COUNTY', 'POPESTIMATE2015']
  pop_df = pop_df.loc[pop_df['SUMLEV'] != 40, keep_cols].copy()

  # The original sort_values() call discarded its result, so row order was
  # never changed; the call is dropped and the original order is kept
  fips_text = pop_df['STATE'].astype(str) + pop_df['COUNTY'].astype(str).str.zfill(3)
  pop_df['fips'] = pd.to_numeric(fips_text)

  return pop_df

## Assign Covid Data to State/County

In [None]:
def save_map_img(df, date, col):
  """ Plots the data held in column col of the geodataframe df and saves the map as an image named after date

  The image is written into the "mappng" folder, which is created if it does not exist yet.
  """

  if not os.path.exists("mappng"):
    os.mkdir("mappng")

  # Settings for the horizontal colour bar drawn as the legend
  colorbar_opts = {
      'label': "Percent (%)",
      'orientation': "horizontal",
      'pad': 0.01,
      'format': "%.0f",
  }
  ax = df.plot(figsize=(16, 16), column=col, cmap='Reds', legend=True,
               legend_kwds=colorbar_opts)

  # Fixed view window so every frame shows the same extent
  ax.set_xlim(-127, -66)
  ax.set_ylim(24, 50)
  ax.set_title("% Of County Actively Infected with COVID-19, USA", fontsize=22)
  ax.set_xticks([0])
  ax.set_yticks([0])

  # (text, position, font size) of the labels drawn on the map
  labels = (
      (f"{date}", (-82, 47), 24),
      ("Created by Rohan Dawar", (-120, 27), 18),
      ("www.rohandawar.com", (-120, 26), 18),
  )
  for text, position, size in labels:
    ax.annotate(text, xy=position, fontsize=size)

  ax.axis("off")

  ax.figure.savefig(fname=f"mappng/{date}", bbox_inches='tight', pad_inches=0, dpi=80)
  # Close the figure(s) so they do not pile up across calls
  plt.close('all')

In [None]:
def covidDf(sub, covidcsv):
  """ Takes relevant .csv with covid-19 data and produces a dataframe

  Args:
    sub: subdivision, 0 (states) or 1 (counties)
    covidcsv: URL of the covid-19 .csv file
  Returns:
    sub == 0: the downloaded data unchanged
    sub == 1: the data without rows whose county is "Unknown", and with the
      fips of the "New York City" rows set to 36061
  Raises:
    ValueError: if sub is neither 0 nor 1 (previously None was returned silently)
    requests.HTTPError: if the download does not return a successful status
  """

  if sub not in (0, 1):
    raise ValueError("sub must be either 0 (states) or 1 (counties)")

  r = requests.get(covidcsv)
  # Stop here on a failed download instead of parsing an error page as csv
  r.raise_for_status()

  df = pd.read_csv(io.StringIO(r.content.decode('utf-8')))

  if sub == 0: #state
    return df

  #county
  # .copy() makes the filtered frame independent, so the .loc assignment
  # below does not write to a slice of the full dataset
  df = df[df['county'] != "Unknown"].copy()
  # Give the "New York City" rows a fips value so they can be matched on fips later
  df.loc[df['county'] == "New York City", 'fips'] = 36061
  return df

# df = df[df['county'] != "New York City"]
# df = df[df['county'] != "Kansas City"]
# df = df[df['county'] != "Joplin"]

# df[df['date'] == '2020-03-01']

# nulldf = df[df['fips'].isnull()]
# nulldf1 = nulldf[nulldf['county'] != 'New York City'] #fips -> 36061
# nulldf2 = nulldf1[nulldf1['county'] != 'Kansas City']
# nulldf3 = nulldf2[nulldf2['county'] != 'Joplin']

In [None]:
def pngloop(geodf, popdf, df):
  """ Takes the previously generated geodataframe, population dataframe and covid dataframe to generate map images for each date

  For every distinct date in df, that day's rows are merged with geodf and popdf on "fips",
  cases are expressed as a percentage of POPESTIMATE2015 and the map is saved with save_map_img.

  Args:
    geodf: geodataframe with a "fips" column
    popdf: population dataframe with "fips" and "POPESTIMATE2015" columns
    df: covid dataframe with "date", "fips", "cases", "state" and "deaths" columns
  """

  datelist = df.date.unique()
  totalitems = len(datelist)

  # geodf is never modified inside the loop, so clearing leftover case
  # columns once up front is equivalent to doing it on every iteration
  geodf = geodf.drop(columns=['cases', 'nowcases'], errors='ignore')

  # Count from 1 so the progress shown after saving a map includes that map
  # (the 0-based counter reported 0% after the first map and never hit 100%)
  for count, specificday in enumerate(datelist, start=1):
    start = timer()

    day_series = df[df['date'] == specificday]
    day_series = day_series.rename(columns={'cases' : "nowcases"}).drop(labels=['state', 'deaths'],axis=1)

    newdf_test = pd.merge(geodf, day_series, on="fips").fillna(0)
    newdf = pd.merge(newdf_test, popdf, on="fips")
    newdf['casesperpop'] = newdf['nowcases']*100/newdf['POPESTIMATE2015']

    save_map_img(newdf, specificday, 'casesperpop')

    end = timer()
    pctcomplete = (count/totalitems)*100
    clear_output() #os.system('cls')
    print(f"{round(pctcomplete, 2)}% - Saved Map for {specificday}, in {round(end - start, 2)}s")

  print("All PNG Files Complete")

## Standardize Image Dimensions

In [None]:
def resize(folder="mappng"):
  """ Requires files to be in the relevant folder, resizes all images to standardized dimensions

  Every image is resized to the largest width and the largest height found in the folder,
  and is overwritten in place.

  Args:
    folder: folder holding the images; defaults to the relative "mappng" folder that
      save_map_img writes to (the same location as /content/mappng when the working
      directory is /content)
  """

  imgs = [f for f in sorted(os.listdir(folder)) if os.path.isfile(os.path.join(folder, f))]
  numfiles = len(imgs)

  # Nothing to measure: max() on an empty list would raise
  if numfiles == 0:
    print("No PNG Files Found To Resize")
    return

  widthlist = []
  heightlist = []

  for file in imgs:
    # The with-block closes the file handle once the size has been read
    with Image.open(os.path.join(folder, file)) as img:
      w, h = img.size
    widthlist.append(w)
    heightlist.append(h)

  # Previously the width of whichever image happened to be opened last was
  # used; take the maximum of both dimensions instead
  target = (max(widthlist), max(heightlist))

  for count, file in enumerate(imgs, start=1):
    f_img = os.path.join(folder, file)
    with Image.open(f_img) as img:
      resized = img.resize(target)
    resized.save(f_img)
    clear_output() #os.system('cls')
    print(f"Resized {count}/{numfiles} - {file}")
  print("All PNG Files Resized!")

## GIF MAKING

In [None]:
def toGIF(gifframe, folder="mappng", outfile="movie.gif"):
  """ Writes .gif from all .png files in relevant folder
  Args:
    gifframe: taken from main(gif) parameter, determines the duration of frames in .gif
    folder: folder the frames are read from; defaults to the relative "mappng" folder
      that save_map_img writes to
    outfile: path of the .gif to write (default "movie.gif")
  """

  # The same folder is used for listing and for reading the frames.
  # Previously files were listed from '/content/mappng' but read from the
  # relative "mappng", which only match when the working directory is /content
  onlyfiles = [f for f in sorted(os.listdir(folder)) if os.path.isfile(os.path.join(folder, f))]

  # with frames as gifframe
  with imageio.get_writer(outfile, mode='I', duration=gifframe) as writer:
      for filename in onlyfiles:
          image = imageio.imread(os.path.join(folder, filename))
          writer.append_data(image)
          clear_output() #os.system('cls')
          print(f'Stiched {filename}')
      print("GIF Stitched Successfully")

## Main Function

In [None]:
# Module-level data source URLs (county-level files).
# NOTE: main() assigns local variables with these same names for the chosen
# subdivision, so main() does not read the values defined here.

# Zipped county shapefile
sfurl = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_5m.zip"
# County population estimates (.csv)
censuscsv = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv'
# County-level covid-19 data (.csv) from the NYTimes repository
covidcsv = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'

In [None]:
def main(sub=1, rate=True, rsz=True, zip=False, gif=0.1, opt=False):
  """ Launches script and ultimately creates .gif with specified parameters
  Args:
    sub: subdivision to use; either:
      0: states
      1: counties (default)
      any other value: prints an error message and returns
    rate: whether to visualize the metric by rate (cases as a percentage of the population) or not (raw case numbers)
      True (default)
      False (raw visualization is not implemented yet: no new map images are generated)
    rsz: whether to resize the images to a standard dimension or not; either:
      True (default)
      False
    zip: whether to create a .zip archive including all the .png files and download it (requires Google Colab); either:
      True
      False (default)
    gif: whether or not to create a gif from .png files; either:
      0 (means no gif will be created)
      > 0 (default = 0.1); value determines seconds per frame of gif
    opt: whether or not to optimize the .gif, creating a smaller file but sacrificing image quality or colour gradation; either:
      True
      False (default)
  """

  if sub == 1:
    sfurl = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_5m.zip"
    if rate:
      censuscsv = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv'
    covidcsv = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
  elif sub == 0:
    sfurl = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_5m.zip"
    if rate:
      censuscsv = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv'
    covidcsv = 'https://raw.github.com/nytimes/covid-19-data/master/us-states.csv'
  else:
    print("Please enter either a sub value of either 0 (states) or 1 (counties)")
    return

  if rate:
    pngloop(grabSF(sfurl), popCsvToDf(sub, censuscsv), covidDf(sub, covidcsv))
  else:
    # Raw-number maps are not implemented; say so instead of skipping silently.
    # The steps below still run on whatever images already exist.
    print("Raw (non-rate) visualization is not implemented yet; no new map images were generated")

  if rsz:
    resize()
  if zip:
    # Local imports: shutil is only needed here and google.colab only exists on Colab
    import shutil
    from google.colab import files
    # Build the archive next to the "mappng" folder and download exactly the
    # file that was created (previously the archive was written to /content
    # by a shell command while the download used a relative path)
    archive = shutil.make_archive("mappng", "zip", root_dir=".", base_dir="mappng")
    files.download(archive)
  if gif > 0:
    toGIF(gif)
    if opt:
      print("Optimizing GIF...")
      # toGIF writes "movie.gif" relative to the working directory
      optimize('movie.gif')
      print("GIF Optimized!")

In [None]:
# Run the pipeline with main()'s default arguments
# (sub=1, rate=True, rsz=True, zip=False, gif=0.1, opt=False)
main()

# TODO(review): original note read "checks:, sub state and rates for each" -
# presumably: verify the state subdivision (sub=0) and the rate option for each subdivision; confirm

Stiched 2021-01-17.png
GIF Stitched Successfully
