## COVID-19: Track, Map, and Animate the Coronavirus with Python & Basemap


### Introduction
Track confirmed COVID-19 cases by country, map them with Basemap, and animate their progress over time. This is a great way to get involved and keep an eye on this developing story.


In [0]:
pip install kaggle

In [0]:
# Set the kaggle CLI's `path` configuration value.
# NOTE(review): `{/content}` is not a valid Python expression for IPython to interpolate,
# so presumably the braces reach the CLI literally and the stored value is "{/content}"
# rather than "/content" - confirm the intended value.
!kaggle config set -n path -v{/content}

In [0]:
# Write the Kaggle API token file without embedding the secret in the notebook.
# SECURITY: an API key used to be hard-coded in this cell. Treat that key as
# compromised and regenerate it in the Kaggle account settings.
import getpass
import json
import os

# Prefer environment variables; fall back to an interactive prompt so the
# secret never ends up in the saved notebook.
kaggle_username = os.environ.get('KAGGLE_USERNAME') or input('Kaggle username: ')
kaggle_key = os.environ.get('KAGGLE_KEY') or getpass.getpass('Kaggle API key: ')

kaggle_dir = '/root/.kaggle'
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
with open(kaggle_json_path, 'w') as token_file:
    json.dump({'username': kaggle_username, 'key': kaggle_key}, token_file)
# Restrict the token file to the current user.
os.chmod(kaggle_json_path, 0o600)

In [0]:
# Download the sudalairajkumar/novel-corona-virus-2019-dataset archive with the kaggle CLI.
!kaggle datasets download -d sudalairajkumar/novel-corona-virus-2019-dataset

In [0]:
!unzip \*.zip

In [0]:
# Install Basemap with pip.
# NOTE(review): later cells install Basemap again from a GitHub archive, a v1.1.0
# tarball, and the git repository; presumably only one of these is needed - confirm.
!pip install Basemap

In [0]:
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime, time, requests
from time import sleep
from mpl_toolkits.basemap import Basemap

In [0]:
from mpl_toolkits.basemap import Basemap

In [0]:
!apt-get install libgeos-3.5.0
!apt-get install libgeos-dev
!pip install https://github.com/matplotlib/basemap/archive/master.zip

In [0]:
pip install pyproj==1.9.6

In [0]:
!apt install proj-bin libproj-dev libgeos-dev

In [0]:
# Install Basemap from the v1.1.0 source tarball on GitHub.
!pip install https://github.com/matplotlib/basemap/archive/v1.1.0.tar.gz

In [0]:
from mpl_toolkits.basemap import Basemap

In [0]:
import matplotlib.pyplot as plt
from IPython.display import Image
# Display the image stored at /content/download.png at 80% width.
# NOTE(review): no visible cell creates this file - confirm it is uploaded before running.
Image(filename='/content/download.png', width='80%')

In [0]:
import mpl_toolkits
# Append an extra directory to mpl_toolkits' package search path (`__path__`)
# before importing Basemap.
# NOTE(review): the directory belongs to a Python 2.7 installation; presumably a
# leftover workaround - confirm it is still needed.
mpl_toolkits.__path__.append('/usr/lib/python2.7/dist-packages/mpl_toolkits/')
from mpl_toolkits.basemap import Basemap

In [0]:
# Install/upgrade Basemap straight from its GitHub repository (no version pin).
# NOTE(review): an earlier cell installs the v1.1.0 tarball; confirm which version is intended.
!pip install -U git+https://github.com/matplotlib/basemap.git

In [0]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

In [0]:
# Load the observations, parse the observation dates, and order rows chronologically.
covid_19_data = (
    pd.read_csv('/content/covid_19_data.csv')
    .assign(ObservationDate=lambda frame: pd.to_datetime(frame['ObservationDate']))
    .sort_values('ObservationDate', ascending=True)
)
print('Shape:', covid_19_data.shape)
print('Date min:', covid_19_data['ObservationDate'].min(),
      'Date max:', covid_19_data['ObservationDate'].max())
# Rows without a province get an explicit placeholder label instead of NaN.
covid_19_data = covid_19_data.fillna({'Province/State': 'No_Province'})
covid_19_data.tail()

## Brief Data Prep and Exploration

In [0]:
# Number of missing values in each column.
count_nan = covid_19_data.isna().sum()
count_nan

In [0]:
# Distinct countries/regions, in order of first appearance.
# Series.unique() gives a reproducible order, unlike list(set(...)) whose order can
# change between kernel sessions; later cells look countries up in this list by position.
countries = covid_19_data['Country/Region'].unique().tolist()
print('Unique Country/Region found:', len(countries))
countries

In [0]:
# Collect the distinct Province/State labels and report how many there are.
zones = [zone for zone in set(covid_19_data['Province/State'])]
print('Unique Province/State found:', '{0}'.format(len(zones)))

## Use the OpenStreetMap Nominatim REST API to get lat/lon for each country

In [0]:
def get_lat_lon(zone,
                output_as = 'center'):
    """Geocode a place name with the OpenStreetMap Nominatim search API.

    Parameters
    ----------
    zone : str
        Free-text place name to look up (e.g. a country name).
    output_as : {'center', 'boundingbox'}
        'center' returns ``[lon, lat]`` of the first match;
        'boundingbox' returns the first match's ``boundingbox`` values.

    Returns
    -------
    list of float
        The requested coordinates of the first search result.

    Raises
    ------
    ValueError
        If ``output_as`` is not a supported value (checked before any request is sent).
    IndexError
        If the service returns no match for ``zone``.
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    # Validate up front: previously an unsupported value only failed after the
    # network round-trip, with an UnboundLocalError.
    if output_as not in ('center', 'boundingbox'):
        raise ValueError(
            "output_as must be 'center' or 'boundingbox', got {0!r}".format(output_as))

    # Passing the query via `params` URL-encodes names containing spaces, '&', etc.
    # thanks openstreetmap!
    response = requests.get(
        'https://nominatim.openstreetmap.org/search',
        params={'q': zone, 'format': 'json', 'polygon': 0},
        # Identify this client to the public service instead of using the library default.
        headers={'User-Agent': 'covid19-basemap-notebook'},
        timeout=10,  # never hang the notebook on an unresponsive server
    )
    response.raise_for_status()

    matches = response.json()
    if not matches:
        # Same exception type the original `[0]` lookup produced, with a clearer message.
        raise IndexError('No geocoding result for: {0}'.format(zone))
    best_match = matches[0]

    if output_as == 'boundingbox':
        return [float(value) for value in best_match['boundingbox']]
    return [float(best_match[key]) for key in ('lon', 'lat')]

In [0]:
# Try the geocoder on a single country; with the default output_as='center'
# the result is [lon, lat].
get_lat_lon('India')

In [0]:
# Geocode every country once. The two result lists stay index-aligned with
# `countries`; a failed lookup is recorded as None in both lists.
geo_centers_lon = []
geo_centers_lat = []
total_ctry = len(countries)
for counter_, ctry in enumerate(countries):
    # Progress: print how many lookups remain on every 10th country.
    if counter_ % 10 == 0:
        print(total_ctry - counter_)
    # Short pause between requests to the geocoding service.
    time.sleep(0.2)
    centroid = [None, None]
    try:
        centroid = get_lat_lon(ctry, output_as='center')
    except Exception:
        # Best effort: keep going when one lookup fails. `Exception` (instead of a
        # bare except) still lets KeyboardInterrupt / SystemExit stop the loop.
        print('Could not find:', ctry)

    geo_centers_lon.append(centroid[0])
    geo_centers_lat.append(centroid[1])

In [0]:
# Add geos back to data frame
# Build country -> coordinate lookups once and map them onto the rows, rather than
# scanning `countries` with list.index() for every row of the frame.
lon_by_country = dict(zip(countries, geo_centers_lon))
lat_by_country = dict(zip(countries, geo_centers_lat))

# Countries whose lookup failed carry None in the lists and end up as missing values.
covid_19_data['Longitude'] = covid_19_data['Country/Region'].map(lon_by_country)
covid_19_data['Latitude'] = covid_19_data['Country/Region'].map(lat_by_country)
covid_19_data.head(10)

In [0]:
# Inspect the rows whose Province/State is Shanghai.
covid_19_data.loc[covid_19_data['Province/State'].eq('Shanghai')]

## Plot Infection Counts by Country using Basemap
You may need to install Basemap on your machine:

https://matplotlib.org/basemap/users/installing.html

In [0]:
def plot_world_map(virus_data, date, save_to_file_name = ''):
    """Draw a world map with one circle per row of ``virus_data``.

    Parameters
    ----------
    virus_data : pandas.DataFrame
        Must provide the columns 'Longitude', 'Latitude', 'Confirmed' and
        'labels_enc'. 'Confirmed' drives the circle size and the total in the
        title; 'labels_enc' selects the circle color from the "Set1" colormap.
    date : str
        Label shown at the start of the plot title.
    save_to_file_name : str, optional
        If non-empty, the figure is also written to this path before being shown.

    Returns
    -------
    None
    """
    # Set the dimension of the figure: 2600 x 1800 pixels at 96 dpi.
    my_dpi = 96
    fig = plt.figure(figsize=(2600/my_dpi, 1800/my_dpi), dpi=my_dpi)

    # Make the background map
    m = Basemap(llcrnrlon=-180, llcrnrlat=-65, urcrnrlon=180, urcrnrlat=80)
    m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
    m.fillcontinents(color='grey', alpha=0.3)
    m.drawcoastlines(linewidth=0.1, color="white")

    total_cases = np.sum(virus_data['Confirmed'])

    # Add a point per position
    m.scatter(virus_data['Longitude'],
              virus_data['Latitude'],
              s = virus_data['Confirmed'] * 8, # play around with the size or use np.log if you dont like the big circles
              alpha=0.4,
              c=virus_data['labels_enc'],
              cmap="Set1")

    plt.title(str(date) + ' Confirmed Covid-19 Cases: ' + str(int(total_cases)) + '\n(circles not to scale)', fontsize=50)

    # Save before showing, while the figure is still the current one.
    if save_to_file_name != '':
        plt.savefig(save_to_file_name)

    plt.show()
    # Release the figure explicitly: this function is called once per animation
    # frame, and unclosed figures would otherwise pile up in memory.
    plt.close(fig)
    
    


In [0]:
# Create color map
# Encode each Country/Region as an integer code with pd.factorize; the map plot uses
# this code to color the points, so colors vary per country (not per continent).
covid_19_data['labels_enc'] = pd.factorize(covid_19_data['Country/Region'])[0]
covid_19_data['labels_enc']

In [0]:
date = '2020-03-20'

# Counts are cumulative per province, and not every country has provinces, so:
#   1. keep rows observed on or before `date` and only the columns needed,
#   2. take the last value of each count per province (mean of its coordinates),
#   3. sum the provinces into one row per country (mean of the coordinates).
virus_up_to_today = (
    covid_19_data
    .loc[covid_19_data['ObservationDate'] <= date,
         ['Country/Region', 'Province/State', 'labels_enc', 'Confirmed',
          'Deaths', 'Recovered',
          'Longitude', 'Latitude']]
    .groupby(['Country/Region', 'Province/State', 'labels_enc'])
    .agg({'Confirmed': 'last',
          'Deaths': 'last',
          'Recovered': 'last',
          'Longitude': 'mean',
          'Latitude': 'mean'})
    .reset_index()
    .groupby(['Country/Region', 'labels_enc'])
    .agg({'Confirmed': 'sum',
          'Deaths': 'sum',
          'Recovered': 'sum',
          'Longitude': 'mean',
          'Latitude': 'mean'})
    .reset_index()
)

# map out confirmed cases
plot_world_map(virus_up_to_today, str(date)[:10])

In [0]:
# Every distinct observation date in chronological order, used to build the time lapse.
dates = sorted(set(covid_19_data['ObservationDate']))
dates

## Animation

In [0]:
# Column selection and aggregation recipes are the same for every frame, so they are
# defined once here.
frame_columns = ['Country/Region', 'Province/State', 'labels_enc', 'Confirmed',
                 'Deaths', 'Recovered',
                 'Longitude', 'Latitude']
# Counts are cumulative per province: last value per province, mean of its coordinates.
province_agg = {'Confirmed': 'last',
                'Deaths': 'last',
                'Recovered': 'last',
                'Longitude': 'mean',
                'Latitude': 'mean'}
# Then provinces are summed into one row per country.
country_agg = {'Confirmed': 'sum',
               'Deaths': 'sum',
               'Recovered': 'sum',
               'Longitude': 'mean',
               'Latitude': 'mean'}

image_file_name_counter = 0
for date in dates:
    # Rows observed on or before this frame's date, reduced to one row per country.
    virus_up_to_today = (
        covid_19_data
        .loc[covid_19_data['ObservationDate'] <= date, frame_columns]
        .groupby(['Country/Region', 'Province/State', 'labels_enc'])
        .agg(province_agg)
        .reset_index()
        .groupby(['Country/Region', 'labels_enc'])
        .agg(country_agg)
        .reset_index()
    )

    # map out confirmed cases; frames are numbered anim_0.png, anim_1.png, ...
    file_to_save_name = '/content/anim_{0}.png'.format(image_file_name_counter)
    plot_world_map(virus_up_to_today, str(date)[:10], file_to_save_name)

    image_file_name_counter += 1
    
    

Animation
You may need to install FFMPEG on your machine: https://github.com/adaptlearning/adapt_authoring/wiki/Installing-FFmpeg

To make a video out of a series of images, use FFmpeg.

If you need help installing FFMPEG:

https://github.com/adaptlearning/adapt_authoring/wiki/Installing-FFmpeg

In a command/terminal window in the same folder as your images, run:

`$ ffmpeg -framerate 10 -i "anim_%d.png" -pix_fmt yuv420p out.mp4`

In [0]:
!pip install FFmpeg

In [0]:
# Show the current working directory; relative paths in the ffmpeg command resolve against it.
pwd

In [0]:
!ffmpeg -framerate 10 -i "anim_%d.png" -pix_fmt yuv420p out.mp4