In [1]:
# Install the MongoDB driver (pymongo) that the data-loading cells in this notebook import.
! pip install pymongo



In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from datetime import datetime
import folium
from folium.plugins import MarkerCluster
import warnings
warnings.filterwarnings("ignore")


def import_county():
    """
    Load US county-level rows from the MongoDB 'CDC-TimeSeries' collection.

    The whole collection is read into a DataFrame, then reduced to rows whose
    'Country/Region' is 'US' and whose 'County/City' is not the empty string.
    Rows are sorted by 'Province/State', 'County/City' and 'Date', and the
    'Country/Region' column is dropped.

    The source only carries cumulative counts, so two columns are added:
    'Confirmed_New' and 'Death_New' hold the row-to-row difference of
    'Confirmed' and 'Death' within each (state, county) group. The first row
    of every group has no predecessor and is set to 0.

    Returns
    -------
    pandas.DataFrame
        The county-level frame with 'Date' moved to the first column and the
        two new columns moved to positions 7 and 9.
    """
    import pymongo
    import pandas as pd

    # NOTE(review): the connection string embeds a username and password.
    # Consider loading it from the environment instead of the source file.
    auth = "mongodb://analyst:grmds@3.101.18.8/COVID19-DB"
    db_name = 'COVID19-DB'

    client = pymongo.MongoClient(auth) # defaults to port 27017
    try:
        # list(...) drains the cursor, so the client can be closed right after.
        cdc_ts = pd.DataFrame(list(client[db_name]['CDC-TimeSeries'].find({})))
    finally:
        # Always release the connection, even if the query fails.
        client.close()

    US = cdc_ts.loc[cdc_ts['Country/Region'] == 'US']
    US = US.loc[US['County/City'] != ""]
    US = US.sort_values(by=['Province/State', 'County/City', 'Date']).reset_index(drop = True)
    US = US.drop(['Country/Region'], axis=1)

    US['Confirmed'] = US['Confirmed'].astype(int)
    US['Death'] = US['Death'].astype(int)

    # Per-county daily change; diff() leaves NaN on each group's first row.
    county_groups = ['Province/State', 'County/City']
    US['Confirmed_New'] = US.groupby(county_groups)['Confirmed'].diff().fillna(0)
    US['Death_New'] = US.groupby(county_groups)['Death'].diff().fillna(0)

    # Reorder columns by popping and re-inserting, one at a time.
    # NOTE(review): positions 7 and 9 presumably place each *_New column next
    # to its cumulative counterpart; that depends on the collection's column
    # order - confirm against the current schema.
    for col_name, position in (("Date", 0), ("Confirmed_New", 7), ("Death_New", 9)):
        US.insert(position, col_name, US.pop(col_name))

    return US

In [0]:
import pymongo
import pandas as pd
# Connection string and database name for the COVID19-DB MongoDB instance.
# NOTE(review): the URI embeds credentials and duplicates the one inside
# import_county(); consider reading it from the environment in one place.
auth = "mongodb://analyst:grmds@3.101.18.8/COVID19-DB"
db_name = 'COVID19-DB'

client = pymongo.MongoClient(auth) # defaults to port 27017
db = client[db_name]
# Despite its name, cdc_ts here holds every document of the 'counties'
# collection, not the 'CDC-TimeSeries' collection.
cdc_ts = pd.DataFrame(list(db['counties'].find({})))
# Disabled one-off export: the 20 rows with the highest population density,
# written to mp.csv.
#mp=cdc_ts.sort_values('population_density_people/mi^2',ascending=False)[:20]
#mp.to_csv('mp.csv')

In [0]:
import pymongo
# Load the US county time series and tag every row with a "County-State" key.
county = import_county()
county = county.assign(
    state_county=county['County/City'] + "-" + county['Province/State']
)

In [0]:
# Hub counties: keep only the key column of the CSV, renamed to match the
# 'state_county' key used by the county frame.
mp = (
    pd.read_csv('0503mp.csv')
    .rename(columns={'State_County':'state_county'})
    ['state_county']
)

In [0]:
# One row per hub county with its coordinates: join the hub keys to the county
# time series, keep the location columns, and collapse the per-date repeats.
temp = (
    pd.merge(mp, county, how='inner', on='state_county')
    .reset_index()
    [['state_county','Latitude','Longitude']]
    .drop_duplicates()
    .reset_index()
)

In [11]:
# Unique (county key, latitude, longitude) rows, skipping 'Unassigned' entries.
# Built as one chain so nothing is modified in place on a slice of `county`.
countylist = (
    county.loc[county['County/City'] != 'Unassigned',
               ['state_county', 'Latitude', 'Longitude']]
    .drop_duplicates()
    .reset_index()
)
## Fill in the missing coordinates for Southeast Utah.
# NOTE(review): row label 2850 is presumably the Southeast Utah entry - confirm
# it still is if the upstream data changes.
# The longitude must be negative (western hemisphere), like every other county
# in this table; the previous positive value placed the point in Asia.
countylist.loc[2850,'Latitude']=37.096
countylist.loc[2850,'Longitude']=-113.56
countylist.head()

Unnamed: 0,index,state_county,Latitude,Longitude
0,0,Autauga-Alabama,32.53952745,-86.64408227
1,97,Baldwin-Alabama,30.72774991,-87.72207058
2,194,Barbour-Alabama,31.868263,-85.3871286
3,291,Bibb-Alabama,32.99642064,-87.12511459999996
4,388,Blount-Alabama,33.98210918,-86.56790593


In [0]:
import math

def distance(lat1,lon1,lat2,lon2,radius=6371):
    """
    Great-circle distance between two points, using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.
    radius : float, optional
        Sphere radius. The result is in the same unit; the default of 6371
        gives kilometres.

    Returns
    -------
    float
        Distance along the sphere's surface. NaN inputs yield NaN.
    """
    sin_half_dlat = math.sin(math.radians(lat2-lat1)/2)
    sin_half_dlon = math.sin(math.radians(lon2-lon1)/2)
    a = sin_half_dlat * sin_half_dlat + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * sin_half_dlon * sin_half_dlon

    # Rounding can push `a` a hair outside [0, 1] for (near-)antipodal points,
    # which would make sqrt() raise. Plain comparisons are used instead of
    # min()/max() so that NaN falls through untouched and still yields NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    return radius * c

In [13]:
#Create Empty DataFrame
# The columns are the hub-county labels held in `mp`; there are no rows yet.
test=pd.DataFrame(columns=mp)
# Adding 'County' gives the frame one row per entry of countylist; the hub
# columns stay empty until the distance loop fills them.
test['County']=countylist['state_county']
test

state_county,New York-New York,Kings-New York,Bronx-New York,Queens-New York,San Francisco-California,Hudson-New Jersey,Suffolk-Massachusetts,Philadelphia-Pennsylvania,Columbia-Washington,Alexandria-Virginia,Arlington-Virginia,Richmond-New York,Baltimore-Maryland,Falls Church-Virginia,Manassas Park-Virginia,Essex-New Jersey,Cook-Illinois,Union-New Jersey,St. Louis-Missouri,Nassau-New York,County
0,,,,,,,,,,,,,,,,,,,,,Autauga-Alabama
1,,,,,,,,,,,,,,,,,,,,,Baldwin-Alabama
2,,,,,,,,,,,,,,,,,,,,,Barbour-Alabama
3,,,,,,,,,,,,,,,,,,,,,Bibb-Alabama
4,,,,,,,,,,,,,,,,,,,,,Blount-Alabama
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3199,,,,,,,,,,,,,,,,,,,,,Sweetwater-Wyoming
3200,,,,,,,,,,,,,,,,,,,,,Teton-Wyoming
3201,,,,,,,,,,,,,,,,,,,,,Uinta-Wyoming
3202,,,,,,,,,,,,,,,,,,,,,Washakie-Wyoming


In [0]:
# Fill `test` with the distance (km) from every county in `countylist` to each
# hub county in `temp`.
# The loop variables are deliberately not called `county`: that name is the
# DataFrame loaded earlier, and overwriting it with a string would break any
# re-run of the cells that build `temp` and `countylist`.
for county_name, county_lat, county_lon in zip(
        countylist['state_county'], countylist['Latitude'], countylist['Longitude']):
    county_lat = float(county_lat)
    county_lon = float(county_lon)
    # The target rows do not depend on the hub, so select them once per county.
    row_mask = test['County'] == county_name
    for hub_name, hub_lat, hub_lon in zip(
            temp['state_county'], temp['Latitude'], temp['Longitude']):
        test.loc[row_mask, hub_name] = distance(
            county_lat, county_lon, float(hub_lat), float(hub_lon))

In [15]:
# Preview the first rows of the filled county-to-hub distance table.
test.head()

state_county,New York-New York,Kings-New York,Bronx-New York,Queens-New York,San Francisco-California,Hudson-New Jersey,Suffolk-Massachusetts,Philadelphia-Pennsylvania,Columbia-Washington,Alexandria-Virginia,Arlington-Virginia,Richmond-New York,Baltimore-Maryland,Falls Church-Virginia,Manassas Park-Virginia,Essex-New Jersey,Cook-Illinois,Union-New Jersey,St. Louis-Missouri,Nassau-New York,County
0,1451.65,1444.87,1464.56,1458.79,3285.28,1442.62,1748.29,1321.97,3061.44,1109.18,1111.94,1427.89,1183.31,1107.32,1081.09,1434.04,1039.5,1421.59,760.056,1476.5,Autauga-Alabama
1,1665.39,1657.99,1678.39,1671.91,3265.4,1656.47,1962.1,1535.24,3115.55,1322.23,1325.39,1641.27,1398.21,1321.05,1295.11,1648.5,1235.82,1635.66,913.994,1689.21,Baldwin-Alabama
2,1420.74,1412.74,1433.82,1426.64,3422.37,1411.97,1716.95,1290.45,3200.23,1077.92,1081.49,1396.31,1155.63,1077.47,1051.94,1404.67,1129.7,1391.44,881.441,1443.51,Barbour-Alabama
3,1451.72,1445.56,1464.5,1459.44,3225.68,1442.59,1747.74,1322.82,2994.54,1110.93,1113.28,1428.37,1182.91,1108.38,1081.97,1433.41,985.406,1421.38,694.939,1477.5,Bibb-Alabama
4,1342.51,1336.87,1355.16,1350.69,3239.46,1333.31,1637.66,1214.48,2968.23,1003.9,1005.88,1319.53,1073.85,1000.76,974.278,1323.67,880.727,1312.01,623.33,1369.02,Blount-Alabama


In [0]:
# Uncomment to export the county-to-hub distance table to Top20.csv.
#test.to_csv('Top20.csv')