In [None]:
pip install folium geopandas 

In [84]:
### for generating overall number of cases choropleth plot
import folium
import pandas as pd
import numpy as np
import geopandas as gpd

# --- Load the San Diego GeoJSON layers ------------------------------------
# The path is handed straight to geopandas instead of a text-mode file handle,
# so the reader opens the file itself.
geo_data_file = r'/data/bp-codeathon-team-2/sd.geojson'
geo_df = gpd.read_file(geo_data_file)
geo_df = geo_df.rename(columns={"zip": "zipcode"})

sd_zip_file = r'/data/bp-codeathon-team-2/sd_zip_complete.geojson'
# Features with a missing value in any column are discarded.
sd_zip_df = gpd.read_file(sd_zip_file).dropna()


# --- Load the genomic metadata and summarise it per zip code --------------
data_file = r'/data/bp-codeathon-team-2/2022_sd_genomic_metadata.csv'
data_df = pd.read_csv(data_file)
# NOTE(review): if pandas parses this column as float (e.g. because of blanks)
# the strings become '92101.0' and will not match the GeoJSON keys - confirm
# against the CSV.
data_df['zipcode'] = data_df['zipcode'].astype(str)

# One pass over the groups yields both the number of records that carry a
# lineage call and the most frequent lineage, so the two columns cannot get
# out of step. Records without a lineage call are dropped first: 'count'
# ignores them anyway, and a zip code with no calls at all would otherwise
# make `value_counts().index[0]` raise IndexError.
numCasesByZip = (
    data_df.dropna(subset=['nextclade_pango'])
    .groupby('zipcode')['nextclade_pango']
    .agg(numcases='count',
         lineage=lambda calls: calls.value_counts().index[0])
    .reset_index()
)

# --- Attach the counts to the geometry and total them per community -------
geo_df = pd.merge(geo_df, numCasesByZip, on=['zipcode'])
if geo_df.empty:
    raise ValueError(
        "No zip code in %s matched the zip codes in %s" % (geo_data_file, data_file)
    )
# Every zip code row ends up carrying the total of its whole community.
geo_df['numcases'] = geo_df.groupby('community')['numcases'].transform('sum')

geo_df['tooltip1'] = geo_df['community'].astype(str) + ', ' + geo_df['numcases'].astype(str) \
                    + ",   " + geo_df['lineage'].astype(str)

sd_zip_df = pd.merge(sd_zip_df, numCasesByZip, on=['zipcode'])
sd_zip_df['numcases'] = sd_zip_df.groupby('community')['numcases'].transform('sum')


# --- Build the map ---------------------------------------------------------
sd_map = folium.Map(location=[32.7157, -117.1611], zoom_start=9)

# Ten evenly spaced colour-bin edges across the observed range. The second and
# third edges are pulled down to 99 and 199 to give the low end more
# resolution, but only when that keeps the edges in increasing order; otherwise
# the evenly spaced edges are used unchanged.
case_bins = np.linspace(geo_df['numcases'].min(), geo_df['numcases'].max(), 10)
LOW_END_EDGES = (99, 199)
if case_bins[0] <= LOW_END_EDGES[0] and LOW_END_EDGES[1] <= case_bins[3]:
    case_bins[1:3] = LOW_END_EDGES

choropleth = folium.Choropleth(
        geo_data=sd_zip_df,
        name='choropleth',
        data=geo_df,
        columns=['zipcode','numcases'],
        key_on='feature.properties.zipcode',
        fill_color='OrRd',
        fill_opacity=0.7,
        line_opacity=0.8,
        line_color='Blue',
        legend_name='number of cases',
        bins=case_bins,
        highlight=True
    ).add_to(sd_map)

# Hover tooltip fed from the properties of the sd_zip_df features.
# The trailing semicolon keeps the GeoJson repr out of the cell output.
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(
                   fields=['community',
                           'lineage',
                           'numcases',],
                   aliases=["community:",
                            'dominant lineage:',
                            "# of cases: ",], 
                   localize=True,
                   sticky=False,
                   labels=True,
                   max_width=400,)
);


<folium.features.GeoJson at 0x7fd753add520>

In [82]:
# Write the standalone HTML first, then leave the map as the cell's last
# expression: only the final expression of a cell is rendered, so a bare
# `sd_map` placed before the save call displays nothing.
sd_map.save('choropleth.html')
sd_map

In [83]:
### for generating overall dominant choropleth plot
import folium
import pandas as pd
import numpy as np
import geopandas as gpd

# --- Load the San Diego GeoJSON layers ------------------------------------
# The path is handed straight to geopandas instead of a text-mode file handle,
# so the reader opens the file itself.
geo_data_file = r'/data/bp-codeathon-team-2/sd.geojson'
geo_df = gpd.read_file(geo_data_file)
geo_df = geo_df.rename(columns={"zip": "zipcode"})

sd_zip_file = r'/data/bp-codeathon-team-2/sd_zip_complete.geojson'
# Features with a missing value in any column are discarded.
sd_zip_df = gpd.read_file(sd_zip_file).dropna()

# --- Load the genomic metadata and summarise it per zip code --------------
data_file = r'/data/bp-codeathon-team-2/2022_sd_genomic_metadata.csv'
data_df = pd.read_csv(data_file)
# NOTE(review): if pandas parses this column as float (e.g. because of blanks)
# the strings become '92101.0' and will not match the GeoJSON keys - confirm
# against the CSV.
data_df['zipcode'] = data_df['zipcode'].astype(str)

# One pass over the groups yields both the number of records that carry a
# lineage call and the most frequent lineage, so the two columns cannot get
# out of step. Records without a lineage call are dropped first: 'count'
# ignores them anyway, and a zip code with no calls at all would otherwise
# make `value_counts().index[0]` raise IndexError.
numCasesByZip = (
    data_df.dropna(subset=['nextclade_pango'])
    .groupby('zipcode')['nextclade_pango']
    .agg(numcases='count',
         lineage=lambda calls: calls.value_counts().index[0])
    .reset_index()
)

# --- Turn the dominant lineage into a number folium can colour by ---------
# Each lineage is encoded as the number of zip codes it dominates.
# NOTE(review): two lineages that dominate the same number of zip codes get
# the same value and therefore the same colour - confirm this is acceptable.
lineage_color_dict = numCasesByZip['lineage'].value_counts().to_dict()
numCasesByZip['lineage_value'] = numCasesByZip['lineage'].map(lineage_color_dict)

# Push one lineage further up the colour scale so it stands out on the map.
HIGHLIGHT_LINEAGE = 'BA.1.15'
HIGHLIGHT_BOOST = 70
is_highlighted = numCasesByZip['lineage'] == HIGHLIGHT_LINEAGE
numCasesByZip.loc[is_highlighted, 'lineage_value'] += HIGHLIGHT_BOOST


# --- Attach the summary to the geometry and total cases per community -----
# geo_df is not passed to the map below; it is still built here as in the
# case-count cell so the variable keeps the same contents.
geo_df = pd.merge(geo_df, numCasesByZip, on=['zipcode'])
geo_df['numcases'] = geo_df.groupby('community')['numcases'].transform('sum')

sd_zip_df = pd.merge(sd_zip_df, numCasesByZip, on=['zipcode'])
sd_zip_df['numcases'] = sd_zip_df.groupby('community')['numcases'].transform('sum')


# --- Build the map ---------------------------------------------------------
sd_map_2 = folium.Map(location=[32.7157, -117.1611], zoom_start=9)
folium.TileLayer('CartoDB positron',name="Light Map",control=False).add_to(sd_map_2)

# Four evenly spaced edges, i.e. three colour classes over the encoded values.
lineage_bins = np.linspace(numCasesByZip['lineage_value'].min(),
                           numCasesByZip['lineage_value'].max(),
                           4)

choropleth = folium.Choropleth(
        geo_data=sd_zip_df,
        name='choropleth',
        data=numCasesByZip,
        columns=['zipcode','lineage_value'],
        key_on='feature.properties.zipcode',
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.8,
        bins = lineage_bins,
        nan_fill_color="White",
        highlight=True
    ).add_to(sd_map_2)

# Hover tooltip fed from the properties of the sd_zip_df features.
# NOTE(review): 'median_household_income' and 'obesity_crudeprev' are assumed
# to be columns of sd_zip_complete.geojson - confirm.
# The trailing semicolon keeps the GeoJson repr out of the cell output.
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(
                   fields=['community',
                           'lineage',
                           'median_household_income',
                           'obesity_crudeprev',],
                   aliases=["community:",
                            'dominant lineage:',
                            'med_household_income:',
                            "Prevalence of Obesity: ",], 
                   localize=True,
                   sticky=False,
                   labels=True,
                   max_width=400,)
);

<folium.features.GeoJson at 0x7fd761559c10>

In [69]:
# Write the standalone HTML first, then leave the map as the cell's last
# expression: only the final expression of a cell is rendered, so a bare
# `sd_map_2` placed before the save call displays nothing.
sd_map_2.save('choropleth_dominant_lineage.html')
sd_map_2