In [None]:
import sqlalchemy
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import folium
import os
import squarify
from difflib import get_close_matches

# Database connection string and working directory for input/output files.
# Both can be overridden through environment variables so that credentials and
# machine-specific paths do not have to be edited in the notebook itself; the
# defaults are the values this notebook has always used.
# NOTE(review): the default URI still embeds a password - move it out of the
# notebook (and rotate it) once UK_DATA_URI is set wherever this runs.
uri = os.environ.get('UK_DATA_URI', 'mysql://uk-project:rchi2019@localhost/uk-data')
path = os.environ.get('UK_DATA_PATH', 'C:/Users/jbutl20/Desktop/')

# "Jan 17" ... "Dec 18": one label per month, year-major order (24 labels).
month_labels = [
    "{} {}".format(month, year)
    for year in ['17', '18']
    for month in ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
]

In [None]:
# Font sizes shared by every matplotlib figure drawn after this cell
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12

# One entry per rc group: (group name, settings applied to that group)
for rc_group, rc_settings in [
    ('font', {'size': SMALL_SIZE}),                                   # default text size
    ('axes', {'titlesize': SMALL_SIZE, 'labelsize': MEDIUM_SIZE}),    # axes title, x/y labels
    ('xtick', {'labelsize': SMALL_SIZE}),                             # x tick labels
    ('ytick', {'labelsize': SMALL_SIZE}),                             # y tick labels
    ('legend', {'fontsize': SMALL_SIZE}),                             # legend text
    ('figure', {'titlesize': BIGGER_SIZE}),                           # figure title
]:
    plt.rc(rc_group, **rc_settings)

In [None]:
# Load the CCG boundary features from the GeoJSON file
ccg_df = gpd.read_file('uk_ccg_2018.geojson')

# Lower-cased copy of the CCG name, kept in its own column
ccg_df['lc_ccg18nm'] = [ccg_name.lower() for ccg_name in ccg_df['ccg18nm']]

# Display the coordinate reference system the file was read with
ccg_df.crs

In [None]:
header = ['ccg_code', 'name']

# The CSV has no header row; everything after its first two columns is discarded
ccg_sites_df = pd.read_csv('eccgsites.csv', header=None)
ccg_sites_df = ccg_sites_df.drop(columns=ccg_sites_df.columns[2:])
ccg_sites_df.columns = header

# Index the table by the lower-cased site name
ccg_sites_df['lc_name'] = [site_name.lower() for site_name in ccg_sites_df['name']]
ccg_sites_df = ccg_sites_df.set_index('lc_name')
ccg_sites_df.head()

In [None]:
def _best_site_match(ccg_name):
    """Return the closest label in ccg_sites_df.index for ccg_name, or '' if get_close_matches finds none."""
    candidates = get_close_matches(ccg_name, ccg_sites_df.index, n=1)
    return candidates[0] if candidates else ''


# Fuzzy-match every lower-cased boundary name against the site-list index
ccg_df['match'] = ccg_df['lc_ccg18nm'].map(_best_site_match)

# Right join keeps every boundary row; the helper columns are dropped afterwards
merged_df = (
    ccg_sites_df
    .join(ccg_df.set_index('match'), how='right')
    .reset_index()
    .drop(columns=['lc_ccg18nm', 'match'])
)

In [None]:
# Column names assigned to the population CSV, in file order
columns = ['date', 'ccg_name','geography_code','rural_urban','total_population','male',
           'female','lives_household','lives_communal','school_age','area_hectares','density_pph','note']

population_csv = os.path.join(path, r'uk-population-by-ccg.csv')
uk_population = pd.read_csv(population_csv)
uk_population.columns = columns

# Inner join: keep only population rows whose geography_code matches a ccg18cd in merged_df
ccg_by_code = merged_df.set_index('ccg18cd')
new_df = uk_population.join(ccg_by_code, on='geography_code', how='inner')

In [None]:
# Wrap the joined table in a GeoDataFrame again, reusing its geometry column
# and declaring the coordinates as EPSG:4326.
crs = {'init': 'epsg:4326'}
new_gdf = gpd.GeoDataFrame(new_df, geometry=new_df.geometry, crs=crs)

In [None]:
# Calculate area and density

def _scaled_area(shape):
    """Geometry area divided by 10**4, rounded to 2 decimals.

    Presumably converts square metres to hectares - assumes EPSG:27700 units are metres; confirm.
    """
    return round(shape.area / 10**4, 2)


def _population_density(row):
    """total_population per unit of calculated_area, rounded to 1 decimal."""
    return round(row['total_population'] / row['calculated_area'], 1)


def _area_percent_difference(row):
    """Gap between calculated_area and area_hectares as a percentage of calculated_area (1 decimal)."""
    gap = row['calculated_area'] - row['area_hectares']
    return round((gap / row['calculated_area']) * 100, 1)


# Areas are measured on a copy of the geometry reprojected to EPSG:27700
projected_geometry = new_gdf['geometry'].to_crs({'init':'epsg:27700'})
new_gdf['calculated_area'] = projected_geometry.apply(_scaled_area)
new_gdf['calculated_density'] = new_gdf.apply(_population_density, axis=1)
new_gdf['per_diff'] = new_gdf.apply(_area_percent_difference, axis=1)

# Side-by-side view of the supplied and the calculated figures
area_report_columns = ['ccg18nm','total_population','area_hectares','calculated_area',
                       'per_diff','density_pph','calculated_density','note']
new_gdf.loc[:, area_report_columns]

In [None]:
new_gdf.loc[:,['ccg18nm','total_population','per_communal','area_hectares','calculated_area','per_diff','density_pph','calculated_density','note']]

In [None]:
# Other Calculations

new_gdf['per_communal'] = new_gdf.apply(lambda x: round((x['lives_communal']/x['total_population'])*100,1), axis=1)

In [None]:
# Attribute table without the geometry and the two name columns, saved as CSV
# for inspection in Excel
new_df_without_geometry = new_gdf.drop(columns=['geometry', 'ccg18nm', 'name'])
export_csv = os.path.join(path, r'uk-ccg-with-population.csv')
new_df_without_geometry.to_csv(export_csv, index=False)

In [None]:
# Drop the two name columns and write the remaining layer out as GeoJSON
final_gdf = new_gdf.drop(columns=['ccg18nm', 'name'])
geojson_out = os.path.join(path, r'new_uk_ccg_2018-1.geojson')
final_gdf.to_file(geojson_out, driver="GeoJSON")

In [None]:
# Custom style for Folium
def style_function(feature):
    """Unfinished stub for a per-feature style callback.

    Only binds ``data_series.get`` to a local name and then falls off the end,
    so it returns None and never reads ``feature``.

    NOTE(review): ``data_series`` is not defined in any visible cell, so
    calling this raises NameError unless it is defined elsewhere. The function
    is not referenced by the map cell below it - finish or remove it.
    """
    lookup = data_series.get

In [None]:
# Centre point and zoom level for the initial map view
uk_centroid = [53.8060835,-1.6057716]
m = folium.Map(uk_centroid, zoom_start=6)

# Shade each feature by its calculated_density, joined to the geometry on ccg_code.
# No tooltip is passed here: tooltip_info is only created in the next statement,
# so referencing it in this call raised NameError on a fresh kernel.
the_map = folium.Choropleth(
    geo_data=final_gdf,
    data=final_gdf,
    columns=['ccg_code','calculated_density'],
    key_on='properties.ccg_code',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.6,
    bins=8,
    nan_fill_color='Pink',
    legend_name='Population Density (Person per Hectares)',
    highlight=True,
).add_to(m)

# Attach the hover tooltip to the GeoJson layer that the choropleth created
tooltip_info = folium.GeoJsonTooltip(
    ['ccg_name','total_population','calculated_density'],
    aliases=['Region','Population','Density'],
).add_to(the_map.geojson)

# Last expression of the cell: render the map
m