# Import relevant libraries

In [1]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt 
from matplotlib import rc
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
from pyproj import CRS
import folium
from folium import plugins
from folium.plugins import MarkerCluster, TimestampedGeoJson
import shapely
from shapely.geometry import Point, Polygon

# Load London borough shape file

In [2]:
# read the borough boundaries
uk = gpd.read_file("London_Borough_Excluding_MHW.json")
crs_27700 = CRS("EPSG:27700")

# declare the coordinates as EPSG:27700 without transforming them; set_crs with
# allow_override=True replaces whatever CRS was read from the file (assigning
# `.crs` directly on a frame that already has a CRS warns in recent geopandas)
uk = uk.set_crs(crs_27700, allow_override=True)

# keep only the borough name and its shape, then reproject to EPSG:4326 (lon/lat)
uk = uk[['NAME', 'geometry']].to_crs("EPSG:4326")

# rename without `inplace` so the result is a fresh frame rather than a
# mutation of a column slice
uk = uk.rename(columns={'NAME': 'district'})

In [3]:
# location of the company coordinates csv, relative to this notebook
base_path = '../..'
file_path = os.path.join(base_path, 'datasets', 'tech_roundabout_coordinates.csv')

# read the csv into a dataframe
tech_roundabout = pd.read_csv(file_path)

In [4]:
# location of the cleaned data with the industry column added
file_path = os.path.join(base_path, 'datasets', 'industry_added_cleaned_data.csv')

# read the csv into a dataframe
iac = pd.read_csv(file_path)

In [5]:
# location of the cleaned tech-industry data
file_path = os.path.join(base_path, 'datasets', 'tech_industry_added_cleaned_data.csv')

# read the csv into a dataframe
tech_industries = pd.read_csv(file_path)

In [6]:
# set-up coordinates: keep the company name plus its latitude / longitude
tech_roundabout_coordinates = tech_roundabout[['CompanyName', 'latitude', 'longitude']]

# build one point per row (x = longitude, y = latitude) with the vectorised
# geopandas constructor instead of creating each Point in a Python loop, and
# replace the two numeric columns with the resulting geometry column
tech_roundabout_coordinates = gpd.GeoDataFrame(
    tech_roundabout_coordinates.drop(columns=['latitude', 'longitude']),
    geometry=gpd.points_from_xy(tech_roundabout_coordinates['longitude'],
                                tech_roundabout_coordinates['latitude']))

In [7]:
# declare the lon/lat points as EPSG:4326; set_crs only labels the existing
# coordinates, it does not reproject them. The call is the cell's last
# expression, so the updated frame is also displayed.
tech_roundabout_coordinates.set_crs('EPSG:4326', allow_override=True, inplace=True)

Unnamed: 0,CompanyName,geometry
0,!BIG IMPACT GRAPHICS LIMITED,POINT (-0.07869 51.52702)
1,"""K"" LINE (EUROPE) LIMITED",POINT (-0.09736 51.51757)
2,"""K"" LINE BULK SHIPPING (UK) LIMITED",POINT (-0.09736 51.51757)
3,"""K"" LINE HOLDING (EUROPE) LIMITED",POINT (-0.09736 51.51757)
4,"""K"" LINE LNG SHIPPING (UK) LIMITED",POINT (-0.09736 51.51757)
...,...,...
97964,ÉCLAT CENTURY SERVICES LIMITED,POINT (-0.08881 51.52725)
97965,ÉLEVAGE JOLIVET LTD,POINT (-0.08881 51.52725)
97966,ÜVIEW LIMITED,POINT (-0.08371 51.52559)
97967,‘ADORE BOURNEMOUTH LTD,POINT (-0.08013 51.52639)


# Spatial join

In [8]:
# get district: attach to every company point the borough polygon it lies within
# (left join, so points outside every polygon are kept with a missing district).
# `predicate` is the current name of the argument formerly spelled `op`, which
# geopandas deprecated in 0.10.
tech_roundabout_full = gpd.sjoin(tech_roundabout_coordinates,
                                 uk,
                                 how='left',
                                 predicate='within').drop(columns='index_right')

# remove data points that matched no polygon in `uk`
# (author's note: bug when requesting the coordinates)
tech_roundabout_full = tech_roundabout_full[tech_roundabout_full['district'].notna()]

# remove irrelevant data points --- postcode provided cannot be found on Google Maps
tech_roundabout_full = tech_roundabout_full[
    ~tech_roundabout_full['district'].isin(['Brentwood', 'Sheffield'])]

# Clean company data

In [9]:
# keep only the boroughs that contain at least one company after the join
districts_with_companies = tech_roundabout_full['district'].unique()
tech_roundabout_district = uk[uk['district'].isin(districts_with_companies)]

In [10]:
# combine the three sources on the company name:
#   tech_industries      -> sector / industry attributes
#   tech_roundabout      -> latitude / longitude
#   tech_roundabout_full -> point geometry and district
# then drop the columns not needed for the map and remove the duplicate rows
# left after the joins
df_tech = (
    tech_industries
    .merge(tech_roundabout, on='CompanyName', how='inner')
    .merge(tech_roundabout_full, on='CompanyName', how='inner')
    .drop(columns=['IncorporationDate', 'month', 'CompanyNumber',
                   'sector_code_1', 'count'])
    .drop_duplicates()
)

# wrap as a GeoDataFrame and strip stray whitespace from the sector names so
# they can be matched against the keys of the styling dictionary
df_tech_gdf = GeoDataFrame(df_tech)
df_tech_gdf['sector_name_1'] = df_tech_gdf['sector_name_1'].str.strip()
df_tech_gdf.head()

Unnamed: 0,year,CompanyName,CompanyStatus,sector_name_1,Industry,RegAddress.PostCode,RegAddress.PostTown,latitude,longitude,geometry,district
0,1991,MACXPERTS LIMITED,Active,Info Tech Consulting,informatin Tech,EC1Y 4SE,LONDON,51.521022,-0.090913,POINT (-0.09091 51.52102),Islington
1,1991,PREMIER TRAINING LIMITED,Active,Info Tech Consulting,informatin Tech,EC1V 2NX,LONDON,51.527246,-0.088808,POINT (-0.08881 51.52725),Islington
2,1991,INFIELD SYSTEMS LIMITED,Active,Data Companies,informatin Tech,EC2A 2EW,LONDON,51.52106,-0.080057,POINT (-0.08006 51.52106),City of London
3,1991,A C F FINANCIAL TECHNOLOGY LIMITED,Active,Software Dev.,informatin Tech,EC2V 8EH,LONDON,51.514624,-0.092348,POINT (-0.09235 51.51462),City of London
4,1991,CAPITA LAND LIMITED,Active,Software Dev.,informatin Tech,EC2V 7NQ,LONDON,51.515626,-0.093661,POINT (-0.09366 51.51563),City of London


## Create mapping dictionary

In [11]:
# one row per sector: (sector name, marker colour, icon name)
sector_styles = [
    ('Biotech Rsrch.', 'darkgreen', 'leaf'),
    ('Business Software Dev.', 'lightgreen', 'wrench'),
    ('Computer Game Companies', 'lightblue', 'gamepad'),
    ('Computer facilities mgmt.', 'gray', 'laptop'),
    ('Data Companies', 'darkpurple', 'table'),
    ('Fund management activities', 'orange', 'building'),
    ('Info Tech Consulting', 'lightred', 'server'),
    ('Info Tech services', 'cadetblue', 'signal'),
    ('Natural Sciences Rsrch.', 'beige', 'pagelines'),
    ('Open-ended Investors', 'purple', 'suitcase'),
    ('Satellite Telecommuncations', 'green', 'bars'),
    ('Social Sciences Rsrch.', 'pink', 'users'),
    ('Software Dev.', 'lightgray', 'code'),
    ('Technical testing and analysis', 'green', 'comment'),
    ('Venture Capital', 'blue', 'university'),
    ('Web portals', 'darkblue', 'internet-explorer'),
    ('Wireless Telecommuncations', 'orange', 'wifi'),
]

# split the table into the two lookups used downstream; both keep the row order
map_dict = {
    'color': {sector: colour for sector, colour, _ in sector_styles},
    'icon': {sector: glyph for sector, _, glyph in sector_styles},
}

## Add new column for colors

In [12]:
def color(df):
    """Return the marker colour for one company row.

    Parameters
    ----------
    df : mapping-like
        A single row (as passed by ``DataFrame.apply(..., axis=1)``) that
        provides a ``'sector_name_1'`` entry.

    Returns
    -------
    str
        The colour stored for that sector in ``map_dict['color']``, or
        ``'black'`` when the sector is not listed there.
    """
    # a plain dictionary lookup with a default replaces the scan over every
    # key and also gives the 'black' fallback when the mapping is empty
    return map_dict['color'].get(df['sector_name_1'], 'black')

# look the colour up per sector in one vectorised pass; sectors missing from
# map_dict['color'] come back as NaN and are filled with 'black'.
# Mapping the column directly also keeps this cell re-runnable after the map
# cell, which rebinds the name `color`.
df_tech_gdf['color'] = (df_tech_gdf['sector_name_1']
                        .map(map_dict['color'])
                        .fillna('black'))
df_tech_gdf.head(3)

Unnamed: 0,year,CompanyName,CompanyStatus,sector_name_1,Industry,RegAddress.PostCode,RegAddress.PostTown,latitude,longitude,geometry,district,color
0,1991,MACXPERTS LIMITED,Active,Info Tech Consulting,informatin Tech,EC1Y 4SE,LONDON,51.521022,-0.090913,POINT (-0.09091 51.52102),Islington,lightred
1,1991,PREMIER TRAINING LIMITED,Active,Info Tech Consulting,informatin Tech,EC1V 2NX,LONDON,51.527246,-0.088808,POINT (-0.08881 51.52725),Islington,lightred
2,1991,INFIELD SYSTEMS LIMITED,Active,Data Companies,informatin Tech,EC2A 2EW,LONDON,51.52106,-0.080057,POINT (-0.08006 51.52106),City of London,darkpurple


## Add new column for icons

In [13]:
def icon(df):
    """Return the marker icon name for one company row.

    Parameters
    ----------
    df : mapping-like
        A single row (as passed by ``DataFrame.apply(..., axis=1)``) that
        provides a ``'sector_name_1'`` entry.

    Returns
    -------
    str
        The icon name stored for that sector in ``map_dict['icon']``, or
        ``'chart-line'`` when the sector is not listed there.
    """
    # a plain dictionary lookup with a default replaces the scan over every
    # key and also gives the fallback icon when the mapping is empty
    return map_dict['icon'].get(df['sector_name_1'], 'chart-line')

# look the icon up per sector in one vectorised pass; sectors missing from
# map_dict['icon'] come back as NaN and are filled with 'chart-line'.
# Mapping the column directly also keeps this cell re-runnable after the map
# cell, which rebinds the name `icon`.
df_tech_gdf["icons"] = (df_tech_gdf['sector_name_1']
                        .map(map_dict['icon'])
                        .fillna('chart-line'))
df_tech_gdf.head(3)

Unnamed: 0,year,CompanyName,CompanyStatus,sector_name_1,Industry,RegAddress.PostCode,RegAddress.PostTown,latitude,longitude,geometry,district,color,icons
0,1991,MACXPERTS LIMITED,Active,Info Tech Consulting,informatin Tech,EC1Y 4SE,LONDON,51.521022,-0.090913,POINT (-0.09091 51.52102),Islington,lightred,server
1,1991,PREMIER TRAINING LIMITED,Active,Info Tech Consulting,informatin Tech,EC1V 2NX,LONDON,51.527246,-0.088808,POINT (-0.08881 51.52725),Islington,lightred,server
2,1991,INFIELD SYSTEMS LIMITED,Active,Data Companies,informatin Tech,EC2A 2EW,LONDON,51.52106,-0.080057,POINT (-0.08006 51.52106),City of London,darkpurple,table


## Visualisation through Folium

In [None]:
# set up the map
m = folium.Map(location=[51.51757, -0.08881],
               zoom_start=20,
               min_zoom=10,
               prefer_canvas=True,
               control_scale=True)

# one toggleable layer (checkbox in the layer control) per sector, in the
# order the sectors are listed in map_dict
sector_list = list(map_dict['icon'].keys())

# 1-based position of each sector; no longer needed to name the layers but
# kept so the name stays defined as before
sector_map = {sector: i + 1 for i, sector in enumerate(sector_list)}

# layers live in a dictionary keyed by sector instead of being injected as
# feature_<n> variables through vars()
feature_groups = {sector: folium.FeatureGroup(name=sector)
                  for sector in sector_list}

# add one marker per company to the layer of its sector.
# The loop variables use their own names so they do not overwrite the
# `color` / `icon` helper functions or the columns being iterated.
for lat, lon, name, col, sector, ico in zip(df_tech_gdf['latitude'],
                                            df_tech_gdf['longitude'],
                                            df_tech_gdf['CompanyName'],
                                            df_tech_gdf['color'],
                                            df_tech_gdf['sector_name_1'],
                                            df_tech_gdf['icons']):
    group = feature_groups.get(sector)
    if group is None:
        # sector is not listed in map_dict, so it has no layer: skip the row
        continue
    folium.Marker(location=[lat, lon],
                  tooltip=name,
                  icon=folium.Icon(color=col,
                                   icon=ico,
                                   angle=0,
                                   prefix='fa')).add_to(group)

# add the layers to the map, then the control that toggles them
for group in feature_groups.values():
    group.add_to(m)

folium.LayerControl(collapsed=False).add_to(m)

# saving map as html; create the output folder first so the save cannot fail
# on a fresh checkout
file_path = os.path.join('output', 'geospatial.html')
os.makedirs(os.path.dirname(file_path), exist_ok=True)
m.save(file_path)
print(f"The html file is saved to {file_path}")