# Import relevant libraries

In [126]:
import os
import json
import matplotlib.pyplot as plt 
import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import Polygon
from pyproj import CRS
from shapely.geometry import Point
import contextily as ctx
import numpy as np
from geopandas import GeoDataFrame
from matplotlib import rc
import folium
from folium.plugins import MarkerCluster

# Load London borough boundaries (shape file)

In [127]:
# Read the borough boundaries. The notebook treats the file's coordinates as
# British National Grid (EPSG:27700), so that CRS is assigned explicitly --
# replacing whatever CRS the reader attached -- before reprojecting to
# EPSG:4326 (lon/lat).
crs_27700 = CRS("EPSG:27700")

uk_raw = gpd.read_file("London_Borough_Excluding_MHW.json")

uk = (
    # set_crs only relabels the data; allow_override=True is required when the
    # reader has already attached a (different) CRS.
    uk_raw.set_crs(crs_27700, allow_override=True)
    .loc[:, ['NAME', 'geometry']]
    # to_crs performs the actual coordinate transformation.
    .to_crs("EPSG:4326")
    .rename(columns={'NAME': 'district'})
)

In [128]:
# Company coordinates: <base_path>/datasets/tech_roundabout_coordinates.csv
# (base_path is relative to the current working directory).
base_path = '../..'
file_path = os.path.join(
    base_path,
    'datasets',
    'tech_roundabout_coordinates.csv',
)

# Read the CSV into a DataFrame.
tech_roundabout = pd.read_csv(file_path)

In [129]:
# Cleaned company data with industry added:
# <base_path>/datasets/industry_added_cleaned_data.csv
# (base_path is relative to the current working directory).
base_path = '../..'
file_path = os.path.join(
    base_path,
    'datasets',
    'industry_added_cleaned_data.csv',
)

# Read the CSV into a DataFrame.
iac = pd.read_csv(file_path)

In [130]:
# Tech-industry subset of the cleaned company data:
# <base_path>/datasets/tech_industry_added_cleaned_data.csv
# (base_path is relative to the current working directory).
base_path = '../..'
file_path = os.path.join(
    base_path,
    'datasets',
    'tech_industry_added_cleaned_data.csv',
)

# Read the CSV into a DataFrame.
tech_industries = pd.read_csv(file_path)

In [131]:
# set-up coordinates: one point geometry per company, built from the
# longitude (x) / latitude (y) columns. Only CompanyName is kept alongside the
# geometry. points_from_xy builds the geometry array in one vectorised call
# instead of constructing one Point per row in Python.
tech_roundabout_coordinates = gpd.GeoDataFrame(
    tech_roundabout[['CompanyName']],
    geometry=gpd.points_from_xy(tech_roundabout['longitude'],
                                tech_roundabout['latitude']),
)

In [132]:
# Declare the CRS of the points as EPSG:4326 (they were built from
# longitude/latitude columns). set_crs only attaches the CRS label; it does
# not reproject any coordinates. `crs` and `epsg` are alternative ways of
# naming the CRS, so the contradictory `epsg=27700` argument is not passed.
tech_roundabout_coordinates.set_crs(crs='EPSG:4326', inplace=True, allow_override=True)

Unnamed: 0,CompanyName,geometry
0,!BIG IMPACT GRAPHICS LIMITED,POINT (-0.07869 51.52702)
1,"""K"" LINE (EUROPE) LIMITED",POINT (-0.09736 51.51757)
2,"""K"" LINE BULK SHIPPING (UK) LIMITED",POINT (-0.09736 51.51757)
3,"""K"" LINE HOLDING (EUROPE) LIMITED",POINT (-0.09736 51.51757)
4,"""K"" LINE LNG SHIPPING (UK) LIMITED",POINT (-0.09736 51.51757)
...,...,...
97964,ÉCLAT CENTURY SERVICES LIMITED,POINT (-0.08881 51.52725)
97965,ÉLEVAGE JOLIVET LTD,POINT (-0.08881 51.52725)
97966,ÜVIEW LIMITED,POINT (-0.08371 51.52559)
97967,‘ADORE BOURNEMOUTH LTD,POINT (-0.08013 51.52639)


# Spatial join

In [133]:
# get district: left-join each company point to the borough polygon that
# contains it. `predicate=` replaces the old `op=` keyword (renamed in
# geopandas 0.10 and later removed).
tech_roundabout_joined = gpd.sjoin(
    tech_roundabout_coordinates, uk, how='left', predicate='within'
).drop(columns='index_right')

# remove data points that are not in the uk --- bug when request for coordinate
# (points that fall inside no polygon come out of the left join with a
# missing district)
has_district = tech_roundabout_joined['district'].notna()

# remove irrelevant data point --- postcode provided cannot be found on Google Map
is_irrelevant = tech_roundabout_joined['district'].isin(['Brentwood', 'Sheffield'])

tech_roundabout_full = tech_roundabout_joined[has_district & ~is_irrelevant]

# Visualize data

In [134]:
# Keep only the borough polygons that contain at least one joined company.
districts_with_companies = tech_roundabout_full['district'].unique()
in_use = uk['district'].isin(districts_with_companies)
tech_roundabout_district = uk[in_use]

In [135]:
# Combine the tech-industry records with the raw coordinates and with the
# district-tagged points (both inner joins on CompanyName), drop the columns
# not needed for the map, and remove the duplicate rows.
df_tech = (
    tech_industries
    .merge(tech_roundabout, on='CompanyName', how='inner')
    .merge(tech_roundabout_full, on='CompanyName', how='inner')
    .drop(columns=['IncorporationDate', 'year', 'month', 'CompanyNumber',
                   'CompanyStatus', 'sector_code_1', 'count'])
    .drop_duplicates()
)

# The merge result starts from a plain DataFrame; wrap it so the `geometry`
# column is treated as geometry again.
df_tech_gdf = GeoDataFrame(df_tech)

# Strip surrounding whitespace so sector names match the colour lookup exactly.
df_tech_gdf['sector_name_1'] = df_tech_gdf['sector_name_1'].str.strip()

# Sector name -> marker colour name (later passed to folium.Icon(color=...)).
# NOTE(review): 'Fund management activities' and 'Wireless Telecommunications'
# both map to 'orange' -- confirm whether that collision is intended.
_SECTOR_COLOURS = {
    'Biotech Rsrch.': 'darkgreen',
    'Business Software Dev.': 'lightgreen',
    'Computer Game Companies': 'lightblue',
    'Computer facilities mgmt.': 'gray',
    'Data Companies': 'darkpurple',
    'Fund management activities': 'orange',
    'Info Tech Consulting': 'lightred',
    'Info Tech services': 'cadetblue',
    'Natural Sciences Rsrch.': 'beige',
    'Open-ended Investors': 'purple',
    'Satellite Telecommunications': 'white',
    'Social Sciences Rsrch.': 'pink',
    'Software Dev.': 'lightgray',
    'Technical testing and analysis': 'green',
    'Venture Capital': 'blue',
    'Web portals': 'darkblue',
    'Wireless Telecommunications': 'orange',
}


def district(counter):
    """Return the marker colour for the sector of one record.

    Parameters
    ----------
    counter : mapping-like (e.g. a DataFrame row)
        Must support ``counter['sector_name_1']``.

    Returns
    -------
    str
        The colour name registered for that exact sector name, or
        ``'black'`` when the sector is not in the table.
    """
    return _SECTOR_COLOURS.get(counter['sector_name_1'], 'black')

# Compute one marker colour per row (district() reads the row's
# 'sector_name_1' value) and store it in a new 'color' column.
marker_colours = df_tech_gdf.apply(district, axis=1)
df_tech_gdf["color"] = marker_colours

# Preview the first rows.
df_tech_gdf.head(3)

Unnamed: 0,CompanyName,sector_name_1,Industry,RegAddress.PostCode,RegAddress.PostTown,latitude,longitude,geometry,district,color
0,MACXPERTS LIMITED,Info Tech Consulting,informatin Tech,EC1Y 4SE,LONDON,51.521022,-0.090913,POINT (-0.09091 51.52102),Islington,lightred
1,PREMIER TRAINING LIMITED,Info Tech Consulting,informatin Tech,EC1V 2NX,LONDON,51.527246,-0.088808,POINT (-0.08881 51.52725),Islington,lightred
2,INFIELD SYSTEMS LIMITED,Data Companies,informatin Tech,EC2A 2EW,LONDON,51.52106,-0.080057,POINT (-0.08006 51.52106),City of London,darkpurple


In [138]:
#set up the map
m = folium.Map(location=[51.51757, -0.08881], zoom_start=20, min_zoom=10, prefer_canvas=True)

# Markers must be added to the cluster layer (not directly to the map),
# otherwise the cluster stays empty and every marker is drawn individually.
marker_cluster = MarkerCluster().add_to(m)

# One marker per company: tooltip shows the company name, popup shows the
# sector, and the icon colour comes from the precomputed 'color' column.
# Loop variables are named distinctly so no Series is overwritten.
for lat, lng, company, sector, colour in zip(
        df_tech_gdf['latitude'],
        df_tech_gdf['longitude'],
        df_tech_gdf['CompanyName'],
        df_tech_gdf['sector_name_1'],
        df_tech_gdf['color']):
    folium.Marker(
        location=[lat, lng],
        tooltip=company,
        popup=sector,
        icon=folium.Icon(color=colour, icon='globe', angle=0, prefix='fa'),
    ).add_to(marker_cluster)

# Write the interactive map to a standalone HTML file.
m.save('test2.html')