# Tech City Growth Over Time

## Import Libraries

In [1]:
import os
import platform
import getpass
import numpy as np
import pandas as pd
import bar_chart_race as bcr
import matplotlib.pyplot as plt
from matplotlib import rc
import datetime
import scipy.stats as stats
from bokeh.io import output_file
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.plotting import figure, show
from bokeh.layouts import column

## Load Data

In [2]:
# Base directory (relative to the current working directory) that holds the
# 'datasets' folder with every input CSV used below.
base_path = '../..'
datasets_dir = os.path.join(base_path, 'datasets')

# Load the four input tables from the datasets folder.
pivot = pd.read_csv(os.path.join(datasets_dir, 'pivot_table_year_cumcount.csv'))
uk = pd.read_csv(os.path.join(datasets_dir, 'uk_plotting_data.csv'))
tech_coord = pd.read_csv(os.path.join(datasets_dir, 'tech_roundabout_merge.csv'))
clean_data = pd.read_csv(
    os.path.join(datasets_dir, 'industry_added_cleaned_data.csv'))

In [3]:
# Left-join the coordinate table onto the cleaned company data by company
# name, so every row of clean_data is kept even without a coordinate match.
# NOTE(review): duplicate 'CompanyName' values in tech_coord would duplicate
# rows here -- confirm the key is unique in that file.
all_data = pd.merge(clean_data, tech_coord, how='left', on='CompanyName')

## Timeline Chart

In [4]:
# Collapse each row's date to month resolution: build a timestamp from the
# 'year' and 'month' columns with the day pinned to the 1st.
year_month_day = all_data[['year', 'month']].assign(day=1)
all_data['date'] = pd.to_datetime(year_month_day)

In [5]:
# Build a two-column frame ('date', 'count') holding the summed 'count'
# for every month-level date present in all_data.
date_data = (
    all_data.groupby(['date'])['count']
    .sum()
    .to_frame()
    .reset_index()
)

In [6]:
# Timeline chart: a detail line plot of the monthly counts on top, and a
# compact overview plot below whose RangeTool selects the x-range shown in
# the detail plot.

# set save path (bokeh writes the standalone HTML page to this file)
file_path = os.path.join('output', 'timeline_chart.html')
output_file(file_path)
print(f"The html file is saved to {file_path}")

# data manipulation: bokeh's datetime axis needs datetime64 values
dates = np.array(date_data['date'], dtype=np.datetime64)
source = ColumnDataSource(data=dict(date=dates, count=date_data['count']))

# The detail plot initially shows the first 29 rows (indices 0..28).
# Clamp the end index so a dataset with fewer than 29 rows does not raise
# IndexError; in that case the whole available range is shown.
initial_window_end = min(28, len(dates) - 1)

# initialize the plot
p = figure(plot_height=300,
           plot_width=800,
           tools="xpan",
           toolbar_location=None,
           x_axis_type="datetime",
           x_axis_location="above",
           x_range=(
               dates[0],
               dates[initial_window_end],
           ),
           background_fill_color="#efefef")

# plot data
p.line('date', 'count', source=source)

# labels
p.yaxis.axis_label = 'Companies Created'

# initialize lower panel plot; it shares the detail plot's y-range so both
# panels use the same vertical scale
select = figure(title="Tech City Company Incorporation Overview Per Month",
                plot_height=130,
                plot_width=800,
                y_range=p.y_range,
                x_axis_type="datetime",
                y_axis_type=None,
                tools="",
                toolbar_location=None,
                background_fill_color="#efefef")

# initialize data selector bound to the detail plot's x-range, so dragging
# the overlay in the lower panel moves the window of the upper plot
range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "green"
range_tool.overlay.fill_alpha = 0.2

# plot data
select.line('date', 'count', source=source)
select.ygrid.grid_line_color = None

# add selector and make it the active tool
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

# show plot (detail plot stacked above the overview)
show(column(p, select))

The html file is saved to output/timeline_chart.html


## Racing Horizontal Bar Chart by Sector

In [7]:
# Re-index the pivot table by its 'year' column so each row is one period
# of the race chart, then preview the first rows.
pivot = pivot.set_index(keys='year')
pivot.head()

Unnamed: 0_level_0,Biotech Rsrch.,Business Software Dev.,Computer Game Companies,Computer facilities mgmt.,Data Companies,Fund management activities,Info Tech Consulting,Info Tech services,Natural Sciences Rsrch.,Open-ended Investors,Satellite Telecommuncations,Social Sciences Rsrch.,Software Dev.,Technical testing and analysis,Venture Capital,Web portals,Wireless Telecommuncations
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1991,0,1,0,0,1,0,3,0,0,0,0,0,3,0,0,0,0
1992,0,3,0,2,1,1,6,3,0,0,0,1,4,0,1,0,0
1993,0,5,0,2,2,2,10,6,1,0,0,1,4,0,1,0,0
1994,0,8,0,2,2,4,12,9,1,0,0,1,5,0,3,0,3
1995,0,10,0,2,2,6,18,15,1,0,0,1,5,0,5,1,7


### Create the Racing Bar Chart

In [8]:
# Global matplotlib text settings: use the sans-serif family with
# 'Avant Garde' as the sans-serif face, and render text through LaTeX.
font_settings = {'family': 'sans-serif', 'sans-serif': ['Avant Garde']}
rc('font', **font_settings)
rc('text', usetex=True)

In [9]:
# Destination of the rendered animation.
file_path = os.path.join('output', 'Sector_Comparison.html')

# Font properties passed to every text element of the chart.
shared_font = {
    'family': 'sans-serif',
    'sans-serif': ['Avant Garde'],
    'weight': 'bold',
    'color': 'black'
}

# Render the racing bar chart from the year-indexed pivot table and write it
# to file_path. See the bar_chart_race documentation for the exact meaning of
# steps_per_period, period_length and perpendicular_bar_func.
bcr.bar_chart_race(
    df=pivot,
    title='Tech City Sector Growth Over Time',
    steps_per_period=10,
    period_length=500,
    shared_fontdict=shared_font,
    perpendicular_bar_func='mean',
    filename=file_path,
)

print(f"The html file is saved to {file_path}")

  ax.set_yticklabels(self.df_values.columns)
  ax.set_xticklabels([max_val] * len(ax.get_xticks()))


The html file is saved to output/Sector_Comparison.html
