In [None]:
# Import all the necessary packages.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Optional - Ignore warnings.
import warnings
warnings.filterwarnings('ignore')

In this part we use .csv files, generated by an R script, that contain cycling infrastructure data extracted from the TfL database.
We analyse the following infrastructure:
- parking capacity
- cycle lanes length:
    -  segregated fully and partially
    - cycle lanes and tracks along waterways and in the parks
- traffic calming measures (speed humps, road narrowing etc.)
- Advanced Stop Lines (ASL) length (junction safety)
- signal lights for cyclists (junction safety)
- crossings for cyclists (junction safety)

# CYCLE PARKING

In [None]:
# Load the cycle parking records from cycle_parking.csv.
cycle_parking = pd.read_csv('cycle_parking.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(cycle_parking.shape, cycle_parking.dtypes, sep='\n')
cycle_parking.head()

In [None]:
# Count the missing values in each column.
cycle_parking.isna().sum()

In [None]:
# Print a concise summary of the cycle_parking DataFrame.
cycle_parking.info()

In [None]:
# Parse the 'SVDATE' column into pandas datetimes.
cycle_parking = cycle_parking.assign(SVDATE=pd.to_datetime(cycle_parking['SVDATE']))
# Confirm the new dtype.
cycle_parking.info()

In [None]:
# Show the descriptive statistics of the cycle parking records.
cycle_parking.describe()

In [None]:
# Load the borough lookup table from boroughs.csv.
boroughs = pd.read_csv('boroughs.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(boroughs.shape, boroughs.dtypes, sep='\n')
boroughs.head()

In [None]:
# Attach the borough lookup columns to every cycle parking row.
# A left join on 'BOROUGH' keeps all parking rows.
# NOTE(review): re-running this cell merges the lookup columns a second time.
cycle_parking = cycle_parking.merge(boroughs, on='BOROUGH', how='left')

# Preview the merged frame.
cycle_parking.head()

In [None]:
# Distribution of the survey dates ('SVDATE') in 30 bins, with a KDE curve overlaid.
sns.histplot(cycle_parking, x='SVDATE', bins=30, kde=True, color='blue')

Note: most of the cycle parking records were surveyed in 2017. Most probably the 2017 survey covers all infrastructure built earlier, as it is the first survey done by TfL.

In [None]:
# Total cycle parking per borough within Inner and Outer London.
# Every PRK_* column (including the capacity, PRK_CPT) is summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending capacity,
# so the boroughs with the largest capacity come last within each area.
# The columns are selected with a list: selecting groupby columns with a bare
# tuple of names is rejected by pandas >= 2.0.
df_parking = (
    cycle_parking
    .groupby(['Area', 'BOROUGH'])[[
        'PRK_CARR', 'PRK_COVER', 'PRK_LOCKER',
        'PRK_SHEFF', 'PRK_MSTAND', 'PRK_PSTAND',
        'PRK_HOOP', 'PRK_POST', 'PRK_BUTERF',
        'PRK_WHEEL', 'PRK_HANGAR', 'PRK_TIER',
        'PRK_OTHER', 'PRK_PROVIS', 'PRK_CPT',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'PRK_CPT'])
)
df_parking.head()

In [None]:
# Summary statistics of the per-borough parking totals.
df_parking.describe()

- There are 33 boroughs in the dataframe.
- On average there are 2049 parking sites for 4422 cycles per borough.
- The maximum number of parking sites in a borough is 5332.
- The Sheffield stand is the most frequent kind of cycle parking in London's boroughs (mean of 531).
- Post stands are the rarest kind in London, with a mean below 1 per borough (0.76).

In [None]:
# Export the per-borough cycle parking totals to df_parking.csv
# (written to the current working directory).
df_parking.to_csv('df_parking.csv')

In [None]:
# Visualize the distribution of cycle parking capacity between boroughs in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing its parking capacity.
ax = sns.barplot(y='BOROUGH', x='PRK_CPT', data=df_parking, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
ax.set_xlabel('Parking Capacity', fontsize=35)
ax.set_title('Cycle Parkings Capacity by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# CYCLE LANES

In [None]:
# Load the cycle lane records from cycle_lane.csv.
cycle_lane = pd.read_csv('cycle_lane.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(cycle_lane.shape, cycle_lane.dtypes, sep='\n')
cycle_lane.head()

In [None]:
# Count the missing values in each column.
cycle_lane.isna().sum()

In [None]:
# Print a concise summary of the cycle_lane DataFrame.
cycle_lane.info()

In [None]:
# Parse the 'SVDATE' column into pandas datetimes.
cycle_lane = cycle_lane.assign(SVDATE=pd.to_datetime(cycle_lane['SVDATE']))
# Confirm the new dtype.
cycle_lane.info()

In [None]:
# Attach the borough lookup columns to every cycle lane row.
# A left join on 'BOROUGH' keeps all lane rows.
# NOTE(review): re-running this cell merges the lookup columns a second time.
cycle_lane = cycle_lane.merge(boroughs, on='BOROUGH', how='left')
cycle_lane.head()

In [None]:
# Derive the lane length in kilometres from the 'length_m' column.
cycle_lane = cycle_lane.assign(length_km=cycle_lane['length_m'].div(1000))
cycle_lane.head()

In [None]:
# Show the descriptive statistics of the cycle lane records.
cycle_lane.describe()

- 24690 cycle lanes are defined in the dataframe.
- The average lane length is 115.82 meters.
- The maximum length of a cycle lane is 19.78 km.

In [None]:
# Distribution of the survey dates ('SVDATE') in 30 bins, with a KDE curve overlaid.
sns.histplot(cycle_lane, x='SVDATE', bins=30, kde=True, color='blue')

Most of the information was created between May 2017 and May 2018.

In [None]:
# Total cycle lane figures per borough within Inner and Outer London.
# Every CLT_* column and the lane length (km) are summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending total
# length, so the boroughs with the longest network come last within each area.
# The columns are selected with a list (a bare tuple of names is rejected by
# pandas >= 2.0) and 'CLT_BBYPAS', previously listed twice, appears once.
df_cl = (
    cycle_lane
    .groupby(['Area', 'BOROUGH'])[[
        'CLT_CARR', 'CLT_SEGREG', 'CLT_STEPP',
        'CLT_PARSEG', 'CLT_SHARED', 'CLT_MANDAT',
        'CLT_ADVIS', 'CLT_PRIORI', 'CLT_CONTRA',
        'CLT_BIDIRE', 'CLT_CBYPAS', 'CLT_BBYPAS',
        'CLT_PARKR', 'CLT_WATERR', 'CLT_PTIME',
        'length_km',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'length_km'])
)
df_cl.head()

In [None]:
# Summary statistics of the per-borough cycle lane totals.
df_cl.describe()

In [None]:
# Export the per-borough cycle lane totals to df_cl.csv
# (written to the current working directory).
df_cl.to_csv('df_cl.csv')

In [None]:
# Visualize the distribution of cycle lanes length between boroughs in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing its total cycle lane length.
ax = sns.barplot(y='BOROUGH', x='length_km', data=df_cl, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
ax.set_xlabel('Cycle Lanes (km)', fontsize=35)
ax.set_title('Cycle Lanes Length by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# Investigate Full and Partially Segregated Cycle Lanes.

In [None]:
# Keep only the lanes flagged as fully (CLT_SEGREG) or partially (CLT_PARSEG) segregated.
segregate = cycle_lane.loc[
    cycle_lane['CLT_SEGREG'].ne(0) | cycle_lane['CLT_PARSEG'].ne(0)
]
segregate.head()

In [None]:
# Total fully or partially segregated cycle lane figures per borough within
# Inner and Outer London.
# Every CLT_* column and the lane length (km) are summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending length.
# The columns are selected with a list (a bare tuple of names is rejected by
# pandas >= 2.0) and 'CLT_BBYPAS', previously listed twice, appears once.
df_seg = (
    segregate
    .groupby(['Area', 'BOROUGH'])[[
        'CLT_CARR', 'CLT_SEGREG', 'CLT_STEPP',
        'CLT_PARSEG', 'CLT_SHARED', 'CLT_MANDAT',
        'CLT_ADVIS', 'CLT_PRIORI', 'CLT_CONTRA',
        'CLT_BIDIRE', 'CLT_CBYPAS', 'CLT_BBYPAS',
        'CLT_PARKR', 'CLT_WATERR', 'CLT_PTIME',
        'length_km',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'length_km'])
)
df_seg.head()

In [None]:
# Label the summed length as the segregated-lane length so it can sit next to
# the total 'length_km' after merging.
df_seg = df_seg.rename(columns={'length_km': 'seg_length_km'})
df_seg.head()

In [None]:
# Create a barplot for segregated lanes in Inner and Outer London.

# Choose the 'ticks' style with grid lines BEFORE drawing: seaborn styles take
# effect when the axes are created, so setting the style after the barplot
# (as was done previously) did not affect this figure.
sns.set_style('ticks', {'axes.grid': True})

# One horizontal bar per borough showing its segregated lane length.
ax = sns.barplot(y='BOROUGH', x='seg_length_km', data=df_seg)

# Annotate the axes and the title.
ax.set_xlabel('Length (km)', fontsize=15)
ax.set_ylabel('Borough', fontsize=15)
ax.set_title('Fully and Partially Segregated Lanes Length', fontsize=18)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

In [None]:
# Put each borough's segregated lane length next to its cycle lane totals.
# No `how` is given, so merge's default join type applies.
df_cl_seg = df_cl.merge(df_seg[['BOROUGH', 'seg_length_km']], on='BOROUGH')
df_cl_seg.head()

In [None]:
# Share of segregated lanes in each borough's total lane length, in percent,
# followed by a sort on area and that share.
df_cl_seg = df_cl_seg.assign(
    **{'Segregated %': df_cl_seg['seg_length_km'].div(df_cl_seg['length_km']).mul(100)}
).sort_values(by=['Area', 'Segregated %'])
df_cl_seg.head()

In [None]:
# Summary statistics of the per-borough totals and the segregated share.
df_cl_seg.describe()

- On average, only about 11.87% of a borough's cycle lanes are fully or partially segregated.
- This is about 10 km of segregated cycle lanes per borough on average; the maximum is 30 km.
- The highest share of segregated lanes in a single borough is 26.42%.
- About 415 of them are located on carriageways (CLT_CARR).

In [None]:
# Visualize share of segregated and partially segregated cycle lanes by borough among Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing the segregated share of its lanes.
ax = sns.barplot(y='BOROUGH', x='Segregated %', data=df_cl_seg, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
# The axis label previously read 'Percetage' (typo).
ax.set_xlabel('Percentage', fontsize=35)
ax.set_title('Percentage of Fully or Partially Segregated Cycle Lanes by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# Investigate Cycle Lanes in the Parks or Along Waterways.

In [None]:
# Keep the lanes that are either park routes off the carriageway or waterside routes.
# NOTE(review): `&` binds tighter than `|`, so the carriageway exclusion applies
# to park routes only, not to waterside routes - confirm that this is intended.
parks = cycle_lane.loc[
    (cycle_lane['CLT_PARKR'].ne(0) & cycle_lane['CLT_CARR'].eq(0))
    | cycle_lane['CLT_WATERR'].ne(0)
]
parks.head()

In [None]:
# Total park and waterside cycle lane figures per borough within Inner and
# Outer London.
# Every CLT_* column and the lane length (km) are summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending length.
# The columns are selected with a list (a bare tuple of names is rejected by
# pandas >= 2.0) and 'CLT_BBYPAS', previously listed twice, appears once.
df_parks = (
    parks
    .groupby(['Area', 'BOROUGH'])[[
        'CLT_CARR', 'CLT_SEGREG', 'CLT_STEPP',
        'CLT_PARSEG', 'CLT_SHARED', 'CLT_MANDAT',
        'CLT_ADVIS', 'CLT_PRIORI', 'CLT_CONTRA',
        'CLT_BIDIRE', 'CLT_CBYPAS', 'CLT_BBYPAS',
        'CLT_PARKR', 'CLT_WATERR', 'CLT_PTIME',
        'length_km',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'length_km'])
)
df_parks.head()

In [None]:
# Label the summed length as the park/waterside lane length so it can sit next
# to the total 'length_km' after merging.
df_parks = df_parks.rename(columns={'length_km': 'park_length_km'})
df_parks.head()

In [None]:
# Create a barplot for park and waterside lanes in Inner and Outer London.

# Choose the 'ticks' style with grid lines BEFORE drawing: seaborn styles take
# effect when the axes are created, so setting the style after the barplot
# (as was done previously) did not affect this figure.
sns.set_style('ticks', {'axes.grid': True})

# One horizontal bar per borough showing its park and waterside lane length.
ax = sns.barplot(y='BOROUGH', x='park_length_km', data=df_parks)

# Annotate the axes and the title.
ax.set_xlabel('Cycle Lanes Length (km)', fontsize=15)
ax.set_ylabel('Borough', fontsize=15)
ax.set_title('Parks and Waterside Cycle Lanes Length', fontsize=18)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

In [None]:
# Put each borough's park/waterside lane length next to its cycle lane totals.
# No `how` is given, so merge's default join type applies.
df_cl_parks = df_cl.merge(df_parks[['BOROUGH', 'park_length_km']], on='BOROUGH')
df_cl_parks.head()

In [None]:
# Share of park and waterside lanes in each borough's total lane length, in
# percent, followed by a sort on area and that share.
df_cl_parks = df_cl_parks.assign(
    **{'Parks %': df_cl_parks['park_length_km'].div(df_cl_parks['length_km']).mul(100)}
).sort_values(by=['Area', 'Parks %'])
df_cl_parks.head()

In [None]:
# Summary statistics of the per-borough totals and the park/waterside share.
df_cl_parks.describe()

- 32 boroughs (i.e. one borough has no cycleways in a park or along a waterway).
- On average, 47.87% of a borough's cycle lanes are located in parks or along waterways.
- The average length of such lanes is 44.10 km per borough.
- In one of the boroughs, 71.18% of cycle lanes are located in parks or along waterways.

In [None]:
# Visualize %-ge of cycle lanes in the parks or along the waterways by borough in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing the park/waterside share of its lanes.
ax = sns.barplot(y='BOROUGH', x='Parks %', data=df_cl_parks, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
# The axis label previously read 'Percetage' (typo).
ax.set_xlabel('Percentage', fontsize=35)
ax.set_title('Percentage of Park or Waterside Cycle Lanes by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

In [None]:
# Restrict the park/waterside table to the rows whose 'Area' is 'Outer'.
df_cl_parks_out = df_cl_parks.loc[df_cl_parks['Area'].eq('Outer')]
df_cl_parks_out.head()

In [None]:
# Visualize %-ge of the cycle lanes in the parks or waterways in Outer London only.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per Outer London borough showing the park/waterside share of its lanes.
ax = sns.barplot(y='BOROUGH', x='Parks %', data=df_cl_parks_out, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
# The axis label previously read 'Percetage' (typo).
ax.set_xlabel('Percentage', fontsize=35)
# NOTE(review): the title is the same as on the all-London chart - consider
# mentioning Outer London.
ax.set_title('Percentage of Park or Waterside Cycle Lanes by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# Traffic Calming 

In [None]:
# Load the traffic calming records from cycle_calm.csv.
cycle_calm = pd.read_csv('cycle_calm.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(cycle_calm.shape, cycle_calm.dtypes, sep='\n')
cycle_calm.head()

In [None]:
# Count the missing values in each column.
cycle_calm.isna().sum()

In [None]:
# Print a concise summary of the cycle_calm DataFrame.
cycle_calm.info()

In [None]:
# Parse the 'SVDATE' column into pandas datetimes.
cycle_calm = cycle_calm.assign(SVDATE=pd.to_datetime(cycle_calm['SVDATE']))
# Confirm the new dtype.
cycle_calm.info()

In [None]:
# Attach the borough lookup columns to every traffic calming row.
# A left join on 'BOROUGH' keeps all calming rows.
# NOTE(review): re-running this cell merges the lookup columns a second time.
cycle_calm = cycle_calm.merge(boroughs, on='BOROUGH', how='left')
cycle_calm.head()

In [None]:
# Distribution of the survey dates ('SVDATE') in 30 bins, with a KDE curve overlaid.
sns.histplot(cycle_calm, x='SVDATE', bins=30, kde=True, color='blue')

Most of the data was recorded between July 2017 and March 2018.

In [None]:
# Add 'TOTAL_TRF': the row-wise sum of all traffic calming measure columns.
cycle_calm = cycle_calm.assign(
    TOTAL_TRF=cycle_calm[[
        'TRF_RAISED', 'TRF_ENTRY', 'TRF_CUSHI',
        'TRF_HUMP', 'TRF_SINUSO', 'TRF_BARIER',
        'TRF_NAROW', 'TRF_CALM',
    ]].sum(axis='columns')
)
cycle_calm.head()

In [None]:
# Total traffic calming measures per borough within Inner and Outer London.
# Every TRF_* column and the overall total are summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending total,
# so the boroughs with the most measures come last within each area.
# The columns are selected with a list: selecting groupby columns with a bare
# tuple of names is rejected by pandas >= 2.0.
df_calm = (
    cycle_calm
    .groupby(['Area', 'BOROUGH'])[[
        'TRF_RAISED', 'TRF_ENTRY', 'TRF_CUSHI',
        'TRF_HUMP', 'TRF_SINUSO', 'TRF_BARIER',
        'TRF_NAROW', 'TRF_CALM', 'TOTAL_TRF',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'TOTAL_TRF'])
)
df_calm.head()

In [None]:
# Summary statistics of the per-borough traffic calming totals.
df_calm.describe()

- On average there are 1978 calming measures per borough in London.
- Speed humps are the most frequent, with an average of 1008 per borough.
- Road narrowings to reduce speed are the rarest - 20 per borough.
- The maximum number of total calming measures in one borough is 4264.

In [None]:
# Export the per-borough traffic calming totals to df_calm.csv
# (written to the current working directory).
df_calm.to_csv('df_calm.csv')

In [None]:
# Visualize the distribution of traffic calming measures between boroughs in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing its total number of calming measures.
ax = sns.barplot(y='BOROUGH', x='TOTAL_TRF', data=df_calm, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
ax.set_xlabel('Number of Traffic Calming Measures', fontsize=35)
ax.set_title('Road Calming Measures by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# Advanced Stop Lines (ASL).

In [None]:
# Load the Advanced Stop Line records from cycle_asl.csv.
cycle_asl = pd.read_csv('cycle_asl.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(cycle_asl.shape, cycle_asl.dtypes, sep='\n')
cycle_asl.head()

In [None]:
# Count the missing values in each column.
cycle_asl.isna().sum()

In [None]:
# Print a concise summary of the cycle_asl DataFrame.
# (This cell previously inspected cycle_calm, a copy-paste slip from the
# traffic calming section.)
cycle_asl.info()

In [None]:
# Parse the 'SVDATE' column into pandas datetimes.
cycle_asl = cycle_asl.assign(SVDATE=pd.to_datetime(cycle_asl['SVDATE']))
# Confirm the new dtype.
cycle_asl.info()

In [None]:
# Attach the borough lookup columns to every ASL row.
# A left join on 'BOROUGH' keeps all ASL rows.
# NOTE(review): re-running this cell merges the lookup columns a second time.
cycle_asl = cycle_asl.merge(boroughs, on='BOROUGH', how='left')
cycle_asl.head()

In [None]:
# Distribution of the survey dates ('SVDATE') in 30 bins, with a KDE curve overlaid.
sns.histplot(cycle_asl, x='SVDATE', bins=30, kde=True, color='blue')

In [None]:
# Show the descriptive statistics of the ASL records.
cycle_asl.describe()

In [None]:
# Total Advanced Stop Line (ASL) figures per borough within Inner and Outer London.
# Every ASL_* column and the ASL length (m) are summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending total
# length, so the boroughs with the longest ASL total come last within each area.
# The columns are selected with a list: selecting groupby columns with a bare
# tuple of names is rejected by pandas >= 2.0.
df_asl = (
    cycle_asl
    .groupby(['Area', 'BOROUGH'])[[
        'ASL_FDR', 'ASL_FDRLFT', 'ASL_FDCENT',
        'ASL_FDRIGH', 'ASL_SHARED', 'asl_length_m',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'asl_length_m'])
)
df_asl.head()

In [None]:
# Summary statistics of the per-borough ASL totals.
df_asl.describe()

- The mean total length of ASLs per borough is 524.93 meters.
- The largest total ASL length in a single borough is 1.6 km.
- The most common ASL type has a left feeder lane (closest to the footway): 54 per borough on average, with a maximum of 145.
- The rarest is the shared ASL (0.2 per borough).

In [None]:
# Export the per-borough ASL totals to df_asl.csv
# (written to the current working directory).
df_asl.to_csv('df_asl.csv')

In [None]:
# Visualize the distribution of ASLs lengths between boroughs in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing its total ASL length.
ax = sns.barplot(y='BOROUGH', x='asl_length_m', data=df_asl, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
ax.set_xlabel('ASL Length (m)', fontsize=35)
ax.set_title('Advanced Stop Lanes by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# Signals For Cycles.

In [None]:
# Load the cycle signal records from cycle_signal.csv.
cycle_signal = pd.read_csv('cycle_signal.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(cycle_signal.shape, cycle_signal.dtypes, sep='\n')
cycle_signal.head()

In [None]:
# Count the missing values in each column.
cycle_signal.isna().sum()

In [None]:
# Print a concise summary of the cycle_signal DataFrame.
cycle_signal.info()

In [None]:
# Parse the 'SVDATE' column into pandas datetimes.
cycle_signal = cycle_signal.assign(SVDATE=pd.to_datetime(cycle_signal['SVDATE']))
# Confirm the new dtype.
cycle_signal.info()

In [None]:
# Distribution of the survey dates ('SVDATE'), with a KDE curve overlaid
# (the number of bins is left to seaborn here).
sns.histplot(cycle_signal, x='SVDATE', kde=True, color='blue')

In [None]:
# Attach the borough lookup columns to every cycle signal row.
# A left join on 'BOROUGH' keeps all signal rows.
# NOTE(review): re-running this cell merges the lookup columns a second time.
cycle_signal = cycle_signal.merge(boroughs, on='BOROUGH', how='left')
cycle_signal.head()

In [None]:
# Total cycle signal figures per borough within Inner and Outer London.
# Every SIG_* column is summed for each (Area, BOROUGH) pair; the result is
# ordered by area and ascending 'SIG_HEAD' total, so the boroughs with the
# most of those signals come last within each area.
# The columns are selected with a list: selecting groupby columns with a bare
# tuple of names is rejected by pandas >= 2.0.
df_signal = (
    cycle_signal
    .groupby(['Area', 'BOROUGH'])[[
        'SIG_HEAD', 'SIG_SEPARA', 'SIG_EARLY',
        'SIG_TWOSTG', 'SIG_GATE',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'SIG_HEAD'])
)
df_signal.head()

In [None]:
# Summary statistics of the per-borough signal totals.
df_signal.describe()

- Only 23 boroughs appear in the signal data.
- The most common type is the signal with a cycle sign on it (19 per borough).
- The maximum for a single borough is 11 signals.
- The rarest is the two-stage turn signal (mean of 1.2 per borough).

In [None]:
# Export the per-borough signal totals to df_signal.csv
# (written to the current working directory).
df_signal.to_csv('df_signal.csv')

In [None]:
# Visualize the distribution of signal lights for cyclists between boroughs in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing its 'SIG_HEAD' total.
ax = sns.barplot(y='BOROUGH', x='SIG_HEAD', data=df_signal, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
ax.set_xlabel('Number of Signal Lights with Cycle Sign', fontsize=35)
ax.set_title('Signal Lights with Cycle Sign by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()

# Crossings For Cycles.

In [None]:
# Load the cycle crossing records from cycle_cross.csv.
cycle_cross = pd.read_csv('cycle_cross.csv')

# Report the dimensions and column dtypes, then preview the first rows.
print(cycle_cross.shape, cycle_cross.dtypes, sep='\n')
cycle_cross.head()

In [None]:
# Count the missing values in each column.
cycle_cross.isna().sum()

In [None]:
# Print a concise summary of the cycle_cross DataFrame.
cycle_cross.info()

In [None]:
# Parse the 'SVDATE' column into pandas datetimes.
cycle_cross = cycle_cross.assign(SVDATE=pd.to_datetime(cycle_cross['SVDATE']))
# Confirm the new dtype.
cycle_cross.info()

In [None]:
# Attach the borough lookup columns to every cycle crossing row.
# A left join on 'BOROUGH' keeps all crossing rows.
# NOTE(review): re-running this cell merges the lookup columns a second time.
cycle_cross = cycle_cross.merge(boroughs, on='BOROUGH', how='left')
cycle_cross.head()

In [None]:
# Distribution of the survey dates ('SVDATE'), with a KDE curve overlaid
# (the number of bins is left to seaborn here).
sns.histplot(cycle_cross, x='SVDATE', kde=True, color='blue')

In [None]:
# Add 'TOTAL_CRS': the row-wise sum of all crossing type columns.
cycle_cross = cycle_cross.assign(
    TOTAL_CRS=cycle_cross[[
        'CRS_SIGNAL', 'CRS_SEGREG', 'CRS_CYGAP',
        'CRS_PEDEST', 'CRS_LEVEL',
    ]].sum(axis='columns')
)
cycle_cross.head()

In [None]:
# Total cycle crossing figures per borough within Inner and Outer London.
# Every CRS_* column and the overall total are summed for each
# (Area, BOROUGH) pair; the result is ordered by area and ascending total,
# so the boroughs with the most crossings come last within each area.
# The columns are selected with a list: selecting groupby columns with a bare
# tuple of names is rejected by pandas >= 2.0.
df_cross = (
    cycle_cross
    .groupby(['Area', 'BOROUGH'])[[
        'CRS_SIGNAL', 'CRS_SEGREG', 'CRS_CYGAP',
        'CRS_PEDEST', 'CRS_LEVEL', 'TOTAL_CRS',
    ]]
    .sum()
    .reset_index()
    .sort_values(by=['Area', 'TOTAL_CRS'])
)
df_cross.head()

In [None]:
# Summary statistics of the per-borough crossing totals.
df_cross.describe()

- The average number of crossings is 53 per borough.
- There are 118 crossings in one of the boroughs.
- The most frequent is the signal-controlled crossing: a mean of 38 per borough.
- The rarest is the level crossing over railways (0.6 per borough).

In [None]:
# Export the per-borough crossing totals to df_cross.csv
# (written to the current working directory).
df_cross.to_csv('df_cross.csv')

In [None]:
# Visualize the distribution of crossings for cyclists between boroughs in Inner and Outer London.

# Apply the seaborn theme with a large figure size, then switch to the darkgrid style.
sns.set(rc={'figure.figsize': (15, 12)})
sns.set_style('darkgrid')

# One horizontal bar per borough showing its total number of cycle crossings.
ax = sns.barplot(y='BOROUGH', x='TOTAL_CRS', data=df_cross, color='royalblue')

# Annotate the axes, labels and ticks.
plt.yticks(fontsize=25)
plt.xticks(fontsize=30)
# NOTE(review): the y-label is drawn in white, presumably to hide it on a
# light background - confirm that this is intended.
ax.set_ylabel('Borough', fontsize=35, color='white')
ax.set_xlabel('Number of Cycle Crossings', fontsize=35)
ax.set_title('Cycle Crossings by Borough', fontsize=40)

# Render the figure explicitly; this replaces the dummy `i=0` assignment that
# only served to suppress the text repr of the last call.
plt.show()