# London Borough

 In this notebook, 

In [1]:
import pandas as pd # library for data analysis
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML documents

In [2]:
# get the response in the form of html
wikiurl="https://en.wikipedia.org/wiki/List_of_London_boroughs"
table_class="wikitable sortable jquery-tablesorter"
response=requests.get(wikiurl)
print(response.status_code)



200


In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, 'html.parser')
indiatable=soup.find('table',{'class':"wikitable"})

In [4]:
indiatable

<table class="wikitable sortable" style="font-size:100%" width="100%">
<tbody><tr>
<th>Borough
</th>
<th>Inner
</th>
<th>Status
</th>
<th>Local authority
</th>
<th>Political control
</th>
<th>Headquarters
</th>
<th>Area (sq mi)
</th>
<th>Population (2019 est)<sup class="reference" id="cite_ref-1"><a href="#cite_note-1">[1]</a></sup>
</th>
<th>Co-ordinates
</th>
<th><span style="background:#67BCD3"> Nr. in map </span>
</th></tr>
<tr>
<td><a href="/wiki/London_Borough_of_Barking_and_Dagenham" title="London Borough of Barking and Dagenham">Barking and Dagenham</a><sup class="reference" id="cite_ref-2"><a href="#cite_note-2">[note 1]</a></sup>
</td>
<td>
</td>
<td>
</td>
<td><a href="/wiki/Barking_and_Dagenham_London_Borough_Council" title="Barking and Dagenham London Borough Council">Barking and Dagenham London Borough Council</a>
</td>
<td><a href="/wiki/Labour_Party_(UK)" title="Labour Party (UK)">Labour</a>
</td>
<td><a href="/wiki/Barking_Town_Hall" title="Barking Town Hall">Town Hall

In [5]:
df=pd.read_html(str(indiatable))
# convert list to dataframe
df=pd.DataFrame(df[0])
df.head(20)

Unnamed: 0,Borough,Inner,Status,Local authority,Political control,Headquarters,Area (sq mi),Population (2019 est)[1],Co-ordinates,Nr. in map
0,Barking and Dagenham[note 1],,,Barking and Dagenham London Borough Council,Labour,"Town Hall, 1 Town Square",13.93,212906,".mw-parser-output .geo-default,.mw-parser-outp...",25
1,Barnet,,,Barnet London Borough Council,Conservative,"Barnet House, 2 Bristol Avenue, Colindale",33.49,395896,51°37′31″N 0°09′06″W﻿ / ﻿51.6252°N 0.1517°W,31
2,Bexley,,,Bexley London Borough Council,Conservative,"Civic Offices, 2 Watling Street",23.38,248287,51°27′18″N 0°09′02″E﻿ / ﻿51.4549°N 0.1505°E,23
3,Brent,,,Brent London Borough Council,Labour,"Brent Civic Centre, Engineers Way",16.7,329771,51°33′32″N 0°16′54″W﻿ / ﻿51.5588°N 0.2817°W,12
4,Bromley,,,Bromley London Borough Council,Conservative,"Civic Centre, Stockwell Close",57.97,332336,51°24′14″N 0°01′11″E﻿ / ﻿51.4039°N 0.0198°E,20
5,Camden,,,Camden London Borough Council,Labour,"Camden Town Hall, Judd Street",8.4,270029,51°31′44″N 0°07′32″W﻿ / ﻿51.5290°N 0.1255°W,11
6,Croydon,,,Croydon London Borough Council,Labour,"Bernard Weatherill House, Mint Walk",33.41,386710,51°22′17″N 0°05′52″W﻿ / ﻿51.3714°N 0.0977°W,19
7,Ealing,,,Ealing London Borough Council,Labour,"Perceval House, 14-16 Uxbridge Road",21.44,341806,51°30′47″N 0°18′32″W﻿ / ﻿51.5130°N 0.3089°W,13
8,Enfield,,,Enfield London Borough Council,Labour,"Civic Centre, Silver Street",31.74,333794,51°39′14″N 0°04′48″W﻿ / ﻿51.6538°N 0.0799°W,30
9,Greenwich [note 2],[note 3],Royal,Greenwich London Borough Council,Labour,"Woolwich Town Hall, Wellington Street",18.28,287942,51°29′21″N 0°03′53″E﻿ / ﻿51.4892°N 0.0648°E,22


In [6]:
df.to_csv("London_brough.csv")

In [7]:
df.shape

(32, 10)

In [None]:
df.head()

# Cleaning Data

## cleaning the 1st row

In [8]:
df['Co-ordinates'][0] = "51°33′39″N 0°09′21″E / 51.5607°N 0.1557°E"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Co-ordinates'][0] = "51°33′39″N 0°09′21″E / 51.5607°N 0.1557°E"


## splitting the Co-ordinates column

In [10]:


#working on the Co-ordinates
"""
DMS, which stands for Degrees, Minutes, Seconds…it would look like this: 32° 18' 23.1" N 122° 36' 52.5" W.

The next format would be Degrees and Decimal Minutes, it would look like this: 32° 18.385' N 122° 36.875' W.
"""

# Create two lists for the loop results to be placed

DMS = []
DDM = []
# For each row in a varible,
for row in df['Co-ordinates']:
    # Try to,
    try:
        # Split the row by comma and append
        # everything before the comma to lat
        DMS.append(row.split("/")[0])
        # Split the row by comma and append
        # everything after the comma to lon 
        DDM.append(row.split("/")[1])
    # But if you get an error
    except:
        # append a missing value to lat
        DMS.append(np.NaN)
        # append a missing value to lon
        DDM.append(np.NaN)
# Create two new columns from lat and lon
df['DMS'] = DMS
df['DDM'] = DDM

In [11]:
df.head()

Unnamed: 0,Borough,Inner,Status,Local authority,Political control,Headquarters,Area (sq mi),Population (2019 est)[1],Co-ordinates,Nr. in map,DMS,DDM
0,Barking and Dagenham[note 1],,,Barking and Dagenham London Borough Council,Labour,"Town Hall, 1 Town Square",13.93,212906,51°33′39″N 0°09′21″E / 51.5607°N 0.1557°E,25,51°33′39″N 0°09′21″E,51.5607°N 0.1557°E
1,Barnet,,,Barnet London Borough Council,Conservative,"Barnet House, 2 Bristol Avenue, Colindale",33.49,395896,51°37′31″N 0°09′06″W﻿ / ﻿51.6252°N 0.1517°W,31,51°37′31″N 0°09′06″W﻿,﻿51.6252°N 0.1517°W
2,Bexley,,,Bexley London Borough Council,Conservative,"Civic Offices, 2 Watling Street",23.38,248287,51°27′18″N 0°09′02″E﻿ / ﻿51.4549°N 0.1505°E,23,51°27′18″N 0°09′02″E﻿,﻿51.4549°N 0.1505°E
3,Brent,,,Brent London Borough Council,Labour,"Brent Civic Centre, Engineers Way",16.7,329771,51°33′32″N 0°16′54″W﻿ / ﻿51.5588°N 0.2817°W,12,51°33′32″N 0°16′54″W﻿,﻿51.5588°N 0.2817°W
4,Bromley,,,Bromley London Borough Council,Conservative,"Civic Centre, Stockwell Close",57.97,332336,51°24′14″N 0°01′11″E﻿ / ﻿51.4039°N 0.0198°E,20,51°24′14″N 0°01′11″E﻿,﻿51.4039°N 0.0198°E


## removing the spical charachter

In [12]:
df['DDM']=df['DDM'].replace('\°','',regex=True)


## splitting the DDM column 

In [13]:

df[['latitude', 'longitude']] = df['DDM'].str.split('N', 1, expand=True)

In [15]:
#removing the last char in lon column

df.longitude = df.longitude.str[:-1]


In [16]:
df.sample(20)

Unnamed: 0,Borough,Inner,Status,Local authority,Political control,Headquarters,Area (sq mi),Population (2019 est)[1],Co-ordinates,Nr. in map,DMS,DDM,latitude,longitude
14,Havering,,,Havering London Borough Council,Conservative (council NOC),"Town Hall, Main Road",43.35,259552,51°34′52″N 0°11′01″E﻿ / ﻿51.5812°N 0.1837°E,24,51°34′52″N 0°11′01″E﻿,﻿51.5812N 0.1837E,﻿51.5812,0.1837
3,Brent,,,Brent London Borough Council,Labour,"Brent Civic Centre, Engineers Way",16.7,329771,51°33′32″N 0°16′54″W﻿ / ﻿51.5588°N 0.2817°W,12,51°33′32″N 0°16′54″W﻿,﻿51.5588N 0.2817W,﻿51.5588,0.2817
13,Harrow,,,Harrow London Borough Council,Labour,"Civic Centre, Station Road",19.49,251160,51°35′23″N 0°20′05″W﻿ / ﻿51.5898°N 0.3346°W,32,51°35′23″N 0°20′05″W﻿,﻿51.5898N 0.3346W,﻿51.5898,0.3346
16,Hounslow,,,Hounslow London Borough Council,Labour,"Hounslow House, 7 Bath Road",21.61,271523,51°28′29″N 0°22′05″W﻿ / ﻿51.4746°N 0.3680°W,14,51°28′29″N 0°22′05″W﻿,﻿51.4746N 0.3680W,﻿51.4746,0.368
30,Wandsworth,,,Wandsworth London Borough Council,Conservative,"The Town Hall, Wandsworth High Street",13.23,329677,51°27′24″N 0°11′28″W﻿ / ﻿51.4567°N 0.1910°W,5,51°27′24″N 0°11′28″W﻿,﻿51.4567N 0.1910W,﻿51.4567,0.191
24,Redbridge,,,Redbridge London Borough Council,Labour,"Town Hall, 128-142 High Road",21.78,305222,51°33′32″N 0°04′27″E﻿ / ﻿51.5590°N 0.0741°E,26,51°33′32″N 0°04′27″E﻿,﻿51.5590N 0.0741E,﻿51.5590,0.0741
11,Hammersmith and Fulham[note 4],,,Hammersmith and Fulham London Borough Council,Labour,"Town Hall, King Street",6.33,185143,51°29′34″N 0°14′02″W﻿ / ﻿51.4927°N 0.2339°W,4,51°29′34″N 0°14′02″W﻿,﻿51.4927N 0.2339W,﻿51.4927,0.2339
0,Barking and Dagenham[note 1],,,Barking and Dagenham London Borough Council,Labour,"Town Hall, 1 Town Square",13.93,212906,51°33′39″N 0°09′21″E / 51.5607°N 0.1557°E,25,51°33′39″N 0°09′21″E,51.5607N 0.1557E,51.5607,0.1557
7,Ealing,,,Ealing London Borough Council,Labour,"Perceval House, 14-16 Uxbridge Road",21.44,341806,51°30′47″N 0°18′32″W﻿ / ﻿51.5130°N 0.3089°W,13,51°30′47″N 0°18′32″W﻿,﻿51.5130N 0.3089W,﻿51.5130,0.3089
17,Islington,,,Islington London Borough Council,Labour,"Customer Centre, 222 Upper Street",5.74,242467,51°32′30″N 0°06′08″W﻿ / ﻿51.5416°N 0.1022°W,10,51°32′30″N 0°06′08″W﻿,﻿51.5416N 0.1022W,﻿51.5416,0.1022


## Remove unnecassery columns

In [17]:
df.columns

Index(['Borough', 'Inner', 'Status', 'Local authority', 'Political control',
       'Headquarters', 'Area (sq mi)', 'Population (2019 est)[1]',
       'Co-ordinates', 'Nr. in map', 'DMS', 'DDM', 'latitude', 'longitude'],
      dtype='object')

In [93]:
df_clean = df.copy()

In [94]:
df_clean = df_clean.drop(labels=['Inner', 'Status','Co-ordinates', 'DMS', 'DDM'], axis=1)

In [95]:
df_clean.dtypes

Borough                      object
Local authority              object
Political control            object
Headquarters                 object
Area (sq mi)                float64
Population (2019 est)[1]      int64
Nr. in map                    int64
latitude                     object
longitude                    object
dtype: object

## clean latitude and longitude columns 

In [96]:
#replace(' \ufeff', '') from latitude and longitude column
df_clean['latitude'] = [float(str(i).replace(" \ufeff", "")) for i in df_clean['latitude']]
df_clean['longitude'] = [float(str(i).replace(" \ufeff", "")) for i in df_clean['longitude']]



In [97]:
df_clean.head()

Unnamed: 0,Borough,Local authority,Political control,Headquarters,Area (sq mi),Population (2019 est)[1],Nr. in map,latitude,longitude
0,Barking and Dagenham[note 1],Barking and Dagenham London Borough Council,Labour,"Town Hall, 1 Town Square",13.93,212906,25,51.5607,0.1557
1,Barnet,Barnet London Borough Council,Conservative,"Barnet House, 2 Bristol Avenue, Colindale",33.49,395896,31,51.6252,0.1517
2,Bexley,Bexley London Borough Council,Conservative,"Civic Offices, 2 Watling Street",23.38,248287,23,51.4549,0.1505
3,Brent,Brent London Borough Council,Labour,"Brent Civic Centre, Engineers Way",16.7,329771,12,51.5588,0.2817
4,Bromley,Bromley London Borough Council,Conservative,"Civic Centre, Stockwell Close",57.97,332336,20,51.4039,0.0198


## Change some columns type

In [98]:
df_clean['latitude'] = df_clean['latitude'].map(lambda x: float(x))
df_clean['longitude'] = df_clean['longitude'].map(lambda x: float(x))



In [99]:
# check the columns type
df_clean.dtypes

Borough                      object
Local authority              object
Political control            object
Headquarters                 object
Area (sq mi)                float64
Population (2019 est)[1]      int64
Nr. in map                    int64
latitude                    float64
longitude                   float64
dtype: object

In [106]:
#change the Borough column from object to string 

df_clean.Borough = df_clean.Borough.astype('string')


In [107]:
# check the columns type
df_clean.dtypes

Borough                      string
Local authority              object
Political control            object
Headquarters                 object
Area (sq mi)                float64
Population (2019 est)[1]      int64
Nr. in map                    int64
latitude                    float64
longitude                   float64
dtype: object

## cleaning Borough column

In [108]:
df_clean.Borough.value_counts()

Barking and Dagenham[note 1]      1
Barnet                            1
Bexley                            1
Brent                             1
Bromley                           1
Camden                            1
Croydon                           1
Ealing                            1
Enfield                           1
Greenwich [note 2]                1
Hackney                           1
Hammersmith and Fulham[note 4]    1
Haringey                          1
Harrow                            1
Havering                          1
Hillingdon                        1
Hounslow                          1
Islington                         1
Kensington and Chelsea            1
Kingston upon Thames              1
Lambeth                           1
Lewisham                          1
Merton                            1
Newham                            1
Redbridge                         1
Richmond upon Thames              1
Southwark                         1
Sutton                      

In [110]:
import re 

df_clean['Borough'] = df_clean['Borough'].str.replace(r"[\(\[].*?[\)\]]","")


#re.sub("[\(\[].*?[\)\]]", "", df_clean['Borough'])


  df_clean['Borough'] = df_clean['Borough'].str.replace(r"[\(\[].*?[\)\]]","")


In [113]:
df['Borough'] = df['Borough'].str.strip()

In [114]:
df_clean['Borough'].value_counts()

Barking and Dagenham      1
Barnet                    1
Bexley                    1
Brent                     1
Bromley                   1
Camden                    1
Croydon                   1
Ealing                    1
Enfield                   1
Greenwich                 1
Hackney                   1
Hammersmith and Fulham    1
Haringey                  1
Harrow                    1
Havering                  1
Hillingdon                1
Hounslow                  1
Islington                 1
Kensington and Chelsea    1
Kingston upon Thames      1
Lambeth                   1
Lewisham                  1
Merton                    1
Newham                    1
Redbridge                 1
Richmond upon Thames      1
Southwark                 1
Sutton                    1
Tower Hamlets             1
Waltham Forest            1
Wandsworth                1
Westminster               1
Name: Borough, dtype: Int64

## Rename column (Population (2019 est)[1])

In [116]:

df_clean.rename(columns = {'Population (2019 est)[1]':'Population_2019'}, inplace = True)


# Export the cleaning dataframe 

In [118]:
df_clean.to_csv('clean_london_data.csv')