In [1]:
import pandas as pd
import requests
import urllib.parse
import geopandas as gpd


In [2]:
inactivity_df = pd.read_csv('ni_inac.csv')
inactivity_df.rename(columns={'Census 2021 Super Data Zone Code': 'SDZ'}, inplace=True)

In [5]:
inactivity_df['Economic Activity - 9 Categories Label'].value_counts()

Economically active: Employee (including full-time students)         850
Economically active: Self-employed (including full-time students)    850
Economically active: Unemployed (including full-time students)       850
Economically inactive: Retired                                       850
Economically inactive: Student                                       850
Economically inactive: Looking after home or family                  850
Economically inactive: Long-term sick or disabled                    850
Economically inactive: Other                                         850
No code required                                                     850
Name: Economic Activity - 9 Categories Label, dtype: int64

In [120]:
pop_df = pd.read_excel('ni_pop.xlsx', sheet_name='SDZ', skiprows=5)
pop_df.rename(columns={'All usual residents':'Population', 'Geography Code': "SDZ"}, inplace=True)
pop_df = pop_df[['SDZ', 'Population']]

In [121]:
df = pd.merge(inactivity_df, pop_df, on='SDZ', how='left')
df.rename(columns={'Economic Activity - 9 Categories Label':'Series', 'Census 2021 Super Data Zone Label': 'Area'}, inplace=True)
df['Value'] = df['Count']/df['Population']
df = df[["SDZ", "Area", "Series", "Count", "Population", "Value"]]

file_names = {
 'Economically active: Employee (including full-time students)':'employee.csv',
 'Economically active: Self-employed (including full-time students)': 'self_employed.csv',
 'Economically active: Unemployed (including full-time students)': 'unemployed.csv',
 'Economically inactive: Retired': 'retired.csv',
 'Economically inactive: Student': 'student.csv',
 'Economically inactive: Looking after home or family': 'home.csv',
 'Economically inactive: Long-term sick or disabled': 'sick.csv',
 'Economically inactive: Other': 'other.csv',
 'No code required': 'no_code.csv'
}

for series in df.Series.unique():
    df[df.Series == series].to_csv("/Users/finn/Documents/GitHub/RADataHub/Economic Inactivity/NI_Activity/"+file_names[series], index=False)

In [124]:
df[df.Series.str.contains('Economically inactive')].groupby(['SDZ', 'Area']).sum().to_csv('/Users/finn/Documents/GitHub/RADataHub/Economic Inactivity/NI_Activity/inactive.csv')

df[df.Series.str.contains('Economically inactive')].groupby(['SDZ', 'Area']).sum().to_csv('/Users/finn/Documents/GitHub/RADataHub/Economic Inactivity/NI_Activity/inactive.csv')

In [59]:
df[df.Area.str.contains('park')]

Unnamed: 0,SDZ,Area,Series,Count,Population,Value
1440,N21000161,Oldpark_A,Economically active: Employee (including full-...,1022,2707,0.377540
1441,N21000161,Oldpark_A,Economically active: Self-employed (including ...,116,2707,0.042852
1442,N21000161,Oldpark_A,Economically active: Unemployed (including ful...,69,2707,0.025489
1443,N21000161,Oldpark_A,Economically inactive: Retired,192,2707,0.070927
1444,N21000161,Oldpark_A,Economically inactive: Student,138,2707,0.050979
...,...,...,...,...,...,...
1597,N21000178,Oldpark_U,Economically inactive: Student,92,1775,0.051831
1598,N21000178,Oldpark_U,Economically inactive: Looking after home or f...,114,1775,0.064225
1599,N21000178,Oldpark_U,Economically inactive: Long-term sick or disabled,269,1775,0.151549
1600,N21000178,Oldpark_U,Economically inactive: Other,120,1775,0.067606


In [39]:
places_df = pd.read_excel('/Users/finn/Documents/Data_Dump/NI_Places.xlsx')

In [46]:
def get_coords(place):
    """Get the coordinates of a place using the Google Maps API"""
    place = place+" Northern Ireland, UK"
    url = 'https://nominatim.openstreetmap.org/search?q=' + urllib.parse.quote(place) + '&format=json'

    print(url)

    response = requests.get(url).json()
    print(response)
    return [response[0]["lat"], response[0]["lon"]]

if input("Do you want to get the coordinates of the places? (y/n)") == "y":
    places_df['coords'] = places_df['City'].apply(get_coords)

https://nominatim.openstreetmap.org/search?q=Belfast%20Northern%20Ireland%2C%20UK&format=json
[{'place_id': 13725155, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'node', 'osm_id': 1418701024, 'lat': '54.596391', 'lon': '-5.9301829', 'class': 'place', 'type': 'city', 'place_rank': 16, 'importance': 0.6500238353643837, 'addresstype': 'city', 'name': 'Belfast', 'display_name': 'Belfast, County Antrim, Ulster, Northern Ireland / Tuaisceart Éireann, BT1 5GS, United Kingdom', 'boundingbox': ['54.4363910', '54.7563910', '-6.0901829', '-5.7701829']}]
https://nominatim.openstreetmap.org/search?q=Londonderry%20Northern%20Ireland%2C%20UK&format=json
[{'place_id': 730705, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'node', 'osm_id': 267762522, 'lat': '54.9978678', 'lon': '-7.3213056', 'class': 'place', 'type': 'city', 'place_rank': 16, 'importance': 0.5541638863419593, 'addresstype': 'city', 'na

In [48]:
places_df['lat'] = places_df['coords'].apply(lambda x: x[0])
places_df['lng'] = places_df['coords'].apply(lambda x: x[1])

In [52]:
places_df[['City', 'Population', 'lat', 'lng']].to_csv("/Users/finn/Documents/GitHub/RADataHub/Economic Inactivity/ni_cities.csv", index=False)

# Belfast Place Names

In [109]:
#load the geojson file
wards_df = gpd.read_file("/Users/finn/Documents/Data_Dump/ni_dea.geojson")
wards_df["lon"] = wards_df["geometry"].centroid.x
wards_df["lat"] = wards_df["geometry"].centroid.y
wards_df.rename(columns={'FINALR_DEA': 'name'}, inplace=True)




  wards_df["lon"] = wards_df["geometry"].centroid.x

  wards_df["lat"] = wards_df["geometry"].centroid.y


In [112]:
belfast_wards = ['N10000301', 'N10000302', 'N10000303', 'N10000304', 'N10000305', 'N10000306', 'N10000307', 'N10000308', 'N10000309', 'N10000310']

#wards_df[wards_df.WARDNAME.apply(lambda x: len([w for w in belfast_wards if w in x])>0)]
wards_df = wards_df[wards_df.ONS_CODE.isin(belfast_wards)]

wards_df = wards_df[['name', 'ONS_CODE', 'lat', 'lon']]


In [115]:
wards_df.to_csv("/Users/finn/Documents/GitHub/RADataHub/Economic Inactivity/belfast_wards.csv", index=False)

In [111]:
wards_df[wards_df.ONS_CODE.isin(belfast_wards)]


Unnamed: 0,name,ONS_CODE,lat,lon
8,BALMORAL,N10000301,54.554062,-5.960178
16,BLACK MOUNTAIN,N10000302,54.590505,-6.006519
17,BOTANIC,N10000303,54.581114,-5.938166
21,CASTLE,N10000304,54.633818,-5.925555
28,COLLIN,N10000305,54.560418,-6.022616
31,COURT,N10000306,54.60899,-5.978362
56,LISNASHARRAGH,N10000307,54.575236,-5.891728
64,OLDPARK,N10000308,54.629515,-5.976764
66,ORMISTON,N10000309,54.597079,-5.845458
76,TITANIC,N10000310,54.610446,-5.88668


In [93]:
wards_df[wards_df.WARDNAME.str.contains('Belvoir', case=False)]

Unnamed: 0,OBJECTID,WARDNAME,WardCode,SHAPE_Length,SHAPE_Area,geometry,lon,lat
88,89,BELVOIR,N08000311,14491.873628,5466012.0,"POLYGON ((-5.92388 54.56849, -5.92396 54.56850...",-5.937328,54.551763


In [87]:
wards_df.query("'BA' in WARDNAME")

Unnamed: 0,OBJECTID,WARDNAME,WardCode,SHAPE_Length,SHAPE_Area,geometry,lon,lat


In [32]:
list(dict(df.Series.value_counts()).keys())

['Economically active: Employee (including full-time students)',
 'Economically active: Self-employed (including full-time students)',
 'Economically active: Unemployed (including full-time students)',
 'Economically inactive: Retired',
 'Economically inactive: Student',
 'Economically inactive: Looking after home or family',
 'Economically inactive: Long-term sick or disabled',
 'Economically inactive: Other',
 'No code required']