In [11]:
%load_ext pycodestyle_magic
%pycodestyle_on

In [12]:
import pandas as pd
import geopandas as gpd
import os
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
import requests
import json
import pyproj

In [13]:
def geo_df_from_csv(path_to_csv, geom_x, geom_y, delim='\t', crs ="EPSG:4326"):
    """Function to create a Geo-dataframe from a csv file.
        The process goes via Pandas
    
        Arguments:
            path_to_csv (string): path to the txt/csv containing geo data
                to be read
            delimiter (string): the seperator in the csv file e.g. "," or "\t" 
            geom_x (string):name of the column that contains the longitude data
            geom_y (string):name of the column that contains the latitude data
            
        Returns:
            Geopandas Dataframe
            """
    pd_df = pd.read_csv(path_to_csv, delim)
    geometry = [Point(xy) for xy in zip(pd_df[geom_x], pd_df[geom_y])]
    geo_df = gpd.GeoDataFrame(pd_df, geometry=geometry)
    geo_df.crs = crs
    return geo_df


stops_path = (os.path.join
              (os.getcwd(),
               'data',
               'Stops.txt'))

stops_geo_df = (geo_df_from_csv(path_to_csv=stops_path,
                            delim='\t',
                            geom_x='stop_lon',
                            geom_y='stop_lat'))
stops_geo_df.sample(15)

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,stop_url,vehicle_type,geometry
89544,1600GLS700,glodawgd,"Nailsworth, Swan Bank (NE-bound)",51.68782,-2.2253,,3.0,POINT (-2.22530 51.68782)
96009,1800WK11381,MANTGWGW,"Crankwood, Crankwood Rd (Os House 482)",53.49463,-2.56221,,3.0,POINT (-2.56221 53.49463)
100516,1800EH00881,MANATWTJ,"Broadbottom, Broadbottom Rd/Bucklow Cl (nr)",53.44709,-2.01856,,3.0,POINT (-2.01856 53.44709)
207852,3600SOA10109,sotagjwt,"Bishop's Lydeard, Lethbridge Arms (W-bound)",51.05528,-3.19005,,3.0,POINT (-3.19005 51.05528)
48296,1000DPWR7136,dbsawtwd,"Pinxton, Platt Street (Adj)",53.08749,-1.32126,,3.0,POINT (-1.32126 53.08749)
211908,370021639,37021639,"Waterthorpe, Crystal Peaks Bus Station (CP8)",53.34167,-1.35366,,3.0,POINT (-1.35366 53.34167)
286948,4600WIA11818,wilawtga,"Ditchampton, The Hollows (N-bound)",51.08397,-1.86926,,3.0,POINT (-1.86926 51.08397)
354789,6200248392,36237837,"Swanston, Turning Circle (at)",55.901,-3.22594,,3.0,POINT (-3.22594 55.90100)
78696,150035002008,esxapgwt,"Potter Street, Pytt Field (opp)",51.76358,0.12699,,3.0,POINT (0.12699 51.76358)
9189,02901096,ltnagam,"Vauxhall Park, Newnham Close (Opp)",51.88707,-0.38076,,3.0,POINT (-0.38076 51.88707)


1:65: E251 unexpected spaces around keyword / parameter equals
4:1: W293 blank line contains whitespace
8:79: W291 trailing whitespace
11:1: W293 blank line contains whitespace
28:29: E128 continuation line under-indented for visual indent
29:29: E128 continuation line under-indented for visual indent
30:29: E128 continuation line under-indented for visual indent


In [15]:
# # Load Greater London polygon and check CRS is 4326

def geo_df_from_geospatialfile(path_to_file, crs="EPSG:4326"):
    
    """Function to create a Geo-dataframe from a csv file.
        The process goes via Pandas
    
        Arguments:
            path_to_file (string): path to the geojson, shp and other geospatial data files

        Returns:
            Geopandas Dataframe
            """
    geo_df = gpd.read_file(path_to_file)
    if geo_df.crs != crs:
        geo_df = geo_df.to_crs("EPSG:4326")
    return geo_df
        

greater_london_path = ((os.path.join
                                (os.getcwd(),
                                 'data',
                                 'greater_london.geojson')))

greater_london_geo_df = geo_df_from_geospatialfile(greater_london_path)

greater_london_geo_df.head()


Unnamed: 0,id,EER13CD,EER13CDO,EER13NM,geometry
0,E15000007,E15000007,7,London,"MULTIPOLYGON (((-0.32111 51.44603, -0.32520 51..."


4:1: W293 blank line contains whitespace
7:1: W293 blank line contains whitespace
9:80: E501 line too long (91 > 79 characters)
18:1: W293 blank line contains whitespace
21:33: E127 continuation line over-indented for visual indent
28:1: W391 blank line at end of file


In [16]:
def find_points_in_poly(geo_df, polygon_obj):
    """Find points in polygon using geopandas' spatial join.
        Then drops all rows where the point is not in the polygon
        (based on column index_right not being NaN). Finally it
        drop all column names from that were created in the join,
        leaving only the columns of the original geo_df
        
        Arguments:
            geo_df (string): name of a geo pandas dataframe
            polygon_obj (string): a geopandas dataframe with a polygon column
            
        Returns:
            A geodata frame with the points inside the supplied polygon"""
    wanted_cols = geo_df.columns.to_list()
    joined_df = (gpd.sjoin
                 (geo_df,
                  polygon_obj,
                  how='left',
                  op='within'))
    filtered_df = (joined_df
                   [joined_df
                    ['index_right'].notna()])
    filtered_df = filtered_df[wanted_cols]
    return filtered_df


# Creating a Geo Dataframe of only stops in London
london_stops_geo_df = (find_points_in_poly
                       (geo_df=stops_geo_df,
                        polygon_obj=greater_london))

london_stops_geo_df.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,stop_url,vehicle_type,geometry
76301,150012891S,esxjdtjp,"Grange Hill, Stradbroke Park (adj)",51.60482,0.0729,,3.0,POINT (0.07290 51.60482)
79876,150042023001,esxatmga,"Grange Hill, Tudor Crescent (adj)",51.60665,0.08303,,3.0,POINT (0.08303 51.60665)
122161,210021803340,hrtajatj,"Batchworth Heath, Mount Vernon Hospital (nr)",51.6146,-0.45066,,3.0,POINT (-0.45066 51.61460)
123431,210021001322,hrtgtdad,"Dancers Hill, The Shires (nr)",51.66453,-0.20933,,3.0,POINT (-0.20933 51.66453)
134927,2400107805,kntjwmdj,"Knockholt, Scotts Lodge (opp)",51.30058,0.08625,,3.0,POINT (0.08625 51.30058)


7:1: W293 blank line contains whitespace
11:1: W293 blank line contains whitespace


In [54]:
# Building the map of Local Authority Districts

birmingham_map_path = (os.path.join
                   (os.getcwd(),
                    'data',
                    'Birmingham_merged_census_BoundaryData',
                    'england_oac_2011.shp'))

birmingham_census_geo_df = geo_df_from_geospatialfile(birmingham_map_path)

col = birmingham_census_geo_df.label.str.rsplit('E')

4:20: E128 continuation line under-indented for visual indent


0       E['', '08000025', '02001890', '01008987', '000...
1       E['', '08000025', '02001890', '01008987', '000...
2       E['', '08000025', '02001922', '01009290', '000...
3       E['', '08000025', '02001922', '01009289', '000...
4       E['', '08000025', '02001922', '01009286', '000...
                              ...                        
3218    E['', '08000025', '02001867', '01033628', '001...
3219    E['', '08000025', '02006895', '01008995', '000...
3220    E['', '08000025', '02006897', '01009143', '000...
3221    E['', '08000025', '02001909', '01009377', '000...
3222    E['', '08000025', '02001922', '01009290', '001...
Name: label, Length: 3223, dtype: object

In [31]:
def get_and_save_geo_dataset(url, localpath, filename):
    """Fetches a geodataset from a web resource and saves it to the local data/ directory
    
    Args:
        filename (string): the name of file as it should be saved locally
        url (string): URL of the web resource containing 
        localpath (string):
    Returns:
        json data as dict"""
    file = requests.get(url).json()
    full_path = os.path.join(localpath, filename)
    with open(full_path, 'w') as dset:
        json.dump(file, dset)
    return file

# url = "https://raw.githubusercontent.com/ONSvisual/topojson_boundaries/master/LSOA.json"
birmingham_json_url = 'https://mapit.mysociety.org/area/2514/children' 

birmingahm_geo_dataset = get_and_save_geo_dataset(birmingham_json_url,
                                                  './data/',
                                                  "birmingham_geo_dataset.json")

# path_to_local_file = os.path.join('data', 'birmingham.json')

# with open(path_to_local_file, 'ab') as geo_file:
#             geo_file.write(birmingahm_geo_dataset.content)
    
# # data = requests.get(url)
# # file_path = os.path.join('data', 'LSOA.json')

# # #     gdf = gpd.GeoDataFrame(geo_file.json())

# # # # df = pd.read_json(io.BytesIO(file.content))

# # # gdf = gpd.GeoDataFrame(open(file_path).json())

# # # type(file_path)
# # # file_path
# # df = gpd.read_file(open(file_path, 'rb'), driver='GeoJSON')

birmingahm_geo_dataset


{'151905': {'parent_area': 2514,
  'generation_high': 40,
  'all_names': {},
  'id': 151905,
  'codes': {'gss': 'E05011118', 'unit_id': '185'},
  'name': 'Acocks Green',
  'country': 'E',
  'type_name': 'Metropolitan district ward',
  'generation_low': 33,
  'country_name': 'England',
  'type': 'MTW'},
 '151888': {'parent_area': 2514,
  'generation_high': 40,
  'all_names': {},
  'id': 151888,
  'codes': {'gss': 'E05011119', 'unit_id': '44822'},
  'name': 'Allens Cross',
  'country': 'E',
  'type_name': 'Metropolitan district ward',
  'generation_low': 33,
  'country_name': 'England',
  'type': 'MTW'},
 '151917': {'parent_area': 2514,
  'generation_high': 40,
  'all_names': {},
  'id': 151917,
  'codes': {'gss': 'E05011120', 'unit_id': '63'},
  'name': 'Alum Rock',
  'country': 'E',
  'type_name': 'Metropolitan district ward',
  'generation_low': 33,
  'country_name': 'England',
  'type': 'MTW'},
 '151943': {'parent_area': 2514,
  'generation_high': 40,
  'all_names': {},
  'id': 15194

2:80: E501 line too long (89 > 79 characters)
3:1: W293 blank line contains whitespace
6:57: W291 trailing whitespace
14:80: E501 line too long (90 > 79 characters)
15:1: E305 expected 2 blank lines after class or function definition, found 1
15:71: W291 trailing whitespace
19:80: E501 line too long (80 > 79 characters)
25:1: W293 blank line contains whitespace
40:1: W391 blank line at end of file


In [None]:

uk_census_df = geo_df_from_geospatialfile(path_to_file, crs="EPSG:4326")

In [49]:
# geo_df_from_geospatialfile(os.path.join
#                            (os.getcwd(),
#                             'data',
#                             'birmingham_geo_dataset.json'))
    
birmingham_df = pd.DataFrame.from_dict(birmingahm_geo_dataset).T
gss_code_cols = pd.DataFrame.from_dict(birmingahm_geo_dataset).T.codes.apply(pd.Series).drop("ons", axis=1) 
birmingham_df = birmingham_df.join(gss_code_cols, on=index).drop(["codes", "all_names"], axis=1) 

NameError: name 'index' is not defined

5:1: W293 blank line contains whitespace
7:80: E501 line too long (107 > 79 characters)
7:108: W291 trailing whitespace
8:80: E501 line too long (96 > 79 characters)
8:97: W291 trailing whitespace


In [48]:
birmingham_df

Unnamed: 0,parent_area,generation_high,id,name,country,type_name,generation_low,country_name,type,gss,unit_id
151905,2514,40,151905,Acocks Green,E,Metropolitan district ward,33,England,MTW,E05011118,185
151888,2514,40,151888,Allens Cross,E,Metropolitan district ward,33,England,MTW,E05011119,44822
151917,2514,40,151917,Alum Rock,E,Metropolitan district ward,33,England,MTW,E05011120,63
151943,2514,40,151943,Aston,E,Metropolitan district ward,33,England,MTW,E05011121,22
151933,2514,40,151933,Balsall Heath West,E,Metropolitan district ward,33,England,MTW,E05011122,21
...,...,...,...,...,...,...,...,...,...,...,...
151908,2514,40,151908,Tyseley & Hay Mills,E,Metropolitan district ward,33,England,MTW,E05011182,44817
151944,2514,40,151944,Ward End,E,Metropolitan district ward,33,England,MTW,E05011183,44836
151892,2514,40,151892,Weoley & Selly Oak,E,Metropolitan district ward,33,England,MTW,E05011184,95
151911,2514,40,151911,Yardley East,E,Metropolitan district ward,33,England,MTW,E05011185,44834


In [20]:
# Filter for ward

ward_stops_geo_df = gpd.sjoin(london_stops_geo_df, Brent, how='left', op='within')
ward_stops_geo_df.head()

# Drop all rows where id (from polygon) is NaN, that is, where the point is not in the polygon

ward_stops_geo_df = ward_stops_geo_df[ward_stops_geo_df['index_right'].notna()]
ward_stops_geo_df

# Drop all row names from join (so we can reuse)

ward_stops_geo_df = ward_stops_geo_df[stops_geo_df.columns.to_list()]
ward_stops_geo_df

NameError: name 'Brent' is not defined

3:80: E501 line too long (82 > 79 characters)
6:80: E501 line too long (94 > 79 characters)


In [21]:
## Making a centroid 
centrepoint = ward_polygon.centroid

fig, ax = plt.subplots()
_ = ward_polygon.plot(ax=ax, facecolor='gold')
_ = ward_stops_geo_df.plot(ax=ax, color='red', markersize=2, alpha=0.1)
_ = centrepoint.plot(ax=ax, color='pink', markersize=45) ## added the centroid into the plot
plt.tight_layout()

## great, this works!

NameError: name 'ward_polygon' is not defined

1:1: E266 too many leading '#' for block comment
1:21: W291 trailing whitespace
7:57: E261 at least two spaces before inline comment
7:58: E262 inline comment should start with '# '
7:80: E501 line too long (92 > 79 characters)
10:1: E266 too many leading '#' for block comment
