In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import glob

folder_path = "/content/drive/MyDrive/GEOG0105/Footfall"
all_files = (
    glob.glob(f"{folder_path}/bespokelondon_2018-*.csv") +
    glob.glob(f"{folder_path}/bespokelondon_2019-*.csv") +
    glob.glob(f"{folder_path}/bespokelondon_2020-*.csv")
)

df_list = [pd.read_csv(f) for f in all_files]
footfall_df = pd.concat(df_list, ignore_index=True)

In [3]:
footfall_df.head()

Unnamed: 0,timestamp,location,device,footfall,imputed
0,2018-01-01T00:00:00Z,800,750,3,f
1,2018-01-01T00:00:00Z,457,754,327,t
2,2018-01-01T00:00:00Z,99,764,4,t
3,2018-01-01T00:00:00Z,98,765,14,f
4,2018-01-01T00:00:00Z,102,767,51,f


In [4]:
import pandas as pd

location_path = "/content/drive/MyDrive/GEOG0105/Footfall/bespoke_london_ff_locations.csv"
locations_df = pd.read_csv(location_path)

locations_df.head()

Unnamed: 0,location,address,city,region,lat,lon,position,type
0,8,"Holborn Bars, 3 Holborn, London, EC1N 2LL, Gre...",London,Greater London,51.51806,-0.11111,,External
1,9,"4 Gray's Inn Road, London, WC1X 8HG, Greater L...",London,Greater London,51.51825,-0.11122,,External
2,10,"19-20 Great Sutton Street, London, EC1V 0DR, G...",London,Greater London,51.52329,-0.10053,,Internal
3,11,"76 New Oxford Street, London, WC1A 1EU, Greate...",London,Greater London,51.51684,-0.12749,,External
4,12,"18 Hammersmith Broadway, London, W6 7AB, Great...",London,Greater London,51.4934,-0.22403,,External


In [5]:
import geopandas as gpd
from shapely.geometry import Point

area_path = "/content/drive/MyDrive/GEOG0105/Research_Area/research_area.shp"
area_gdf = gpd.read_file(area_path).to_crs("EPSG:4326")

locations_gdf = gpd.GeoDataFrame(
    locations_df,
    geometry=gpd.points_from_xy(locations_df.lon, locations_df.lat),
    crs="EPSG:4326"
)

locations_with_lsoa = gpd.sjoin(locations_gdf, area_gdf[["lsoa21cd", "geometry"]], how="left", predicate="within")

locations_with_lsoa_clean = locations_with_lsoa.dropna(subset=["lsoa21cd"])

locations_with_lsoa_clean[["location", "lat", "lon", "lsoa21cd"]].head()

Unnamed: 0,location,lat,lon,lsoa21cd
0,8,51.51806,-0.11111,E01032740
1,9,51.51825,-0.11122,E01000914
2,10,51.52329,-0.10053,E01033489
3,11,51.51684,-0.12749,E01000855
5,13,51.51738,-0.12013,E01000914


In [6]:
footfall_with_lsoa = footfall_df.merge(
    locations_with_lsoa_clean[["location", "lsoa21cd"]],
    on="location",
    how="left"
)
footfall_with_lsoa = footfall_with_lsoa.dropna(subset=["lsoa21cd"])
footfall_with_lsoa

Unnamed: 0,timestamp,location,device,footfall,imputed,lsoa21cd
0,2018-01-01T00:00:00Z,800,750,3,f,E01004682
1,2018-01-01T00:00:00Z,457,754,327,t,E01035716
3,2018-01-01T00:00:00Z,98,765,14,f,E01035716
4,2018-01-01T00:00:00Z,102,767,51,f,E01000914
5,2018-01-01T00:00:00Z,84,768,23,f,E01000919
...,...,...,...,...,...,...
31158993,2020-08-31T23:50:00Z,983,1988,17,f,E01004734
31158996,2020-08-31T23:55:00Z,1216,931,25,f,E01004658
31158999,2020-08-31T23:55:00Z,119,1281,32,f,E01004762
31159005,2020-08-31T23:55:00Z,1209,1969,0,f,E01004658


In [7]:
footfall_with_lsoa["footfall"] = pd.to_numeric(footfall_with_lsoa["footfall"], errors="coerce")

agg_stats = footfall_with_lsoa.groupby("lsoa21cd")["footfall"].agg(
    mean_footfall="mean",
    std_footfall="std"
).reset_index()

agg_stats["cv_footfall"] = agg_stats["std_footfall"] / agg_stats["mean_footfall"]

agg_stats

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  footfall_with_lsoa["footfall"] = pd.to_numeric(footfall_with_lsoa["footfall"], errors="coerce")


Unnamed: 0,lsoa21cd,mean_footfall,std_footfall,cv_footfall
0,E01000002,20.393136,29.994286,1.470803
1,E01000005,15.384582,21.836774,1.419393
2,E01000850,43.893477,45.75944,1.042511
3,E01000853,40.937398,38.239869,0.934106
4,E01000855,67.391808,69.060294,1.024758
5,E01000914,80.940995,80.367183,0.992911
6,E01000918,60.452517,65.264852,1.079605
7,E01000919,24.701669,30.343983,1.228418
8,E01002701,6.685393,11.861501,1.774241
9,E01004657,43.541036,84.665481,1.944499


In [8]:
footfall_full = area_gdf[["lsoa21cd", "geometry"]].merge(
    agg_stats, on="lsoa21cd", how="left"
)

missing_count = footfall_full["mean_footfall"].isna().sum()
print(f"Number of LSOAs missing footfall data: {missing_count}")

Number of LSOAs missing footfall data: 12


In [12]:
import geopandas as gpd
import pandas as pd
from libpysal.weights import Queen
from shapely.geometry import Polygon

footfall_full = area_gdf[["lsoa21cd", "geometry"]].merge(
    agg_stats, on="lsoa21cd", how="left"
)

w = Queen.from_dataframe(footfall_full)

lsoa_to_index = {code: idx for idx, code in enumerate(footfall_full["lsoa21cd"])}
index_to_lsoa = {v: k for k, v in lsoa_to_index.items()}

for idx, row in footfall_full[
    footfall_full[["mean_footfall", "cv_footfall", "std_footfall"]].isna().any(axis=1)
].iterrows():
    geo_index = idx
    neighbors = w.neighbors.get(geo_index, [])

    neighbor_mean = footfall_full.loc[neighbors, "mean_footfall"].dropna()
    neighbor_cv = footfall_full.loc[neighbors, "cv_footfall"].dropna()
    neighbor_std = footfall_full.loc[neighbors, "std_footfall"].dropna()

    if not neighbor_mean.empty:
        footfall_full.at[geo_index, "mean_footfall"] = neighbor_mean.mean()
    if not neighbor_cv.empty:
        footfall_full.at[geo_index, "cv_footfall"] = neighbor_cv.mean()
    if not neighbor_std.empty:
        footfall_full.at[geo_index, "std_footfall"] = neighbor_std.mean()

footfall_full["mean_footfall"].fillna(agg_stats["mean_footfall"].mean(), inplace=True)
footfall_full["cv_footfall"].fillna(agg_stats["cv_footfall"].mean(), inplace=True)
footfall_full["std_footfall"].fillna(agg_stats["std_footfall"].mean(), inplace=True)


  w = Queen.from_dataframe(footfall_full)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  footfall_full["mean_footfall"].fillna(agg_stats["mean_footfall"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  footfall_full["cv_footfall"].fillna(agg_stats["cv_footfall"].mean(), inplace=True)
The behavior will ch

In [13]:
footfall_full

Unnamed: 0,lsoa21cd,geometry,mean_footfall,std_footfall,cv_footfall
0,E01000853,"POLYGON ((-0.12306 51.52493, -0.12303 51.52486...",40.937398,38.239869,0.934106
1,E01000919,"POLYGON ((-0.12684 51.51735, -0.12684 51.51733...",24.701669,30.343983,1.228418
2,E01000916,"POLYGON ((-0.10936 51.52292, -0.10932 51.52291...",68.493552,95.565878,1.395254
3,E01000918,"POLYGON ((-0.12282 51.51826, -0.1228 51.51825,...",60.452517,65.264852,1.079605
4,E01000851,"POLYGON ((-0.1408 51.52434, -0.14074 51.52428,...",51.384183,52.533929,1.024937
5,E01000850,"POLYGON ((-0.13 51.52246, -0.12966 51.52216, -...",43.893477,45.75944,1.042511
6,E01000855,"POLYGON ((-0.13059 51.52463, -0.13047 51.52452...",67.391808,69.060294,1.024758
7,E01000914,"POLYGON ((-0.12079 51.52322, -0.12046 51.52269...",80.940995,80.367183,0.992911
8,E01000005,"POLYGON ((-0.07571 51.51575, -0.07542 51.51555...",15.384582,21.836774,1.419393
9,E01000002,"POLYGON ((-0.08969 51.52069, -0.08973 51.52057...",20.393136,29.994286,1.470803


In [14]:
footfall_full.drop(columns=["geometry"]).to_csv("/content/drive/MyDrive/GEOG0105/footfall_stats_filled.csv", index=False)