# 1. Median house price data processing(房价中位数数据处理)

### 1.1 Overview of Missing Values and Type Determination（缺失值概览与类型判断）

In [24]:
import pandas as pd
import numpy as np

In [None]:
price = pd.read_csv('data/houseprice/House price in London.csv',
                    na_values=':',        # Identify all “:” as NaN
                    keep_default_na=True)  

#  Find all columns with housing prices whose column names contain “Year ending.”
#  找到所有房价列，列名都包含“Year ending”
price_cols = [c for c in price.columns if c not in ['Local authority code',
                                                 'Local authority name',
                                                 'LSOA code',
                                                 'LSOA name']]

#  Missing rate for each row (each LSOA) = Proportion of NaN values in these columns for this row
price['missing_rate_lsoa'] = price[price_cols].isna().mean(axis=1)

#  View the LSOAs with the highest missing rates
price[['LSOA code','missing_rate_lsoa']] \
  .sort_values('missing_rate_lsoa', ascending=False) \
  .head(10)

count = (price['missing_rate_lsoa'] > 0.5).sum()
print(f"There are {count} LSOAs with a missing rate greater than 50%.")

bad_lsoa = price.loc[price['missing_rate_lsoa'] > 0.5, 'LSOA code'].tolist()
print("List of LSAs with a missing rate > 50%:", bad_lsoa)

There are 77 LSOAs with a missing rate greater than 50%.
List of LSAs with a missing rate > 50%: ['E01000048', 'E01000051', 'E01000061', 'E01000151', 'E01000422', 'E01000484', 'E01000552', 'E01000599', 'E01000604', 'E01000605', 'E01033456', 'E01000890', 'E01000940', 'E01000941', 'E01000949', 'E01000950', 'E01000954', 'E01000955', 'E01001057', 'E01001130', 'E01001220', 'E01001334', 'E01001510', 'E01001693', 'E01001702', 'E01001706', 'E01001710', 'E01001713', 'E01001746', 'E01001757', 'E01001790', 'E01001813', 'E01001845', 'E01001876', 'E01001885', 'E01001958', 'E01001961', 'E01002082', 'E01002089', 'E01002545', 'E01002588', 'E01002673', 'E01002842', 'E01002853', 'E01002857', 'E01002870', 'E01002880', 'E01002904', 'E01002907', 'E01003044', 'E01003048', 'E01003111', 'E01003125', 'E01003244', 'E01003245', 'E01003252', 'E01003337', 'E01003486', 'E01003633', 'E01003912', 'E01003967', 'E01003968', 'E01003970', 'E01003971', 'E01003973', 'E01003974', 'E01003989', 'E01004007', 'E01004011', 'E010

In [19]:
#  Directly apply .isna().mean(axis=0) to the price_cols columns.
missing_rate_time = price[price_cols].isna().mean(axis=0)

# Convert to DataFrame 
missing_rate_time = missing_rate_time.reset_index()
missing_rate_time.columns = ['period','missing_rate']

# Sorted by missing rate
missing_rate_time.sort_values('missing_rate', ascending=False).head(10)


Unnamed: 0,period,missing_rate
54,Year ending Jun 2009,0.12575
55,Year ending Sep 2009,0.124302
100,Year ending Dec 2020,0.107756
99,Year ending Sep 2020,0.107342
98,Year ending Jun 2020,0.100517
95,Year ending Sep 2019,0.090383
56,Year ending Dec 2009,0.089349
106,Year ending Jun 2022,0.087694
96,Year ending Dec 2019,0.086039
94,Year ending Jun 2019,0.085212


There are 77 LSOAs with missing values exceeding 50%, accounting for approximately 1.6% of the total number of LSOAs (77/4835 ≈ 1.6%). Considering that the remaining samples after deletion still have approximately 98.4% coverage, such sample loss is minimal and unlikely to significantly affect estimation efficiency or representativeness for most quantitative analyses (especially panel regression and spatial regression). Here, all 77 LSOAs are excluded.


In [20]:
# Exclude LSAs with a missing rate greater than 50%.

price['missing_rate_lsoa'] = price[price_cols].isna().mean(axis=1)
# Retain LSOAs with a missing rate of less than 0.5
price2 = price[price['missing_rate_lsoa'] < 0.5].copy()


In [21]:
count2 = (price2['missing_rate_lsoa'] > 0.5).sum()
print(f"There are {count2} LSOAs with a missing rate greater than 50%.")

There are 0 LSOAs with a missing rate greater than 50%.


In [22]:
# Only retain columns from Q1 1996 (Year ending Mar 1996) to Q4 2022 (Year ending Dec 2022).
drop_cols = ['Year ending Dec 1995', 'Year ending Mar 2023']

price2 = price2.drop(columns=drop_cols, errors='ignore')

### 1.2 Interpolation(插值)

In [23]:
#  Parse column names as timestamps
price_cols = [c for c in price2.columns if c.startswith('Year ending')]
dt = pd.to_datetime([c.replace('Year ending ', '') for c in price_cols],
                    format='%b %Y')

#  Tell Pandas that this is a “quarterly starting point beginning in March” series.
dt = pd.DatetimeIndex(dt.values, freq='QS-MAR')

#  Replace the list name with this DatetimeIndex. So we can use reindex + interpolate(method=‘time’) to perform interpolation
df_time = price2[price_cols].copy()
df_time.columns = dt



In [25]:
# Retain non-time columns
meta = price2[['Local authority code','Local authority name','LSOA code','LSOA name']]

df2 = pd.concat([meta, df_time],axis=1)


In [26]:
# Convert to numeric type and remove any decimal commas.
df_time = df_time.applymap(lambda x: float(str(x).replace(',','')) if pd.notna(x) else np.nan)

  df_time = df_time.applymap(lambda x: float(str(x).replace(',','')) if pd.notna(x) else np.nan)


In [27]:
# Transpose: rows → time, columns → LSOA
ts = df_time.T

In [28]:
# Calculate the longest missing streak for each LSOA.
max_run = {}
for lsoa in ts.columns:
    s = ts[lsoa].isna()
    runs = []
    in_run = False
    length = 0
    for flag in s:
        if flag:
            length += 1
            in_run = True
        elif in_run:
            runs.append(length)
            length = 0
            in_run = False
    if in_run:
        runs.append(length)
    max_run[lsoa] = max(runs) if runs else 0

# Create DataFrame
df_runs = pd.DataFrame.from_dict(max_run, orient='index', columns=['max_gap'])
df_runs.index.name = 'LSOA code'
df_runs = df_runs.reset_index()

# Grouped by longest gap length
bins = [ -1,  4,   6,   8,  float('inf')]
labels = ['≤4 quarter','5–6 quarter','7–8 quarter','>8 quarter']
df_runs['category'] = pd.cut(df_runs['max_gap'], bins=bins, labels=labels)

# Count how many LSAs there are in each category.
counts = df_runs['category'].value_counts().reindex(labels).fillna(0).astype(int)

print("Number of LSAs under each gap length category:")
print(counts)

Number of LSAs under each gap length category:
category
≤4 quarter     4164
5–6 quarter     198
7–8 quarter     112
>8 quarter      282
Name: count, dtype: int64


#### 1.2.1  First round: seasonal interpolation, limited to missing values in ≤4 consecutive quarters

In [29]:
# Copy one as the result after seasonal interpolation.
ts_seasonal = ts.copy()

# Perform seasonal mean interpolation for each LSOA (each column), processing only missing segments with ≤4 consecutive quarters.
for col in ts_seasonal.columns:
    s = ts_seasonal[col]
    missing = s.isna()
    runs = []
    in_run = False
    prev = None

    # Find all missing intervals
    for t in s.index:
        if missing.loc[t] and not in_run:
            in_run = True
            run_start = t
        elif not missing.loc[t] and in_run:
            run_end = prev
            runs.append((run_start, run_end))
            in_run = False
        prev = t
    if in_run:
        runs.append((run_start, prev))

    # For each interval, if the length is ≤4, fill it in.
    for start, end in runs:
        start_idx = s.index.get_loc(start)
        end_idx   = s.index.get_loc(end)
        length = end_idx - start_idx + 1
        if length <= 4:
            for t in s.index[start_idx:end_idx+1]:
                # same season of the previous and following years
                t_minus = t - pd.DateOffset(months=12)
                t_plus  = t + pd.DateOffset(months=12)
                vals = []
                if t_minus in s.index and pd.notna(s.loc[t_minus]):
                    vals.append(s.loc[t_minus])
                if t_plus in s.index and pd.notna(s.loc[t_plus]):
                    vals.append(s.loc[t_plus])
                if len(vals) == 2:
                    s.loc[t] = sum(vals) / 2
                elif len(vals) == 1:
                    s.loc[t] = vals[0]
                # When len(vals) == 0, retain NaN and handle it with subsequent methods.

    ts_seasonal[col] = s


In [30]:
# Convert the interpolated ts_seasonal back to a wide table and merge meta
df_seasonal = ts_seasonal.T.reset_index(drop=True)
meta = meta.reset_index(drop=True)
df_seasonal = pd.concat([meta, df_seasonal], axis=1)

#### 1.2.2  Second round (gaps of 5–8 quarters): Seasonally adjusted + trend interpolation

In [None]:
from statsmodels.tsa.seasonal import STL
def seasonal_trend_impute(series, max_gap=8):
    """
    For gaps in quarters 5–6, fill them using seasonal + trend decomposition:  
      1. Temporarily perform linear interpolation on the entire series to ensure no NaN values (only for STL decomposition)  
      2. Perform STL decomposition on the interpolated series (period=4)  
      3. Interpolate the trend component using time interpolation (limit=max_gap)  
      4. Use the seasonal component directly.  
      5. Reconstruct y_hat = trend_filled + seasonal.  
      6. For intervals in the original series with consecutive missing values exceeding max_gap, y_hat retains NaN.
    """
    s = series.copy()
    # 1. Temporarily interpolate the entire sequence linearly and fill in all missing values.
    s_interp = s.interpolate(method='time', limit_direction='both')
    # 2. STL decomposition
    stl = STL(s_interp, period=4, robust=True).fit()
    trend    = stl.trend
    seasonal = stl.seasonal

    # 3. Interpolate trend components, limited to max_gap
    trend_filled = trend.interpolate(
        method='time', 
        limit=max_gap, 
        limit_direction='both'
    )

    # 4. Rebuild
    y_hat = trend_filled + seasonal

    # 5. Restore ultra-long gaps to NaN
    is_na = s.isna()
    runs = []
    in_run = False
    prev = None
    for t in s.index:
        if is_na[t] and not in_run:
            in_run, start = True, t
        elif (not is_na[t]) and in_run:
            runs.append((start, prev))
            in_run = False
        prev = t
    if in_run:
        runs.append((start, prev))

    for start, end in runs:
        length = s.index.get_loc(end) - s.index.get_loc(start) + 1
        if length > max_gap:
            y_hat.loc[start:end] = np.nan

    return y_hat


# ts_seasonal: Results after the first round of interpolation, row = time index, column = LSOA
ts_trend = ts_seasonal.apply(seasonal_trend_impute, axis=0)

# Return to wide table and merge meta
df_trend = ts_trend.T.reset_index(drop=True)
df_trend = pd.concat([meta, df_trend], axis=1)


####  1.2.3 Third round (>8-quarter gaps): Interpolate using two paths based on the number of remaining valid observations for each series:
1. Sufficient observations available for a single series (non-empty observations ≥	12 quarters): Fill in using SARIMA sequential prediction.
2. Too few observations (non-empty observations <12 quarters): Use spatial IDW. 	At each gap point, find the 8 nearest neighbors and use inverse distance weighted 	averaging to fill in the gaps. Maintain spatial smoothness and fill in 	long gaps that 	are “isolated islands.”

In [32]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.neighbors import KDTree
import geopandas as gpd

In [33]:
# 1. Calculate the number of non-empty observations and the longest gap for each sequence.
non_na = ts_trend.notna().sum()
max_gap = {}
for col in ts_trend.columns:
    flags = ts_trend[col].isna().values
    runs, count = [], 0
    for f in flags:
        if f:
            count += 1
        else:
            if count>0: runs.append(count)
            count = 0
    if count>0: runs.append(count)
    max_gap[col] = max(runs) if runs else 0

# 2. Classification
sarima_ls = [l for l in ts_trend.columns if max_gap[l] > 8 and non_na[l] >= 12]
idw_ls    = [l for l in ts_trend.columns if max_gap[l] > 8 and non_na[l] < 12]

# 3. SARIMAX interpolation
for lsoa in sarima_ls:
    series = ts_trend[lsoa].copy()
    # Find all segments with more than 8 gaps
    is_na = series.isna()
    runs, start_pos = [], None
    for i, t in enumerate(series.index):
        if is_na.iloc[i] and start_pos is None:
            start_pos = i
        elif not is_na.iloc[i] and start_pos is not None:
            length = i - start_pos
            if length > 8:
                runs.append((start_pos, i-1))
            start_pos = None
    if start_pos is not None and len(series) - start_pos > 8:
        runs.append((start_pos, len(series)-1))
    # Fit SARIMAX
    # Here, (1,1,1)x(1,1,1,4) is used as an example, but you can adjust the parameters yourself.
    model = SARIMAX(
        series.dropna(),
        order=(1,1,1),
        seasonal_order=(1,1,1,4),
        enforce_stationarity=False,
        enforce_invertibility=False
    ).fit(disp=False)
    for start, end in runs:
        # Based on observations up to first_obs = start-1, predict length steps ahead.
        length = end - start + 1
        # The old version of SARIMAX requires the start/end index positions to be passed in.
        # Here, we simply use get_forecast and then take predicted_mean.
        forecast = model.get_forecast(steps=length).predicted_mean
        # Fill in the prediction
        series.iloc[start:start+length] = forecast.values
    ts_trend[lsoa] = series

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_i

In [35]:
# 4. Spatial IDW
# 4.1 Read LSOA center point
gdf = gpd.read_file('data/LSOA_21.geojson').to_crs(epsg=27700)
gdf['centroid'] = gdf.geometry.centroid
coords = np.vstack([gdf.centroid.x, gdf.centroid.y]).T
tree = KDTree(coords, leaf_size=10)

for lsoa in idw_ls:
    series = ts_trend[lsoa]
    i = list(ts_trend.columns).index(lsoa)
    is_na = series.isna().values
    # Similarly, find >8 gap segments.
    runs, start_pos = [], None
    for idx, flag in enumerate(is_na):
        if flag and start_pos is None:
            start_pos = idx
        elif not flag and start_pos is not None:
            length = idx - start_pos
            if length > 8:
                runs.append((start_pos, idx-1))
            start_pos = None
    if start_pos is not None and len(series)-start_pos > 8:
        runs.append((start_pos, len(series)-1))
    # Perform IDW for each time point in each long gap segment.
    for start, end in runs:
        for pos in range(start, end+1):
            t = series.index[pos]
            # Check the code for this LSOA in gdf.
            row = gdf[gdf['LSOA21CD']==lsoa].index[0]
            dists, nbrs = tree.query(coords[row:row+1], k=8)
            vals, wts = [], []
            for dist, nbr in zip(dists[0], nbrs[0]):
                v = ts_trend.iloc[pos, nbr]
                if not np.isnan(v):
                    vals.append(v)
                    wts.append(1.0/dist if dist>0 else 1.0)
            if vals:
                series.iloc[pos] = np.average(vals, weights=wts)
    ts_trend[lsoa] = series

In [36]:
# 5. Finally merge back into the wide table
df_final = pd.concat([meta, ts_trend.T.reset_index(drop=True)], axis=1)

In [37]:
# Identify all “Year ending” or timestamp columns
numeric_cols = [c for c in df_final.columns if isinstance(c, pd.Timestamp)]

# Round to the nearest integer and convert to pandas' nullable integer type Int64.
df_final[numeric_cols] = (
    df_final[numeric_cols]
      .round(0)                    
      .astype('Int64')             # convert to nullable integer
)

print(df_final[numeric_cols].dtypes.unique())

[Int64Dtype()]


In [38]:
df_final.to_csv('data/houseprice/Houseprice_Processed.csv')

# 2. Rail transit data Processing and Visualization

In [None]:
import json

gdf = gpd.read_file("data/transportation/london_lines.geojson")

records = []

for _, row in gdf.iterrows():
    geometry_wkt = row['geometry'].wkt
    line_entries = row['lines_parsed']
    
    if isinstance(line_entries, str):
        try:
            line_entries = json.loads(line_entries)
        except json.JSONDecodeError:
            continue

    for line in line_entries:
        record = {
            "id": row['id'],
            "name": line.get("name"),
            "opened": line.get("opened"),
            "night_opened": line.get("nightopened"),
            "closed": line.get("closed"),
            "opened_type": "night" if "nightopened" in line else "day" if "opened" in line else None,
            "start_sid": line.get("start_sid"),
            "end_sid": line.get("end_sid"),
            "geometry": geometry_wkt
        }
        records.append(record)

london_metrolines = pd.DataFrame(records)
london_metrolines.to_csv("data/transportation/london_metrolines_detailed.csv", index=False)


In [40]:
london_overground = london_metrolines[london_metrolines['name'] == 'London Overground']

london_overground.to_csv("data/transportation/londonOverground_lines.csv")

In [42]:
# Extract site data records
stations_gdf = gpd.read_file("data/transportation/london_stations2.geojson")

station_records = []

for _, row in stations_gdf.iterrows():
    lines_info = row['lines_parsed']
    if isinstance(lines_info, str):
        try:
            lines_info = json.loads(lines_info)
        except json.JSONDecodeError:
            continue

    if isinstance(lines_info, list):
        for line in lines_info:
            station_records.append({
                'id': row['id'],
                'name': row['name'],
                'zone': row['zone'],
                'line_name': line.get('name'),
                'opened': line.get('opened'),
                'nightopened': line.get('nightopened'),
                'closed': line.get('closed'),
                'geometry': row['geometry'].wkt  
            })

station_df = pd.DataFrame(station_records)
csv_path = "data/transportation/london_stations.csv"
station_df.to_csv(csv_path, index=False)


In [43]:
# Filter stations belonging to London Overground
london_overground_stations = station_df[station_df['line_name'] == 'London Overground']

# Deduplication (some sites may appear multiple times in Overground)
london_overground_stations = london_overground_stations.drop_duplicates(subset=['id'])

london_overground_stations.to_csv("data/transportation/londonOverground_stations.csv")


In [2]:
import folium
import geopandas as gpd
import pandas as pd
from shapely import wkt
import matplotlib.pyplot as plt
import seaborn as sns
from folium import FeatureGroup, Element
import matplotlib.colors as mcolors


lines_df = pd.read_csv("data/transportation/londonOverground_lines.csv")
stations_df = pd.read_csv("data/transportation/londonOverground_stations.csv")
lines_df["geometry"] = lines_df["geometry"].apply(wkt.loads)
stations_df["geometry"] = stations_df["geometry"].apply(wkt.loads)

lines_gdf = gpd.GeoDataFrame(lines_df, crs="EPSG:4326", geometry="geometry")
stations_gdf = gpd.GeoDataFrame(stations_df, crs="EPSG:4326", geometry="geometry")


lines_gdf["opened"] = lines_gdf["opened"].replace(2011, 2010)
stations_gdf["opened"] = stations_gdf["opened"].replace(2011, 2010)

# Read Borough boundary data
boroughs_gdf = gpd.read_file("data/London_Boroughs.geojson").to_crs(epsg=4326)
boroughs_gdf["name"] = boroughs_gdf["name"].replace("Hammersmith and Fulham", "Hammersmith")
boroughs_gdf["name"] = boroughs_gdf["name"].replace("Kensington and Chelsea", "Kensington")

gla_gdf = gpd.read_file("data/GLA boundary/London_GLA_Boundary.shp").to_crs(epsg=4326)


years = sorted(lines_gdf["opened"].unique())

import numpy as np
# 使用 matplotlib colormap，并从较深部分采样
cmap = plt.get_cmap("Oranges")
color_values = np.linspace(0.35, 0.95, len(years))
palette = [mcolors.rgb2hex(cmap(val)) for val in color_values]
color_map = {year: color for year, color in zip(years, palette)}


#  Initialize map
m = folium.Map(
    location=[51.5074, -0.1278],
    zoom_start=10,
    tiles=None,
    scrollWheelZoom=True,
    wheelPxPerZoomLevel=150,
    zoomDelta=0.5,
    zoomSnap=0,
    control_scale=True
)

# Add background image switching
folium.TileLayer("CartoDB positron", name="Light").add_to(m)
folium.TileLayer("CartoDB dark_matter", name="Dark").add_to(m)
folium.TileLayer("OpenStreetMap", name="OSM").add_to(m)

# Overlay boundary layer 
# Boroughs 
folium.GeoJson(
    boroughs_gdf,
    name="London Boroughs",
    style_function=lambda f: {
        "color": "#999999",
        "weight": 1,
        "fill": True,
        "fillOpacity": 0.05
    }
).add_to(m)

# GLA 
folium.GeoJson(
    gla_gdf,
    name="GLA Boundary",
    style_function=lambda f: {
        "color": "#000000",
        "weight": 2,
        "fill": False
    }
).add_to(m)

# Add Borough Name Tag
for _, row in boroughs_gdf.iterrows():
    centroid = row.geometry.centroid
    folium.Marker(
        location=[centroid.y, centroid.x],
        icon=folium.DivIcon(html=f"""
            <div style='font-size:12px; color:#444;'>{row["name"]}</div>
        """)
    ).add_to(m)

# Add annual routes + stations
for year in years:
    fg = FeatureGroup(name=f"Line/Stations {int(year)}", show=True)

    # lines
    subset = lines_gdf[lines_gdf["opened"] == year]
    folium.GeoJson(
        subset,
        style_function=lambda feat, col=color_map[year]: {
            "color": col,
            "weight": 4
        }
    ).add_to(fg)

    # stations
    stations = stations_gdf[stations_gdf["opened"] == year]
    for _, st in stations.iterrows():
        tooltip = folium.Tooltip(f"{st.get('station_name', 'Station')}<br>Opened: {year}")
        popup = folium.Popup(f"<b>{st.get('station_name', 'Station')}</b><br>Opened: {year}<br>Borough: {st.get('borough', 'Unknown')}", max_width=250)
        folium.CircleMarker(
            location=[st.geometry.y, st.geometry.x],
            radius=4,
            color=color_map[year],
            weight=1.5,
            fill=True,
            fill_color="white",
            fill_opacity=1,
            tooltip=tooltip,
            popup=popup
        ).add_to(fg)

    fg.add_to(m)


folium.LayerControl(collapsed=False).add_to(m)

# legend
legend_html = """
<div style="
    position: fixed; 
    bottom: 50px; left: 50px; 
    width: 200px; 
    background-color: rgba(255,255,255,0.8); 
    z-index:9999; 
    font-size:14px;
    font-family: sans-serif;
    padding: 10px;
    box-shadow: 2px 2px 8px rgba(0,0,0,0.3);
">
<b>Opened Year</b><br>
""" + "".join([
    f"""<div style="display:flex; align-items:center; margin-bottom:4px;">
        <div style="background:{col}; border-radius:50%; width:12px; height:12px; margin-right:6px;"></div>
        {int(yr)}
    </div>""" for yr, col in color_map.items()
]) + """
<hr style="margin:8px 0">
<b>Boundaries</b><br>
<div style="display:flex; align-items:center; margin-bottom:4px;">
    <div style="width:20px; height:12px; background:rgba(0,0,0,0.1); border:1px solid #999; margin-right:6px;"></div>
    London boroughs
</div>
<div style="display:flex; align-items:center;">
    <div style="width:20px; height:12px; background:white; border:3px solid black; margin-right:6px;"></div>
    GLA boundary
</div>
</div>
"""
m.get_root().html.add_child(Element(legend_html))


m.get_root().header.add_child(Element("""
<style>
body {
  font-family: "Helvetica Neue", sans-serif;
}
.leaflet-tooltip {
  background: rgba(255,255,255,0.9);
  border: 1px solid #999;
  font-size: 12px;
}
</style>
"""))


m.save("output/London_Overground_Visualization.html")
m

# Please select light background image