In [2]:
import pandas as pd
import folium
from geopy.geocoders import Nominatim
from tqdm import tqdm
import os
import pickle

# Load your dataset and limit it to the first 2000 rows
data = pd.read_csv(r'C:\Users\Mousa Zerai\Desktop\Developer\domestic-E08000006-Salford\certificates.csv')[['POSTCODE', 'MAINHEAT_DESCRIPTION']].head(2000)

# Filter only electrically heated houses
electric_heating_data = data[data['MAINHEAT_DESCRIPTION'].str.contains('electric', case=False, na=False)].copy()

# Initialize the geocoder
geolocator = Nominatim(user_agent="geoapi")

# Path to cache file
cache_file = 'geocode_cache.pkl'

# Load the geocoded cache if it exists
if os.path.exists(cache_file):
    with open(cache_file, 'rb') as f:
        geocode_cache = pickle.load(f)
else:
    geocode_cache = {}

# Function to geocode a postcode using cache
def geocode_postcode(postcode):
    if postcode in geocode_cache:
        return geocode_cache[postcode]
    try:
        location = geolocator.geocode(postcode)
        if location:
            geocode_cache[postcode] = (location.latitude, location.longitude)
            return geocode_cache[postcode]
        else:
            return (None, None)
    except:
        return (None, None)

# Apply geocoding only on unique postcodes
unique_postcodes = electric_heating_data['POSTCODE'].unique()
tqdm.pandas()
electric_heating_data['Coordinates'] = electric_heating_data['POSTCODE'].progress_apply(geocode_postcode)

# Cache the results
with open(cache_file, 'wb') as f:
    pickle.dump(geocode_cache, f)

# Split the coordinates into separate columns
electric_heating_data['Latitude'], electric_heating_data['Longitude'] = zip(*electric_heating_data['Coordinates'])

# Remove rows with missing coordinates
electric_heating_data = electric_heating_data.dropna(subset=['Latitude', 'Longitude'])

# Create a base map
mymap = folium.Map(location=[electric_heating_data['Latitude'].mean(), electric_heating_data['Longitude'].mean()], zoom_start=12)

# Add markers to the map for electric heating systems
for idx, row in electric_heating_data.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['POSTCODE']} - {row['MAINHEAT_DESCRIPTION']}",
        icon=folium.Icon(color='blue')
    ).add_to(mymap)

# Save the map
mymap.save('heating_systems_electric_map.html')

print(f"Map created with {len(electric_heating_data)} electric heating locations from the first 2000 rows.")





  data = pd.read_csv(r'C:\Users\Mousa Zerai\Desktop\Developer\domestic-E08000006-Salford\certificates.csv')[['POSTCODE', 'MAINHEAT_DESCRIPTION']].head(2000)
100%|████████████████████████████████████████████████████████████████████████████████| 517/517 [22:25<00:00,  2.60s/it]


Map created with 388 electric heating locations from the first 2000 rows.


In [3]:
from IPython.display import IFrame

# Display the map in the notebook
IFrame('heating_systems_electric_map.html', width=700, height=500)
