In [1]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import folium
from folium.plugins import MarkerCluster

In [2]:
# Create folder to save dataset
base_dir = '../../data/'
landing_dir = os.path.join(base_dir, 'landing')
raw_dir = os.path.join(base_dir, 'raw')

if not os.path.exists(base_dir):
    os.makedirs(base_dir)


subfolder = 'Supermarkets'


if not os.path.exists(os.path.join(raw_dir, subfolder)):
    os.makedirs(os.path.join(raw_dir, subfolder))

In [3]:
df = pd.read_csv(f"{landing_dir}/{subfolder}/supermarkets_with_location.csv")

In [4]:
df.head(5)

Unnamed: 0,Supermarket Name,Address,Suburb,Postcode,Latitude,Longitude
0,Aldi,"8 Franklin Street, Melbourne",Melbourne,3000,-37.807383,144.962439
1,Coles,"2 Elizabeth Street, Melbourne",Melbourne,3089,-37.673315,145.155278
2,Coles,"Melbourne Central, 183-201 La Trobe Street, Me...",Melbourne,3000,-37.808785,144.966817
3,Friendly Grocer,"Shop 1, 360 Collins Street, Melbourne",Melbourne,3429,-37.581855,144.700634
4,IGA,"470 Collins Street, Melbourne",Melbourne,3000,-37.81758,144.958513


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Supermarket Name  173 non-null    object 
 1   Address           173 non-null    object 
 2   Suburb            173 non-null    object 
 3   Postcode          172 non-null    object 
 4   Latitude          172 non-null    float64
 5   Longitude         172 non-null    float64
dtypes: float64(2), object(4)
memory usage: 8.2+ KB


In [6]:
missing_postcode_data = df[df['Postcode'].isnull()]
missing_postcode_data

Unnamed: 0,Supermarket Name,Address,Suburb,Postcode,Latitude,Longitude
29,IGA,"Alira Village, 36 Adakite Drive, Berwick",Berwick,,,


In [7]:
# Manually searched for the location information corresponding to "36 Adakite Dr, Berwick" and filling it in for the missing value
df.loc[(df['Address'] == 'Alira Village, 36 Adakite Drive, Berwick') & df['Postcode'].isnull(), 'Postcode'] = '3806'
df.loc[(df['Address'] == 'Alira Village, 36 Adakite Drive, Berwick') & df['Latitude'].isnull(), 'Latitude'] = -38.048359
df.loc[(df['Address'] == 'Alira Village, 36 Adakite Drive, Berwick') & df['Longitude'].isnull(), 'Longitude'] = 145.320480

# Display the updated data
df


Unnamed: 0,Supermarket Name,Address,Suburb,Postcode,Latitude,Longitude
0,Aldi,"8 Franklin Street, Melbourne",Melbourne,3000,-37.807383,144.962439
1,Coles,"2 Elizabeth Street, Melbourne",Melbourne,3089,-37.673315,145.155278
2,Coles,"Melbourne Central, 183-201 La Trobe Street, Me...",Melbourne,3000,-37.808785,144.966817
3,Friendly Grocer,"Shop 1, 360 Collins Street, Melbourne",Melbourne,3429,-37.581855,144.700634
4,IGA,"470 Collins Street, Melbourne",Melbourne,3000,-37.817580,144.958513
...,...,...,...,...,...,...
168,IGA,"167-179 Shaws Road, Werribee",Werribee,3030,-37.890070,144.662180
169,Woolworths,"2 Watton Street, Werribee",Werribee,3030,-37.901620,144.660788
170,Woolworths,"Pacific Werribee, Heaths Road, Hoppers Crossing",Hoppers Crossing,3029,-37.874674,144.679771
171,Coles,"29 Douglas Parade, Williamstown",Williamstown,3016,-37.857626,144.897416


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Supermarket Name  173 non-null    object 
 1   Address           173 non-null    object 
 2   Suburb            173 non-null    object 
 3   Postcode          173 non-null    object 
 4   Latitude          173 non-null    float64
 5   Longitude         173 non-null    float64
dtypes: float64(2), object(4)
memory usage: 8.2+ KB


In [9]:
# save data
output_file_path = f"{raw_dir}/{subfolder}/supermarkets_info.csv"
df.to_csv(output_file_path, index=False)

In [10]:
# Create a geometry column from latitude and longitude
geometry = [Point(xy) for xy in zip(df['Longitude'], df['Latitude'])]

# Create a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry=geometry)

# Create a folium map object, centered at a location (e.g., Melbourne)
m = folium.Map(location=[-37.8136, 144.9631], zoom_start=12)  # Adjust the zoom level as needed

# Create a MarkerCluster object
marker_cluster = MarkerCluster().add_to(m)

# Add markers to the cluster
for _, row in gdf.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Supermarket Name']} - {row['Suburb']}",
        tooltip=row['Suburb']
    ).add_to(marker_cluster)

m

# Save the map to an HTML file for viewing in a web browser
# m.save("supermarkets_map_with_cluster.html")
