In [1]:
import os
# All data paths in this notebook are relative to the project root, so switch
# the working directory to it once, up front. The location can be overridden
# with the LONDONS_FASHIONABLE_FINDS_ROOT environment variable so the notebook
# is runnable on machines other than the original author's; when the variable
# is unset the original hard-coded path is used unchanged.
PROJECT_ROOT = os.environ.get(
    'LONDONS_FASHIONABLE_FINDS_ROOT',
    '/Users/andrii/Documents/londons_fashionable_finds',
)
os.chdir(PROJECT_ROOT)

In [9]:
import pandas as pd

# Load the shop listings plus the three borough-level lookup tables.
shops = pd.read_csv("01_Data_Collection/clean_shops_data.csv")
house_prices = pd.read_csv("01_Data_Collection/london_house_prices_2017.csv")
earnings = pd.read_csv("01_Data_Collection/earnings_residence_borough_2021.csv")
imd = pd.read_csv("01_Data_Collection/imd_2019_london_filtered.csv")

# Discard the 'borough' and 'borough_from_address' columns and use
# 'borough_from_lat_lng' (renamed to 'borough') as the join key from here on.
shops = (
    shops
    .drop(columns=['borough', 'borough_from_address'])
    .rename(columns={'borough_from_lat_lng': 'borough'})
)

# Attach the house-price columns. A left join keeps every shop row even when
# its borough has no match in house_prices['Area'].
shops_with_prices = shops.merge(
    house_prices,
    left_on='borough',
    right_on='Area',
    how='left',
)

# Attach the earnings columns the same way. Column names that clash with the
# frame built so far receive the '_earnings' suffix; that is where the
# 'Area_earnings' column dropped at the end of this cell comes from.
merged_data = shops_with_prices.merge(
    earnings,
    left_on='borough',
    right_on='Area',
    how='left',
    suffixes=('', '_earnings'),
)

# Collapse the IMD table to one row per local authority district by taking the
# mean of the two rank columns within each district.
imd_district_col = 'Local Authority District name (2019)'
imd_rank_cols = [
    'Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived)',
    'Income Rank (where 1 is most deprived)',
]
imd_agg = imd.groupby(imd_district_col)[imd_rank_cols].mean().reset_index()

# Attach the averaged IMD ranks to each shop via its borough.
merged_data = merged_data.merge(
    imd_agg,
    left_on='borough',
    right_on=imd_district_col,
    how='left',
)

# Remove 'Unnamed: 0' and the right-hand join-key columns carried over by the
# three merges; 'borough' remains as the single area identifier.
merged_data = merged_data.drop(
    columns=['Unnamed: 0', 'Area', 'Area_earnings', imd_district_col]
)



                      name                                            address   
0            CIRCA VINTAGE                          87a Lansdowne Way, London  \
1   Retromania Fara London                       6 Upper Tachbrook St, London   
2                    Leona     Brixton Village, Unit 5 Coldharbour Ln, London   
3               ReBelished                       36 Hammersmith Grove, London   
4  Chelsea Vintage Couture  Office - Showroom, BY APPOINTMENT ONLY, Sloane...   

    latitude  longitude  rating  reviews_count                     place_id   
0  51.474835  -0.128693     4.7              9  ChIJmdDiKHUPdkgR9QrMOgW2vjc  \
1  51.493377  -0.139957     4.3             76  ChIJTzZgDuAEdkgR7NUgOuToH4k   
2  51.462156  -0.111480     5.0              4  ChIJ5x9QqWgEdkgRQM3g8mE6ay8   
3  51.495415  -0.226249     0.0              0  ChIJ79eAhtQPdkgRO5DhIpyyhhM   
4  51.492457  -0.165966     4.7             45  ChIJhe7IZxoFdkgRb3pI8hrLrlw   

   price_level                        

Unnamed: 0,name,address,latitude,longitude,rating,reviews_count,place_id,price_level,types,borough,Code,Value,Code_earnings,Pay_2021,Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived),Income Rank (where 1 is most deprived)
0,CIRCA VINTAGE,"87a Lansdowne Way, London",51.474835,-0.128693,4.7,9,ChIJmdDiKHUPdkgR9QrMOgW2vjc,,"jewelry_store, home_goods_store, clothing_stor...",Lambeth,E09000022,510000,00AY,673.6,11294.589888,11605.471910
1,Retromania Fara London,"6 Upper Tachbrook St, London",51.493377,-0.139957,4.3,76,ChIJTzZgDuAEdkgR7NUgOuToH4k,,"clothing_store, store, point_of_interest, esta...",Westminster,E09000033,1070000,00BK,771.6,16194.968750,16175.703125
2,Leona,"Brixton Village, Unit 5 Coldharbour Ln, London",51.462156,-0.111480,5.0,4,ChIJ5x9QqWgEdkgRQM3g8mE6ay8,,"clothing_store, store, point_of_interest, esta...",Lambeth,E09000022,510000,00AY,673.6,11294.589888,11605.471910
3,ReBelished,"36 Hammersmith Grove, London",51.495415,-0.226249,0.0,0,ChIJ79eAhtQPdkgRO5DhIpyyhhM,,"clothing_store, store, point_of_interest, esta...",Hammersmith and Fulham,E09000013,740000,00AN,714.5,14202.601770,13752.920354
4,Chelsea Vintage Couture,"Office - Showroom, BY APPOINTMENT ONLY, Sloane...",51.492457,-0.165966,4.7,45,ChIJhe7IZxoFdkgRb3pI8hrLrlw,,"clothing_store, store, point_of_interest, esta...",Kensington and Chelsea,E09000020,1200000,00AW,767,15419.339806,18563.563107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458,Haven House Children's Hospice Shop: Leytonstone,"849 High Road, London",51.569737,0.012280,4.8,22,ChIJ4fSpmm-n2EcR22L0FEDSTYk,,"store, point_of_interest, establishment",Waltham Forest,E09000031,430000,00BH,624.7,11735.375000,11745.701389
459,Sue Ryder,"93-95 High St, London",51.582491,-0.029880,4.3,29,ChIJ3XURusYddkgR-kfvsia9vJY,1.0,"point_of_interest, store, establishment",Waltham Forest,E09000031,430000,00BH,624.7,11735.375000,11745.701389
460,Cancer Research UK,"13 The Broadway, London",51.578813,-0.123953,4.7,7,ChIJ8abYTccbdkgRuWlgpSbn4h8,,"clothing_store, home_goods_store, point_of_int...",Haringey,E09000014,487500,00AP,542.6,11291.917241,11348.386207
461,Barnardo's,"Greater, 2 Broadway Parade, London",51.579977,-0.123170,4.1,8,ChIJdUsFzsAbdkgR5I9nBFVUidM,,"point_of_interest, store, establishment",Haringey,E09000014,487500,00AP,542.6,11291.917241,11348.386207


In [10]:

from IPython.display import display

# Remove repeated listings of the same shop: rows that agree on 'name',
# 'address', 'latitude' and 'longitude' are treated as duplicates and only the
# first occurrence is kept.
merged_data = merged_data.drop_duplicates(subset=['name', 'address', 'latitude', 'longitude'])

display(merged_data)

# Report (rows, columns) after de-duplication. This used to be a bare
# `merged_data.shape` expression in the middle of the cell, which Jupyter never
# displays (only a cell's last expression is echoed), alongside a hard-coded
# row count that no longer matched the data.
print(merged_data.shape)

# Persist the de-duplicated table; index=False keeps the DataFrame index out of
# the CSV.
merged_data.to_csv('02_Data_preprocessing/merged_shops_data.csv', index=False)


Unnamed: 0,name,address,latitude,longitude,rating,reviews_count,place_id,price_level,types,borough,Code,Value,Code_earnings,Pay_2021,Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived),Income Rank (where 1 is most deprived)
0,CIRCA VINTAGE,"87a Lansdowne Way, London",51.474835,-0.128693,4.7,9,ChIJmdDiKHUPdkgR9QrMOgW2vjc,,"jewelry_store, home_goods_store, clothing_stor...",Lambeth,E09000022,510000,00AY,673.6,11294.589888,11605.471910
1,Retromania Fara London,"6 Upper Tachbrook St, London",51.493377,-0.139957,4.3,76,ChIJTzZgDuAEdkgR7NUgOuToH4k,,"clothing_store, store, point_of_interest, esta...",Westminster,E09000033,1070000,00BK,771.6,16194.968750,16175.703125
2,Leona,"Brixton Village, Unit 5 Coldharbour Ln, London",51.462156,-0.111480,5.0,4,ChIJ5x9QqWgEdkgRQM3g8mE6ay8,,"clothing_store, store, point_of_interest, esta...",Lambeth,E09000022,510000,00AY,673.6,11294.589888,11605.471910
3,ReBelished,"36 Hammersmith Grove, London",51.495415,-0.226249,0.0,0,ChIJ79eAhtQPdkgRO5DhIpyyhhM,,"clothing_store, store, point_of_interest, esta...",Hammersmith and Fulham,E09000013,740000,00AN,714.5,14202.601770,13752.920354
4,Chelsea Vintage Couture,"Office - Showroom, BY APPOINTMENT ONLY, Sloane...",51.492457,-0.165966,4.7,45,ChIJhe7IZxoFdkgRb3pI8hrLrlw,,"clothing_store, store, point_of_interest, esta...",Kensington and Chelsea,E09000020,1200000,00AW,767,15419.339806,18563.563107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458,Haven House Children's Hospice Shop: Leytonstone,"849 High Road, London",51.569737,0.012280,4.8,22,ChIJ4fSpmm-n2EcR22L0FEDSTYk,,"store, point_of_interest, establishment",Waltham Forest,E09000031,430000,00BH,624.7,11735.375000,11745.701389
459,Sue Ryder,"93-95 High St, London",51.582491,-0.029880,4.3,29,ChIJ3XURusYddkgR-kfvsia9vJY,1.0,"point_of_interest, store, establishment",Waltham Forest,E09000031,430000,00BH,624.7,11735.375000,11745.701389
460,Cancer Research UK,"13 The Broadway, London",51.578813,-0.123953,4.7,7,ChIJ8abYTccbdkgRuWlgpSbn4h8,,"clothing_store, home_goods_store, point_of_int...",Haringey,E09000014,487500,00AP,542.6,11291.917241,11348.386207
461,Barnardo's,"Greater, 2 Broadway Parade, London",51.579977,-0.123170,4.1,8,ChIJdUsFzsAbdkgR5I9nBFVUidM,,"point_of_interest, store, establishment",Haringey,E09000014,487500,00AP,542.6,11291.917241,11348.386207
