# Research and Data Collection

In this notebook, we will gather data on vintage shops, charity shops, and related stores in London using the Google Places API. We will use this data to build a model that predicts the best shops for finding valuable deals.

Our data collection process will involve:

1. Querying the Google Places API to obtain a list of relevant shops in London.
2. Extracting the necessary information (e.g., shop name, location, rating) from the API response.
3. Organizing the data in a structured format for further processing and analysis.


## Pseudo code

- Import necessary libraries (e.g., requests, pandas)

- Define Google Places API key and base URL

- Create a function to query Google Places API: 
    1. Inputs: API key, location, radius, and search terms
    2. Outputs: A list of shops with relevant information (e.g., name, location, rating) 

- Call the function with appropriate parameters to search for vintage shops, charity shops, and related stores in London

- Organize the collected data into a structured format (e.g., DataFrame)

- Save the collected data to a file (e.g., CSV)


In [3]:
YOUR_API_KEY = 'AIzaSyDcMoAKd8RG9U91lg1cw_8syNYHZb5a9cM'

In [4]:
import requests
import pandas as pd

# Define Google Places API key and base URL
api_key = YOUR_API_KEY
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# Create a function to query Google Places API
def query_places(api_key, location, radius, search_terms):
    shops = []
    next_page_token = None

    for term in search_terms:
        while True:
            params = {
                "key": api_key,
                "location": location,
                "radius": radius,
                "keyword": term,
                "pagetoken": next_page_token
            }
            response = requests.get(base_url, params=params).json()

            for shop in response["results"]:
                shops.append({
                    "name": shop["name"],
                    "address": shop["vicinity"],
                    "latitude": shop["geometry"]["location"]["lat"],
                    "longitude": shop["geometry"]["location"]["lng"],
                    "rating": shop.get("rating", None)
                })

            next_page_token = response.get("next_page_token", None)

            if not next_page_token:
                break

    return shops

# Call the function with appropriate parameters
location = "51.5074,-0.1278"  # London coordinates
radius = 10000  # 10 km
search_terms = ["vintage shop", "charity shop"]

shops_data = query_places(api_key, location, radius, search_terms)

# Organize the collected data into a DataFrame
shops_df = pd.DataFrame(shops_data)

# Save the collected data to a CSV file
shops_df.to_csv("vintage_shops_london.csv", index=False)


In [5]:
# Examine the contents of the DataFrame
shops_df


Unnamed: 0,name,address,latitude,longitude,rating
0,Beyond Retro Soho,"19-21 Argyll St, London",51.515011,-0.141187,4.3
1,Reign Vintage,"12 Berwick St, London",51.513401,-0.134588,4.3
2,PICKNWEIGHT - VINTAGE KILO STORE,"14-18 Neal St, London",51.513848,-0.124803,3.6
3,Hunky Dory Vintage,"226 Brick Ln, London",51.524739,-0.071564,4.5
4,Wow Retro London Covent Garden,"179 Drury Ln, London",51.515647,-0.123471,4.3
5,The Brick Lane Vintage Market,"85 Brick Ln, London",51.520796,-0.072098,4.3
6,Can't Buy Me Love Vintage Shop,"16 Ave Mews, London",51.590706,-0.143872,4.3
7,Blackout II - Vintage Clothing London,"51 Endell St, London",51.514887,-0.125315,3.9
8,Beyond Retro Dalston,"92-100 Stoke Newington Rd, London",51.55337,-0.074601,4.5
9,Mero Retro,"2 Bradbury St, London",51.54863,-0.075831,5.0


In [32]:
import requests
import pandas as pd
import time
import numpy as np

# Define Google Places API key and base URL
api_key = YOUR_API_KEY
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
reverse_geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Create a function to send a request to the Google Places API
def send_request(api_key, location, radius, keyword, pagetoken=None):
    params = {
        "key": api_key,
        "location": location,
        "radius": radius,
        "keyword": keyword,
        "pagetoken": pagetoken
    }
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Request failed with status code {response.status_code}: {response.text}")

# Create a function to parse the response from the Google Places API
def parse_response(response):
    results = []
    for shop in response["results"]:
        lat = shop["geometry"]["location"]["lat"]
        lng = shop["geometry"]["location"]["lng"]
        results.append({
            "name": shop["name"],
            "address": shop["vicinity"],
            "latitude": lat,
            "longitude": lng,
            "rating": shop.get("rating", None)
        })
    next_page_token = response.get("next_page_token", None)
    return results, next_page_token

# Create a function to query Google Places API
def query_places(api_key, location, radius, search_terms):
    shops = []
    borough_cache = {}

    # Generate a grid of locations using numpy's meshgrid function
    latitudes = np.arange(float(location.split(',')[0]) - 0.025 * 2, float(location.split(',')[0]) + 0.025 * 3, 0.025)
    longitudes = np.arange(float(location.split(',')[1]) - 0.025 * 2, float(location.split(',')[1]) + 0.025 * 3, 0.025)
    locations = np.meshgrid(latitudes, longitudes)
    locations = np.vstack([locations[0].flatten(), locations[1].flatten()]).T

    for term in search_terms:
        for location in locations:
            location_str = f"{location[0]},{location[1]}"
            next_page_token = None

            while True:
                # Check if borough has been cached, and if not, retrieve it using the get_borough function
                if location_str not in borough_cache:
                    borough = get_borough(api_key, location[0], location[1])
                    borough_cache[location_str] = borough
                else:
                    borough = borough_cache[location_str]

                try:
                    response = send_request(api_key, location_str, radius, term, next_page_token)
                    results, next_page_token = parse_response(response)
                    for result in results:
                        result["borough"] = borough
                    shops.extend(results)
                except Exception as e:
                    print(f"Request failed: {e}")
                    time.sleep(5)
                    continue

                if not next_page_token:
                    break

                time.sleep(2)  # Wait for 2 seconds before requesting the next page to avoid exceeding the API limit

    return shops

# Updated get_borough function to cache results
borough_cache = {}
def get_borough(api_key, lat, lng):
    location_str = f"{lat},{lng}"
    if location_str in borough_cache:
        return borough_cache[location_str]

    params = {
        "key": api_key,
        "latlng": location_str
    }
    response = requests.get(reverse_geocode_url, params=params).json()
    if response["status"] == "OK":
        for result in response["results"]:
            for component in result["address_components"]:
                if "locality" in component["types"]:
                    borough = component["long_name"]
                    borough_cache[location_str] = borough
                    return borough
    borough_cache[location_str] = None
    return None


# Call the function with appropriate parameters
location = "51.5074,-0.1278"  # London coordinates
radius = 2500  # 2.5 km varying the radius will increase the number of shops returned but increases the API query run time
search_terms = ["vintage shop", "charity shop"]

shops_data = query_places(api_key, location, radius, search_terms)

# Organize the collected data into a DataFrame
shops_df = pd.DataFrame(shops_data)

# Save the collected data to a CSV file
shops_df.to_csv("vintage_shops_london.csv", index=False)


In [33]:
shops_df.head()

Unnamed: 0,name,address,latitude,longitude,rating,borough
0,Vintage80scasuals,"Unit 63, 105 Culvert Rd, London",51.470904,-0.156681,0.0,London
1,Accessories of Old,"Next to Safestore, Arch 10 Munster Rd, London",51.472579,-0.203859,0.0,London
2,Eclectica (Clapham),"803 Wandsworth Rd, London",51.466581,-0.14846,4.9,London
3,BajaboutiqueGB,"37 Elbe St, London",51.472878,-0.185076,0.0,London
4,INSIGHT Fulham Dress Agency,"201 Munster Rd, London",51.479447,-0.211883,5.0,London


In [34]:
shops_df['borough'].unique() # had several attemps at fixing get_boroughs() with no success

array(['London'], dtype=object)

In [35]:
from IPython.display import display

display(shops_df)


Unnamed: 0,name,address,latitude,longitude,rating,borough
0,Vintage80scasuals,"Unit 63, 105 Culvert Rd, London",51.470904,-0.156681,0.0,London
1,Accessories of Old,"Next to Safestore, Arch 10 Munster Rd, London",51.472579,-0.203859,0.0,London
2,Eclectica (Clapham),"803 Wandsworth Rd, London",51.466581,-0.148460,4.9,London
3,BajaboutiqueGB,"37 Elbe St, London",51.472878,-0.185076,0.0,London
4,INSIGHT Fulham Dress Agency,"201 Munster Rd, London",51.479447,-0.211883,5.0,London
...,...,...,...,...,...,...
2469,The Salvation Army (Clapton),"122 Lower Clapton Rd, London",51.554421,-0.053815,4.9,London
2470,Age UK,"36/44 High St, London",51.581862,-0.031678,4.0,London
2471,Charity Hub,"654 Kingsland Rd, London",51.546140,-0.075496,0.0,London
2472,The Salvation Army (Dalston),"Richmond Rd, London",51.543003,-0.074395,5.0,London
