In [13]:
import requests
from bs4 import BeautifulSoup
import csv
import time

# URL of the Target store directory
main_url = "https://www.target.com/store-locator/store-directory"

# Send a GET request to the main store directory page
response = requests.get(main_url)

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all state links (Adjust the selector based on the actual structure)
    state_links = soup.select('a[href^="/store-locator/find-stores"]')

    # List to hold all store addresses
    store_addresses = []

    # Iterate over each state link
    for state_link in state_links:
        state_url = "https://www.target.com" + state_link['href']
        state_name = state_link.text.strip()
        
        # Fetch the page for each state
        state_response = requests.get(state_url)
        if state_response.status_code == 200:
            state_soup = BeautifulSoup(state_response.text, 'html.parser')
            
            # Find all city links in that state
            city_links = state_soup.select('a[href^="/sl/"]')  # Adjust the selector based on actual structure
            
            for city_link in city_links:
                city_url = "https://www.target.com" + city_link['href']
                city_name = city_link.text.strip()
                
                # Fetch the page for each city
                city_response = requests.get(city_url)
                if city_response.status_code == 200:
                    city_soup = BeautifulSoup(city_response.text, 'html.parser')
                    
                    # Find the store address on the city page
                    store_info = city_soup.find('div', class_='StoreInfoStyles__Address-sc-1ylk90e-1 dSyVqU')  # Example class name
                    
                    if store_info:
                        address = store_info.text.strip()
                        store_addresses.append([state_name, city_name, address])
                
                # Sleep for a short time to avoid overwhelming the server
                time.sleep(1)
        
        # Sleep for a short time between states
        time.sleep(2)

    # Write all addresses to a CSV file
    with open('target_store_addresses.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['State', 'City', 'Store Address'])
        writer.writerows(store_addresses)
    
    print(f"Scraped {len(store_addresses)} store addresses.")
else:
    print(f"Failed to retrieve the main store directory. Status code: {response.status_code}")


Scraped 0 store addresses.


In [116]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import re

# Base URL for the Target store directory
base_url = "https://www.target.com"

count = 0

# Dictionary to store raw JSON data for each store
raw_store_data = {}

# Send a GET request to the main store directory page
response = requests.get(f"{base_url}/store-locator/store-directory")

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all state links - use the actual href attribute to get the correct URLs
    state_links = soup.select('a[href^="/store-locator/store-directory/"]')

    # List to hold all store addresses and coordinates
    store_data = []

    # Iterate over each state link
    for state_link in state_links:
        state_url = base_url + state_link['href']
        state_name = state_link.text.strip()
        # print(state_name) 

        # if state_name != 'Arizona': continue

        # Fetch the page for each state
        state_response = requests.get(state_url)
        if state_response.status_code == 200:
            state_soup = BeautifulSoup(state_response.text, 'html.parser')
            
            # Revised regex pattern to match the href values
            pattern = r'\/sl\/[a-z0-9\-]+\/[0-9]+'

            # Using re.findall to match all occurrences of the pattern
            store_urls = re.findall(pattern, state_response.text)

            # Removing duplicates by converting to a set and back to a list
            store_urls = list(set(store_urls))

            for store_url in store_urls:
                count += 1
                full_store_url = base_url + store_url
                
                # Fetch the store details page
                store_response = requests.get(full_store_url)
                if store_response.status_code == 200:
                    store_soup = BeautifulSoup(store_response.text, 'html.parser')
                    
                    # Extract store address and geo-coordinates from JSON-LD script
                    scripts = store_soup.find_all('script', type='application/ld+json')
                    for script in scripts:
                        try:
                            data = json.loads(script.string)
                            if data['@type'] == 'DepartmentStore':
                                address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
                                latitude = data['geo']['latitude']
                                longitude = data['geo']['longitude']
                                
                                store_name = data.get('name', 'Unknown Store')
                                city_name = data['address']['addressLocality']

                                store_data.append([state_name, city_name, store_name, address, latitude, longitude])

                                # Store raw JSON data in the dictionary
                                store_key = f"{state_name}_{city_name}_{store_name}_{data['@id']}"
                                raw_store_data[store_key] = data

                                print(f"{count}, State: {state_name}, City: {city_name}, Store: {store_name}, Address: {address}, Latitude: {latitude}, Longitude: {longitude}")
                                break  # Move to the next store after finding the relevant data
                        except (json.JSONDecodeError, KeyError) as e:
                            print(f"Error processing JSON data: {e}")
            
            # Sleep for a short time between requests to avoid overwhelming the server
            time.sleep(1)
        
        # Sleep for a short time between states
       


1, State: Alabama, City: Bessemer, Store: Target Store - , Address: 4889 Promenade Pkwy, Bessemer, AL 35022-7305, Latitude: 33.334428, Longitude: -86.990326
2, State: Alabama, City: Fultondale, Store: Target Store - , Address: 3489 Lowery Pkwy, Fultondale, AL 35068-1677, Latitude: 33.605595, Longitude: -86.80623
3, State: Alabama, City: Mobile, Store: Target Store - , Address: 3150 Bel Air Mall, Mobile, AL 36606-3203, Latitude: 30.671392, Longitude: -88.12001
4, State: Alabama, City: Oxford, Store: Target Store - , Address: 400 Oxford Exchange Blvd, Oxford, AL 36203-3459, Latitude: 33.608825, Longitude: -85.783182
5, State: Alabama, City: Decatur, Store: Target Store - , Address: 1235 Point Mallard Pkwy SE, Decatur, AL 35601-6531, Latitude: 34.559064, Longitude: -86.97103
6, State: Alabama, City: Birmingham, Store: Target Store - , Address: 1654 Gadsden Hwy, Birmingham, AL 35235-3104, Latitude: 33.60526, Longitude: -86.636743
7, State: Alabama, City: Alabaster, Store: Target Store - , 

In [123]:
store_data = []
for data in raw_store_data.values():
    address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
    latitude = data['geo']['latitude']
    longitude = data['geo']['longitude']
    
    store_name = data.get('name', 'Unknown Store')
    city_name = data['address']['addressLocality']

    store_data.append([state_name, city_name, store_name, address, latitude, longitude])


In [125]:
import pandas as pd

In [128]:
df = pd.DataFrame(store_data,columns=['State','City','Store_type','Address','Latitude','Longitude'])

In [129]:
df.to_csv('target_stores_Aug2024.csv')

In [130]:
# Save the dictionary to a JSON file
with open('target_stores_Aug2024_raw.json', 'w') as json_file:
    json.dump(raw_store_data, json_file, indent=4)