In [13]:
import requests
from bs4 import BeautifulSoup
import csv
import time

# URL of the Target store directory
main_url = "https://www.target.com/store-locator/store-directory"

# Send a GET request to the main store directory page
response = requests.get(main_url)

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all state links (Adjust the selector based on the actual structure)
    state_links = soup.select('a[href^="/store-locator/find-stores"]')

    # List to hold all store addresses
    store_addresses = []

    # Iterate over each state link
    for state_link in state_links:
        state_url = "https://www.target.com" + state_link['href']
        state_name = state_link.text.strip()
        
        # Fetch the page for each state
        state_response = requests.get(state_url)
        if state_response.status_code == 200:
            state_soup = BeautifulSoup(state_response.text, 'html.parser')
            
            # Find all city links in that state
            city_links = state_soup.select('a[href^="/sl/"]')  # Adjust the selector based on actual structure
            
            for city_link in city_links:
                city_url = "https://www.target.com" + city_link['href']
                city_name = city_link.text.strip()
                
                # Fetch the page for each city
                city_response = requests.get(city_url)
                if city_response.status_code == 200:
                    city_soup = BeautifulSoup(city_response.text, 'html.parser')
                    
                    # Find the store address on the city page
                    store_info = city_soup.find('div', class_='StoreInfoStyles__Address-sc-1ylk90e-1 dSyVqU')  # Example class name
                    
                    if store_info:
                        address = store_info.text.strip()
                        store_addresses.append([state_name, city_name, address])
                
                # Sleep for a short time to avoid overwhelming the server
                time.sleep(1)
        
        # Sleep for a short time between states
        time.sleep(2)

    # Write all addresses to a CSV file
    with open('target_store_addresses.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['State', 'City', 'Store Address'])
        writer.writerows(store_addresses)
    
    print(f"Scraped {len(store_addresses)} store addresses.")
else:
    print(f"Failed to retrieve the main store directory. Status code: {response.status_code}")


Scraped 0 store addresses.


In [115]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import re

# Base URL for the Target store directory
base_url = "https://www.target.com"

count = 0

# Dictionary to store raw JSON data for each store
raw_store_data = {}

# Send a GET request to the main store directory page
response = requests.get(f"{base_url}/store-locator/store-directory")

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all state links - use the actual href attribute to get the correct URLs
    state_links = soup.select('a[href^="/store-locator/store-directory/"]')

    # List to hold all store addresses and coordinates
    store_data = []

    # Iterate over each state link
    for state_link in state_links:
        state_url = base_url + state_link['href']
        state_name = state_link.text.strip()
        # print(state_name) 

        # if state_name != 'Arizona': continue

        # Fetch the page for each state
        state_response = requests.get(state_url)
        if state_response.status_code == 200:
            state_soup = BeautifulSoup(state_response.text, 'html.parser')
            
            # Revised regex pattern to match the href values
            pattern = r'\/sl\/[a-z0-9\-]+\/[0-9]+'

            # Using re.findall to match all occurrences of the pattern
            store_urls = re.findall(pattern, state_response.text)

            # Removing duplicates by converting to a set and back to a list
            store_urls = list(set(store_urls))

            for store_url in store_urls:
                count += 1
                full_store_url = base_url + store_url
                
                # Fetch the store details page
                store_response = requests.get(full_store_url)
                if store_response.status_code == 200:
                    store_soup = BeautifulSoup(store_response.text, 'html.parser')
                    
                    # Extract store address and geo-coordinates from JSON-LD script
                    scripts = store_soup.find_all('script', type='application/ld+json')
                    for script in scripts:
                        try:
                            data = json.loads(script.string)
                            if data['@type'] == 'DepartmentStore':
                                address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
                                latitude = data['geo']['latitude']
                                longitude = data['geo']['longitude']
                                
                                store_name = data.get('name', 'Unknown Store')
                                city_name = data['address']['addressLocality']

                                store_data.append([state_name, city_name, store_name, address, latitude, longitude])

                                # Store raw JSON data in the dictionary
                                store_key = f"{state_name}_{city_name}_{store_name}_{data['@id']}"
                                raw_store_data[store_key] = data

                                print(f"{count}, State: {state_name}, City: {city_name}, Store: {store_name}, Address: {address}, Latitude: {latitude}, Longitude: {longitude}")
                                break  # Move to the next store after finding the relevant data
                        except (json.JSONDecodeError, KeyError) as e:
                            print(f"Error processing JSON data: {e}")
            
            # Sleep for a short time between requests to avoid overwhelming the server
            time.sleep(1)
        
        # Sleep for a short time between states
       


Alabama
Alaska
Arizona
State: Arizona, City: Scottsdale, Store: Target Store - , Address: 15444 N Frank Lloyd Wright Blvd, Scottsdale, AZ 85260-2845, Latitude: 33.627292, Longitude: -111.888293
State: Arizona, City: Mesa, Store: Target Store - , Address: 1525 S Power Rd, Mesa, AZ 85206-3707, Latitude: 33.388909, Longitude: -111.683474
State: Arizona, City: Phoenix, Store: Target Store - , Address: 7409 W Virginia Ave, Phoenix, AZ 85035-1336, Latitude: 33.475425, Longitude: -112.218617
State: Arizona, City: Scottsdale, Store: Target Store - , Address: 9000 E Talking Stick Way, Scottsdale, AZ 85250-8502, Latitude: 33.54021, Longitude: -111.887628
State: Arizona, City: Tolleson, Store: Target Store - , Address: 9830 W Lower Buckeye Rd, Tolleson, AZ 85353-9424, Latitude: 33.423059, Longitude: -112.269381
State: Arizona, City: Glendale, Store: Target Store - , Address: 10404 N 43rd Ave, Glendale, AZ 85302-2019, Latitude: 33.580184, Longitude: -112.152971
State: Arizona, City: Tucson, Store:

In [114]:
full_store_url

NameError: name 'full_store_url' is not defined

In [113]:
raw_store_data

{}

In [111]:


# Revised regex pattern to match the href values
pattern = r'\/sl\/[a-z0-9\-]+\/[0-9]+'

# Using re.findall to match all occurrences of the pattern
store_urls = re.findall(pattern, state_response.text)

# Removing duplicates by converting to a set and back to a list
store_urls = list(set(store_urls))

# Print the unique matched URLs
store_urls

['/sl/mesa-east/639',
 '/sl/frank-lloyd-wright-blvd/936',
 '/sl/westridge/851',
 '/sl/scottsdale-talking-stick-way/363',
 '/sl/phoenix-sw/2149',
 '/sl/glendale/2341',
 '/sl/tucson-north/2140',
 '/sl/tucson-se/1863',
 '/sl/paradise-valley/233',
 '/sl/surprise/1335',
 '/sl/gilbert-gateway/1959',
 '/sl/gilbert-val-vista/1209',
 '/sl/gilbert-sw/1960',
 '/sl/bullhead-city/2368',
 '/sl/goodyear/1242',
 '/sl/tempe/319',
 '/sl/arcadia-crossing/950',
 '/sl/peoria-sw/1361',
 '/sl/phoenix-spectrum/2354',
 '/sl/ahwatukee/909',
 '/sl/south-mountain/1905',
 '/sl/peoria-north/825',
 '/sl/chandler-fashion-center/1838',
 '/sl/tucson-el-con-mall/1439',
 '/sl/phoenix-uptown-camelback/3261',
 '/sl/mesa-red-mountain/1386',
 '/sl/fountain-hills/1432',
 '/sl/queen-creek/2365',
 '/sl/flagstaff/935',
 '/sl/oro-valley/700',
 '/sl/goodyear-west/2400',
 '/sl/mesa-west/1429',
 '/sl/tucson-sw/1316',
 '/sl/tempe-rio-salado/2176',
 '/sl/phoenix-i17-and-sr101/1141',
 '/sl/peoria-lake-pleasant-pkwy/2227',
 '/sl/scottsd

In [93]:
state_response.text

'<!DOCTYPE html><html class="RootLayout_layout___9hDS" lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab"/><link rel="stylesheet" href="https://assets.targetimg1.com/webui/st

In [None]:
re.findall()

In [91]:
re.findall(r'\\"href\\":\\"(\\/sl\\/[^\\"]+)', state_response.text)

[]

In [90]:
'\\"href\\":\\"/sl/phoenix-sw/2149\\' in state_response.text

True

In [87]:
state_response.text

'<!DOCTYPE html><html class="RootLayout_layout___9hDS" lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1"/><link rel="preload" as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab"/><link rel="stylesheet" href="https://assets.targetimg1.com/webui/st

In [83]:
state_links

[<a class="view_stateNameLink__qdJ1N" data-lnk="store directory: Alabama" href="/store-locator/store-directory/alabama">Alabama</a>,
 <a class="view_stateNameLink__qdJ1N" data-lnk="store directory: Alaska" href="/store-locator/store-directory/alaska">Alaska</a>,
 <a class="view_stateNameLink__qdJ1N" data-lnk="store directory: Arizona" href="/store-locator/store-directory/arizona">Arizona</a>,
 <a class="view_stateNameLink__qdJ1N" data-lnk="store directory: Arkansas" href="/store-locator/store-directory/arkansas">Arkansas</a>,
 <a class="view_stateNameLink__qdJ1N" data-lnk="store directory: California" href="/store-locator/store-directory/california">California</a>,
 <a class="view_stateNameLink__qdJ1N" data-lnk="store directory: Colorado" href="/store-locator/store-directory/colorado">Colorado</a>,
 <a class="view_stateNameLink__qdJ1N" data-lnk="store directory: Connecticut" href="/store-locator/store-directory/connecticut">Connecticut</a>,
 <a class="view_stateNameLink__qdJ1N" data-ln

In [73]:
store_urls = re.findall(r'href\":\"(\/sl\/[^\"]+)', state_response.text)

In [74]:
store_urls

[]

In [71]:
state_soup.select('a[href^="/sl/"]')

[<a class="view_cityNameLink__O_Xez" data-lnk="store directory: Wyoming: Casper: 164" href="/sl/casper/164">Casper</a>,
 <a class="view_cityNameLink__O_Xez" data-lnk="store directory: Wyoming: Cheyenne: 224" href="/sl/cheyenne/224">Cheyenne</a>,
 <a class="view_cityNameLink__O_Xez" data-lnk="store directory: Wyoming: Jackson: 3409" href="/sl/jackson-hole/3409">Jackson</a>]

In [70]:
state_soup.select('["href\":\"/sl/"]')

SelectorSyntaxError: Malformed attribute selector at position 0
  line 1:
["href":"/sl/"]
^

In [63]:
state_soup

<!DOCTYPE html>
<html class="RootLayout_layout___9hDS" lang="en"><head><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1" name="viewport"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab" rel="preload"/><link data-precedence="next" href="https://assets.targetimg1.com/we

In [49]:
re.findall(r'href\":\"(\/sl\/[^\"]+)', state_soup.text)

TypeError: expected string or bytes-like object

In [46]:
state_soup

<!DOCTYPE html>
<html class="RootLayout_layout___9hDS" lang="en"><head><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1" name="viewport"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab" rel="preload"/><link data-precedence="next" href="https://assets.targetimg1.com/we

In [38]:
state_soup

<!DOCTYPE html>
<html class="RootLayout_layout___9hDS" lang="en"><head><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1" name="viewport"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab" rel="preload"/><link data-precedence="next" href="https://assets.targetimg1.com/we

In [41]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import re

# Base URL for the Target store directory
base_url = "https://www.target.com"

# Dictionary to store raw JSON data for each store
raw_store_data = {}

# Send a GET request to the main store directory page
response = requests.get(f"{base_url}/store-locator/store-directory")

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all state links - use the actual href attribute to get the correct URLs
    state_links = soup.select('a[href^="/store-locator/store-directory/"]')

    # List to hold all store addresses and coordinates
    store_data = []

    # Iterate over each state link
    for state_link in state_links:
        state_url = base_url + state_link['href']
        state_name = state_link.text.strip()

        # Fetch the page for each state
        state_response = requests.get(state_url)
        if state_response.status_code == 200:
            state_soup = BeautifulSoup(state_response.text, 'html.parser')
            
            # Find all city links in that state
            city_links = state_soup.select('a[href^="/sl/"]')
            
            for city_link in city_links:
                city_url = base_url + city_link['href']
                city_name = city_link.text.strip()
                
                # Fetch the page for each city
                city_response = requests.get(city_url)
                if city_response.status_code == 200:
                    city_soup = BeautifulSoup(city_response.text, 'html.parser')
                    
                    # Check for multiple store data within the city
                    multi_store_data = re.findall(r'className\":\"styles_storeTitleLink__nnw_5\",\"href\":\"(.*?)\".*?\"children\":\[\$,\"h3\",null,\{\"className\":\"styles_storeCardTitle__4tfVK\",\"data-test\":\"@store-locator/StoreCard/StoreCardTitle\",\"children\":\[(.*?)\]\]\]\]\]\]\)\</script>', city_response.text)
                    
                    if multi_store_data:
                        # Iterate through each store in the city
                        for store_href, store_name in multi_store_data:
                            store_url = base_url + store_href
                            store_name = store_name.strip('\"')
                            
                            # Fetch the store details page
                            store_response = requests.get(store_url)
                            if store_response.status_code == 200:
                                store_soup = BeautifulSoup(store_response.text, 'html.parser')
                                
                                # Extract store address and geo-coordinates from JSON-LD script
                                scripts = store_soup.find_all('script', type='application/ld+json')
                                for script in scripts:
                                    try:
                                        data = json.loads(script.string)
                                        if data['@type'] == 'DepartmentStore':
                                            address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
                                            latitude = data['geo']['latitude']
                                            longitude = data['geo']['longitude']
                                            
                                            store_data.append([state_name, city_name, store_name, address, latitude, longitude])

                                            # Store raw JSON data in the dictionary
                                            store_key = f"{state_name}_{city_name}_{store_name}_{data['@id']}"
                                            raw_store_data[store_key] = data

                                            print(f"State: {state_name}, City: {city_name}, Store: {store_name}, Address: {address}, Latitude: {latitude}, Longitude: {longitude}")
                                            break  # Move to the next store after finding the relevant data
                                    except (json.JSONDecodeError, KeyError) as e:
                                        print(f"Error processing JSON data: {e}")
                    else:
                        # If only one store, process as before
                        scripts = city_soup.find_all('script', type='application/ld+json')
                        for script in scripts:
                            try:
                                data = json.loads(script.string)
                                if data['@type'] == 'DepartmentStore':
                                    address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
                                    latitude = data['geo']['latitude']
                                    longitude = data['geo']['longitude']
                                    
                                    store_data.append([state_name, city_name, address, latitude, longitude])

                                    # Store raw JSON data in the dictionary
                                    store_key = f"{state_name}_{city_name}_{data['@id']}"
                                    raw_store_data[store_key] = data

                                    print(f"State: {state_name}, City: {city_name}, Address: {address}, Latitude: {latitude}, Longitude: {longitude}")
                                    break  # Move to the next city after finding the relevant data
                            except (json.JSONDecodeError, KeyError) as e:
                                print(f"Error processing JSON data: {e}")
                
                # Sleep for a short time to avoid overwhelming the server
                time.sleep(1)
        
        # Sleep for a short time between states
        time.sleep(2)

    # Write all data to a CSV file
    with open('target_store_addresses.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['State', 'City', 'Store Name', 'Store Address', 'Latitude', 'Longitude'])
        writer.writerows(store_data)
    
    print(f"Scraped {len(store_data)} store addresses and coordinates.")
else:
    print(f"Failed to retrieve the main store directory. Status code: {response.status_code}")

# The raw_store_data dictionary now contains raw JSON data for each store


State: Alabama, City: Alabaster, Address: 250 S Colonial Dr, Alabaster, AL 35007-4657, Latitude: 33.224895, Longitude: -86.803977
State: Alabama, City: Auburn, Address: 129 N College St, Auburn, AL 36830-4705, Latitude: 32.607518, Longitude: -85.482037
State: Alabama, City: Bessemer, Address: 4889 Promenade Pkwy, Bessemer, AL 35022-7305, Latitude: 33.334428, Longitude: -86.990326
State: Alabama, City: Daphne, Address: 1698 US Highway 98, Daphne, AL 36526-4252, Latitude: 30.603217, Longitude: -87.896507
State: Alabama, City: Decatur, Address: 1235 Point Mallard Pkwy SE, Decatur, AL 35601-6531, Latitude: 34.559064, Longitude: -86.97103
State: Alabama, City: Dothan, Address: 4601 Montgomery Hwy, Dothan, AL 36303-1522, Latitude: 31.266492, Longitude: -85.446808
State: Alabama, City: Florence, Address: 372 Cox Creek Pkwy, Florence, AL 35630-1540, Latitude: 34.84213, Longitude: -87.634764
State: Alabama, City: Fultondale, Address: 3489 Lowery Pkwy, Fultondale, AL 35068-1677, Latitude: 33.605

KeyboardInterrupt: 

In [32]:
state_soup

<!DOCTYPE html>
<html class="RootLayout_layout___9hDS" lang="en"><head><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1" name="viewport"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab" rel="preload"/><link data-precedence="next" href="https://assets.targetimg1.com/we

In [33]:
import requests
from bs4 import BeautifulSoup
import csv
import json
import re

# Base URL for the Target store directory
base_url = "https://www.target.com"

# Dictionary to store raw JSON data for each store
raw_store_data = {}

# URL for California state directory
state_url = f"{base_url}/store-locator/store-directory/california"

# List to hold all store addresses and coordinates
store_data = []

# Fetch the page for California
state_response = requests.get(state_url)
if state_response.status_code == 200:
    state_soup = BeautifulSoup(state_response.text, 'html.parser')
    
    # Find the relevant data in the script tags or embedded in the page
    scripts = state_soup.find_all('script')
    
    for script in scripts:
        script_text = script.string
        if script_text and 'San Jose' in script_text:
            # Extract the portion that contains San Jose data
            san_jose_data_match = re.search(r'\"San Jose\",(.+?)\]\]\]', script_text)
            if san_jose_data_match:
                san_jose_data = san_jose_data_match.group(1)
                
                # Extract store IDs and names
                ids_match = re.search(r'data-ids\":\[(.*?)\]', san_jose_data)
                names_match = re.search(r'storeNames\":\[(.*?)\]', san_jose_data)
                
                if ids_match and names_match:
                    store_ids = json.loads(f'[{ids_match.group(1)}]')
                    store_names = json.loads(f'[{names_match.group(1)}]')
                    
                    for store_id, store_name in zip(store_ids, store_names):
                        store_url = f"{base_url}/sl/san-jose/{store_id}"
                        store_response = requests.get(store_url)
                        if store_response.status_code == 200:
                            store_soup = BeautifulSoup(store_response.text, 'html.parser')
                            
                            # Extract store address and geo-coordinates from JSON-LD script
                            scripts = store_soup.find_all('script', type='application/ld+json')
                            for script in scripts:
                                try:
                                    data = json.loads(script.string)
                                    if data['@type'] == 'DepartmentStore':
                                        address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
                                        latitude = data['geo']['latitude']
                                        longitude = data['geo']['longitude']
                                        
                                        store_data.append(['California', 'San Jose', store_name, address, latitude, longitude])
                                        print(f"Store: {store_name}, Address: {address}, Latitude: {latitude}, Longitude: {longitude}")
                                        break
                                except (json.JSONDecodeError, KeyError) as e:
                                    print(f"Error processing JSON data: {e}")
    else:
        print("San Jose data not found in the state script.")
else:
    print(f"Failed to retrieve the California state directory. Status code: {state_response.status_code}")

# Write all data to a CSV file
with open('target_store_addresses_san_jose.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['State', 'City', 'Store Name', 'Store Address', 'Latitude', 'Longitude'])
    writer.writerows(store_data)

print(f"Scraped {len(store_data)} store addresses and coordinates for San Jose.")


San Jose data not found in the state script.
Scraped 0 store addresses and coordinates for San Jose.


In [35]:
script_text

'self.__next_f.push([1,"149:[\\"$\\",\\"div\\",null,{\\"className\\":\\"styles_card__2hscu\\",\\"data-test\\":\\"@store-locator/StoreCard\\",\\"children\\":[[\\"$\\",\\"div\\",null,{\\"className\\":\\"styles_storeTitleContainer__iCNUq\\",\\"children\\":[[\\"$\\",\\"$L1a\\",null,{\\"className\\":\\"styles_storeTitleLink__nnw_5\\",\\"href\\":\\"/sl/vista-south-business-park-drive/2165\\",\\"prefetch\\":false,\\"children\\":[\\"$\\",\\"h3\\",null,{\\"className\\":\\"styles_storeCardTitle__4tfVK\\",\\"data-test\\":\\"@store-locator/StoreCard/StoreCardTitle\\",\\"children\\":[\\"Vista South Business Park Drive\\",[\\"$\\",\\"span\\",null,{\\"className\\":\\"h-sr-only\\",\\"children\\":\\"store details\\"}]]}]}],[\\"$\\",\\"$L14e\\",null,{\\"storeId\\":\\"$undefined\\"}]]}],[\\"$\\",\\"hr\\",null,{\\"className\\":\\"styles_row__g0kVX\\"}],[\\"$\\",\\"div\\",null,{\\"className\\":\\"styles_storeInfoContainer__S4ERb\\",\\"children\\":[[\\"$\\",\\"span\\",null,{\\"className\\":\\"h-display-inli

In [26]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import json

# Base URL for the Target store directory
base_url = "https://www.target.com"

# Dictionary to store raw JSON data for each store
raw_store_data = {}

# URL for Minnesota state directory
state_url = f"{base_url}/store-locator/store-directory/minnesota"

# List to hold all store addresses and coordinates
store_data = []

# Fetch the page for Minnesota
state_response = requests.get(state_url)
if state_response.status_code == 200:
    state_soup = BeautifulSoup(state_response.text, 'html.parser')
    
    # Find the link for Minneapolis
    city_link = state_soup.find('a', href=True, text='Minneapolis')
    
    if city_link:
        city_url = base_url + city_link['href']
        city_name = city_link.text.strip()
        
        # Fetch the page for Minneapolis
        city_response = requests.get(city_url)
        if city_response.status_code == 200:
            city_soup = BeautifulSoup(city_response.text, 'html.parser')
            
            # Check if the city page has multiple stores
            store_items = city_soup.find_all('div', class_='styles__StoreCardContent-sc-1g9lia6-0')  # Example class name, adjust as needed
            
            if store_items:
                # Iterate through each store listed in the sidebar
                for store_item in store_items:
                    store_address = store_item.find('address').text.strip() if store_item.find('address') else ''
                    store_lat_lng = store_item.find('div', class_='geo-location-class')  # Adjust based on actual HTML
                    latitude = store_lat_lng['data-latitude'] if store_lat_lng else None
                    longitude = store_lat_lng['data-longitude'] if store_lat_lng else None
                    
                    if store_address and latitude and longitude:
                        store_data.append(['Minnesota', city_name, store_address, latitude, longitude])
                        print(f"City: {city_name}, Address: {store_address}, Latitude: {latitude}, Longitude: {longitude}")
            else:
                # If only one store, process as before
                scripts = city_soup.find_all('script', type='application/ld+json')
                for script in scripts:
                    try:
                        data = json.loads(script.string)
                        if data['@type'] == 'DepartmentStore':
                            address = data['address']['streetAddress'] + ', ' + data['address']['addressLocality'] + ', ' + data['address']['addressRegion'] + ' ' + data['address']['postalCode']
                            latitude = data['geo']['latitude']
                            longitude = data['geo']['longitude']
                            
                            store_data.append(['Minnesota', city_name, address, latitude, longitude])
                            print(f"City: {city_name}, Address: {address}, Latitude: {latitude}, Longitude: {longitude}")
                            break
                    except (json.JSONDecodeError, KeyError) as e:
                        print(f"Error processing JSON data: {e}")
    
    else:
        print("Minneapolis link not found.")
else:
    print(f"Failed to retrieve the Minnesota state directory. Status code: {state_response.status_code}")

# Write all data to a CSV file
with open('target_store_addresses_minneapolis.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['State', 'City', 'Store Address', 'Latitude', 'Longitude'])
    writer.writerows(store_data)

print(f"Scraped {len(store_data)} store addresses and coordinates for Minneapolis.")


Minneapolis link not found.
Scraped 0 store addresses and coordinates for Minneapolis.


  city_link = state_soup.find('a', href=True, text='Minneapolis')


In [29]:
city_link

In [21]:
state_url

'https://www.target.com/store-locator/find-stores'

In [22]:
city_url

'https://www.target.com/sl/scotts-valley/3410'

In [14]:
city_soup

<!DOCTYPE html>
<html class="RootLayout_layout___9hDS" lang="en"><head><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=2, interactive-widget=overlays-content" name="viewport"/><meta content="width=device-width, initial-scale=1" name="viewport"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_b3cea7ef-22bf-4cdf-92bb-e100a10ec623" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_adf88fa5-9437-443e-8ddd-674dde6c3a82" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_1e7d1a3c-3927-49e3-8ef7-bac2397fb36c" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_77400a3e-7081-4c52-8feb-7126a78353e1" rel="preload"/><link as="image" href="https://target.scene7.com/is/content/Target/GUEST_ca8f1d56-9000-4407-be78-2c33be983dab" rel="preload"/><link data-precedence="next" href="https://assets.targetimg1.com/we

In [2]:
!pip install bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Collecting beautifulsoup4
  Downloading beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.9/147.9 KB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting soupsieve>1.2
  Downloading soupsieve-2.6-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, bs4
Successfully installed beautifulsoup4-4.12.3 bs4-0.0.2 soupsieve-2.6
You should consider upgrading via the '/Users/lingxunkong/.pyenv/versions/3.9.13/envs/aienv/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
[0m