# US Lesbian Bars Research and Interactive Visualization Project

In [56]:
# Import necessary libraries
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError

import time
from tqdm import tqdm
import os
from dotenv import load_dotenv

load_dotenv()  # Load environment variables from .env
api_key = os.getenv("API_KEY")

In [52]:
# Load the Excel file
xls = 'LesbianBars_US.xlsx'
df = pd.read_excel(xls, "Bars")

In [58]:
# Check the DataFrame
# df.head(3)

In [None]:
# Initialize the geocoder
geolocator = Nominatim(user_agent="lesbianbars")

In [59]:
# Define functions

def get_location_data(search_string, max_retries=3):
    """
    Retrieves location data for a given search string using Nominatim (OpenStreetMap data).
    Includes retry logic for handling timeouts and extracts additional location details.
    """
    for attempt in range(max_retries):
        try:
            # Add a delay to respect Nominatim's usage policy (1 request per second)
            if attempt > 0:
                time.sleep(1)
            
            location = geolocator.geocode(search_string, timeout=10)
            
            if location:
                # Basic location data
                result = {
                    'address': location.address,
                    'latitude': location.latitude,
                    'longitude': location.longitude
                }
                
                # Add neighborhood and postcode if available in the address components
                if 'address' in location.raw:
                    addr = location.raw['address']
                    
                    # Extract neighbourhood (might be spelled differently in different regions)
                    if 'neighbourhood' in addr:
                        result['neighbourhood'] = addr['neighbourhood']
                    elif 'neighborhood' in addr:
                        result['neighbourhood'] = addr['neighborhood']
                    else:
                        result['neighbourhood'] = None
                    
                    # Extract postcode
                    if 'postcode' in addr:
                        result['postcode'] = addr['postcode']
                    else:
                        result['postcode'] = None
                    
                return result
            else:
                return {
                    'address': None,
                    'latitude': None,
                    'longitude': None,
                    'neighbourhood': None,
                    'postcode': None,
                    'error': "No results found"
                }
                
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            if attempt == max_retries - 1:
                return {
                    'address': None,
                    'latitude': None,
                    'longitude': None,
                    'neighbourhood': None,
                    'postcode': None,
                    'error': f"Failed after {max_retries} attempts: {str(e)}"
                }
            time.sleep(2)  # Wait a bit longer before retrying

Geocoding bars:   0%|                                                                            | 0/57 [00:02<?, ?it/s]

Error for A League of Her Own: No results found


Geocoding bars:   2%|█▏                                                                  | 1/57 [00:03<02:58,  3.19s/it]

Error for Arcana Bar and Lounge: No results found


Geocoding bars:   5%|███▌                                                                | 3/57 [00:06<01:34,  1.74s/it]

Error for Babes of Carytown: No results found


Geocoding bars:   7%|████▊                                                               | 4/57 [00:07<01:20,  1.52s/it]

Error for The Backdoor: No results found


Geocoding bars:   9%|█████▉                                                              | 5/57 [00:08<01:12,  1.40s/it]

Error for Boycott Bar: No results found


Geocoding bars:  11%|███████▏                                                            | 6/57 [00:09<01:08,  1.34s/it]

Error for The Bush: No results found


Geocoding bars:  28%|██████████████████▊                                                | 16/57 [00:21<00:49,  1.21s/it]

Error for Jolene: No results found


Geocoding bars:  30%|███████████████████▉                                               | 17/57 [00:23<00:47,  1.19s/it]

Error for The Lady's Room: No results found


Geocoding bars:  32%|█████████████████████▏                                             | 18/57 [00:24<00:54,  1.40s/it]

Error for The Lipstick Lounge: No results found


Geocoding bars:  35%|███████████████████████▌                                           | 20/57 [00:27<00:49,  1.33s/it]

Error for My Sister's Room: No results found


Geocoding bars:  40%|███████████████████████████                                        | 23/57 [00:31<00:43,  1.28s/it]

Error for The Ruby Fruit: No results found


Geocoding bars:  42%|████████████████████████████▏                                      | 24/57 [00:32<00:41,  1.27s/it]

Error for Scarlet Fox Wine Bar: No results found


Geocoding bars:  49%|████████████████████████████████▉                                  | 28/57 [00:37<00:35,  1.22s/it]

Error for Unicorn Bar: No results found


Geocoding bars:  54%|████████████████████████████████████▍                              | 31/57 [00:40<00:31,  1.19s/it]

Error for Wildside West: No results found


Geocoding bars:  58%|██████████████████████████████████████▊                            | 33/57 [00:43<00:30,  1.26s/it]

Error for Herz: No results found


Geocoding bars:  60%|███████████████████████████████████████▉                           | 34/57 [00:44<00:28,  1.25s/it]

Error for Blush & Blue: No results found


Geocoding bars:  61%|█████████████████████████████████████████▏                         | 35/57 [00:45<00:26,  1.23s/it]

Error for Alibi's: No results found


Geocoding bars:  63%|██████████████████████████████████████████▎                        | 36/57 [00:47<00:25,  1.22s/it]

Error for Whisky Girl Tavern: No results found


Geocoding bars:  68%|█████████████████████████████████████████████▊                     | 39/57 [00:50<00:22,  1.23s/it]

Error for The Stop: No results found


Geocoding bars:  70%|███████████████████████████████████████████████                    | 40/57 [00:51<00:20,  1.22s/it]

Error for Lipstick 24: No results found


Geocoding bars:  74%|█████████████████████████████████████████████████▎                 | 42/57 [00:54<00:18,  1.22s/it]

Error for Toasted Walnut: No results found


Geocoding bars:  77%|███████████████████████████████████████████████████▋               | 44/57 [00:56<00:15,  1.21s/it]

Error for Broad's Lounge: No results found


Geocoding bars:  79%|████████████████████████████████████████████████████▉              | 45/57 [00:58<00:15,  1.28s/it]

Error for Sweetwater Saloon: No results found


Geocoding bars:  81%|██████████████████████████████████████████████████████             | 46/57 [00:59<00:13,  1.26s/it]

Error for Good Luck Charm: No results found


Geocoding bars:  82%|███████████████████████████████████████████████████████▏           | 47/57 [01:00<00:12,  1.26s/it]

Error for Fannie's: No results found


Geocoding bars:  84%|████████████████████████████████████████████████████████▍          | 48/57 [01:01<00:11,  1.25s/it]

Error for Wall Street: No results found


Geocoding bars:  86%|█████████████████████████████████████████████████████████▌         | 49/57 [01:03<00:10,  1.25s/it]

Error for GirlBar: No results found


Geocoding bars:  88%|██████████████████████████████████████████████████████████▊        | 50/57 [01:04<00:08,  1.20s/it]

Error for Coconuts: No results found


Geocoding bars:  89%|███████████████████████████████████████████████████████████▉       | 51/57 [01:05<00:07,  1.18s/it]

Error for The Cartwheel: No results found


Geocoding bars:  91%|█████████████████████████████████████████████████████████████      | 52/57 [01:06<00:05,  1.18s/it]

Error for Meow Mix: No results found


Geocoding bars:  93%|██████████████████████████████████████████████████████████████▎    | 53/57 [01:07<00:04,  1.17s/it]

Error for Clit Club: No results found


Geocoding bars:  95%|███████████████████████████████████████████████████████████████▍   | 54/57 [01:09<00:03,  1.17s/it]

Error for Cash Inn: No results found


Geocoding bars:  96%|████████████████████████████████████████████████████████████████▋  | 55/57 [01:10<00:02,  1.24s/it]

Error for The Lexington Club: No results found


Geocoding bars: 100%|███████████████████████████████████████████████████████████████████| 57/57 [01:12<00:00,  1.28s/it]


First few results:
                bar_name                                      search_string  \
0    A League of Her Own           A League of Her Own, Washington, DC, USA   
1  Arcana Bar and Lounge  Arcana Bar and Lounge, Durham, North Carolina,...   
2             As You Are                    As You Are, Washington, DC, USA   
3      Babes of Carytown         Babes of Carytown, Richmond, Virginia, USA   
4           The Backdoor            The Backdoor, Bloomington, Indiana, USA   

                                             address   latitude  longitude  \
0                                               None        NaN        NaN   
1                                               None        NaN        NaN   
2  As You Are, 500, 8th Street Southeast, Barrack...  38.882582 -76.994779   
3                                               None        NaN        NaN   
4                                               None        NaN        NaN   

   neighbourhood  postcode  
0      




### Note: The following cell contributes towards API limits!

In [None]:
# Create search strings combining name, city and state
results = []

# Use tqdm to create a progress bar
for _, row in tqdm(df.iterrows(), total=len(df), desc="Geocoding bars"):
    name = row['Name']
    city = row['City'] if 'City' in row and pd.notna(row['City']) else ""
    state = row['State'] if 'State' in row and pd.notna(row['State']) else ""
    
    search_string = f"{name}, {city}, {state}, USA"
    
    # Get location data
    location_data = get_location_data(search_string)
    
    # Add the bar name and search string to the location data
    location_data['bar_name'] = name
    location_data['search_string'] = search_string
    
    # Print error if there was one (optional)
    if 'error' in location_data:
        tqdm.write(f"Error for {name}: {location_data['error']}")
        location_data.pop('error', None)  # Remove error key from dictionary
    
    results.append(location_data)
    
    # Add a delay to respect Nominatim's usage policy (1 request per second)
    time.sleep(1)

In [None]:
# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Reorder the columns for better readability
results_df = results_df[['bar_name', 'search_string', 'address', 'latitude', 'longitude', 'neighbourhood', 'postcode']]

# Display the first few results
print("\nFirst few results:")
print(results_df.head())

# Save the results to a new Excel file
results_df.to_excel('LesbianBars_Geocoded.xlsx', index=False)
print(f"Saved geocoded data for {len(results_df)} bars to LesbianBars_Geocoded.xlsx")

### Now manually check the results for accuracy. Fix any misspelled names, etc. in the Excel file so that the search will return the correct result. You can use Google Maps for this.

In [60]:
results

[{'address': None,
  'latitude': None,
  'longitude': None,
  'neighbourhood': None,
  'postcode': None,
  'bar_name': 'A League of Her Own',
  'search_string': 'A League of Her Own, Washington, DC, USA'},
 {'address': None,
  'latitude': None,
  'longitude': None,
  'neighbourhood': None,
  'postcode': None,
  'bar_name': 'Arcana Bar and Lounge',
  'search_string': 'Arcana Bar and Lounge, Durham, North Carolina, USA'},
 {'address': 'As You Are, 500, 8th Street Southeast, Barracks Row, Capitol Hill, Ward 6, Washington, District of Columbia, 20003, United States',
  'latitude': 38.8825825,
  'longitude': -76.9947786,
  'bar_name': 'As You Are',
  'search_string': 'As You Are, Washington, DC, USA'},
 {'address': None,
  'latitude': None,
  'longitude': None,
  'neighbourhood': None,
  'postcode': None,
  'bar_name': 'Babes of Carytown',
  'search_string': 'Babes of Carytown, Richmond, Virginia, USA'},
 {'address': None,
  'latitude': None,
  'longitude': None,
  'neighbourhood': None,
  

In [None]:
# Import necessary libraries
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from tqdm import tqdm

# Load the Excel file
xls = 'LesbianBars_US.xlsx'
df = pd.read_excel(xls, "Bars")

# Initialize the geocoder
geolocator = Nominatim(user_agent="lesbianbars")

# Define functions

def get_location_data(search_string, max_retries=3):
    """
    Retrieves location data for a given search string using Nominatim (OpenStreetMap data).
    Includes retry logic for handling timeouts and extracts additional location details.
    """
    for attempt in range(max_retries):
        try:
            # Add a delay to respect Nominatim's usage policy (1 request per second)
            if attempt > 0:
                time.sleep(1)
            
            location = geolocator.geocode(search_string, timeout=10)
            
            if location:
                # Basic location data
                result = {
                    'address': location.address,
                    'latitude': location.latitude,
                    'longitude': location.longitude
                }
                
                # Add neighborhood and postcode if available in the address components
                if 'address' in location.raw:
                    addr = location.raw['address']
                    
                    # Extract neighbourhood (might be spelled differently in different regions)
                    if 'neighbourhood' in addr:
                        result['neighbourhood'] = addr['neighbourhood']
                    elif 'neighborhood' in addr:
                        result['neighbourhood'] = addr['neighborhood']
                    else:
                        result['neighbourhood'] = None
                    
                    # Extract postcode
                    if 'postcode' in addr:
                        result['postcode'] = addr['postcode']
                    else:
                        result['postcode'] = None
                    
                return result
            else:
                return {
                    'address': None,
                    'latitude': None,
                    'longitude': None,
                    'neighbourhood': None,
                    'postcode': None,
                    'error': "No results found"
                }
                
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            if attempt == max_retries - 1:
                return {
                    'address': None,
                    'latitude': None,
                    'longitude': None,
                    'neighbourhood': None,
                    'postcode': None,
                    'error': f"Failed after {max_retries} attempts: {str(e)}"
                }
            time.sleep(2)  # Wait a bit longer before retrying

# Create search strings combining name, city and state
results = []

# Use tqdm to create a progress bar
for _, row in tqdm(df.iterrows(), total=len(df), desc="Geocoding bars"):
    name = row['Name']
    city = row['City'] if 'City' in row and pd.notna(row['City']) else ""
    state = row['State'] if 'State' in row and pd.notna(row['State']) else ""
    
    search_string = f"{name}, {city}, {state}, USA"
    
    # Get location data
    location_data = get_location_data(search_string)
    
    # Add the bar name and search string to the location data
    location_data['bar_name'] = name
    location_data['search_string'] = search_string
    
    # Print error if there was one (optional)
    if 'error' in location_data:
        tqdm.write(f"Error for {name}: {location_data['error']}")
        location_data.pop('error', None)  # Remove error key from dictionary
    
    results.append(location_data)
    
    # Add a delay to respect Nominatim's usage policy (1 request per second)
    time.sleep(1)

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Reorder the columns for better readability
results_df = results_df[['bar_name', 'search_string', 'address', 'latitude', 'longitude', 'neighbourhood', 'postcode']]

# Display the first few results
print("\nFirst few results:")
print(results_df.head())

# Save the results to a new Excel file
results_df.to_excel('LesbianBars_Geocoded.xlsx', index=False)
print(f"Saved geocoded data for {len(results_df)} bars to LesbianBars_Geocoded.xlsx")