In [None]:
!pip install googlemaps

Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25l[?25hdone
  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40712 sha256=1ad258e62f0e3e95de9489ca459359b148c0d4488aa95e332403b7b65435ba40
  Stored in directory: /root/.cache/pip/wheels/17/f8/79/999d5d37118fd35d7219ef57933eb9d09886c4c4503a800f84
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0


# Testing the API with one Address

In [None]:
import pandas as pd
import googlemaps

# Initialize the Google Maps client.
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable, with
# an interactive prompt as a fallback, so the secret is never stored in the
# notebook or its outputs.
# NOTE(review): an API key was previously hardcoded in this cell; it should be
# revoked/rotated in the Google Cloud console.
import getpass
import os

gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY') or getpass.getpass('Google Maps API key: ')
)

# Function to geocode an address and check if it's in Lubbock County, Texas
def geocode_address(address, year):
    """Geocode one address and keep its coordinates only if it is in Lubbock County.

    The raw API response is printed first (this cell is an API smoke test).
    Only the first match returned by ``gmaps.geocode`` is inspected.

    Args:
        address: Address string passed to ``gmaps.geocode``.
        year: Suffix used to build the two output labels.

    Returns:
        A ``pd.Series`` indexed by ``latitude_{year}`` and ``longitude_{year}``.
        It holds the coordinates of the first match when one of that match's
        address components is typed ``administrative_area_level_2`` and named
        ``Lubbock County``. In every other case (no match, another county, or
        any exception) both values are ``None``.
    """
    column_labels = [f'latitude_{year}', f'longitude_{year}']
    coordinates = [None, None]

    try:
        response = gmaps.geocode(address)
        print(response)
        print('----------------------------\n')

        if response:
            best_match = response[0]
            point = best_match['geometry']['location']
            lat, lng = point['lat'], point['lng']

            # The county is reported as the administrative_area_level_2 component.
            inside_county = any(
                'administrative_area_level_2' in part['types']
                and part['long_name'] == 'Lubbock County'
                for part in best_match['address_components']
            )
            if inside_county:
                print(f"Address found in Lubbock County: {address}")
                coordinates = [lat, lng]
            else:
                print(f"Address not in Lubbock County: {address}")

    except Exception as e:
        print(f"Error geocoding address: {address}. Error: {str(e)}")

    return pd.Series(coordinates, index=column_labels)

# One-row frame used to smoke-test the geocoding call
data = dict(Standardized_Address_1945=['1513 26th St, Lubbock, TX 79411'])
df1945 = pd.DataFrame(data)

# geocode_address returns a two-element Series per address, so apply() yields a
# two-column frame that is assigned to the latitude/longitude columns
df1945[['latitude_1945', 'longitude_1945']] = (
    df1945['Standardized_Address_1945'].apply(geocode_address, args=(1945,))
)

# Show the frame with the new columns
print(df1945)

[{'address_components': [{'long_name': '1513', 'short_name': '1513', 'types': ['street_number']}, {'long_name': '26th Street', 'short_name': '26th St', 'types': ['route']}, {'long_name': 'Slaton-Bean', 'short_name': 'Slaton-Bean', 'types': ['neighborhood', 'political']}, {'long_name': 'Lubbock', 'short_name': 'Lubbock', 'types': ['locality', 'political']}, {'long_name': 'Lubbock County', 'short_name': 'Lubbock County', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Texas', 'short_name': 'TX', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}, {'long_name': '79411', 'short_name': '79411', 'types': ['postal_code']}, {'long_name': '2433', 'short_name': '2433', 'types': ['postal_code_suffix']}], 'formatted_address': '1513 26th St, Lubbock, TX 79411, USA', 'geometry': {'bounds': {'northeast': {'lat': 33.5704626, 'lng': -101.8520815}, 'southwest': {'lat': 33.5703747, 'lng': -10

# Standardizing 1945 Addresses as per Google's official address format

In [None]:
import pandas as pd
import googlemaps
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Initialize the Google Maps client.
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable, with
# an interactive prompt as a fallback, so the secret is never stored in the
# notebook or its outputs.
# NOTE(review): an API key was previously hardcoded in this cell; it should be
# revoked/rotated in the Google Cloud console.
import getpass
import os

gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY') or getpass.getpass('Google Maps API key: ')
)

# Function to geocode an address and check if it's in Lubbock County, Texas
def geocode_address(address, year):
    """Geocode one address and flatten Google's first match into a ``pd.Series``.

    Args:
        address: Address to look up. Missing values (``None``/``NaN``) and
            blank strings are skipped without calling the API. Non-string
            values (e.g. a numeric spreadsheet cell) are converted with
            ``str`` before use.
        year: Suffix appended to every label of the returned Series.

    Returns:
        On success, a Series with ``Google_Standard_address_{year}``,
        ``latitude_{year}``, ``longitude_{year}`` and one ``{type}_{year}``
        entry per address-component type (value: the component's long name).
        When several components share a type (e.g. ``political``) the last
        one wins. When the address is missing/blank, the API returns no
        match, or any exception is raised, the one-element placeholder
        ``pd.Series([None])`` is returned instead.

    Side effects:
        Prints whether the match is in Lubbock County, or the error message.
    """
    if pd.isna(address):
        return pd.Series([None])
    if not isinstance(address, str):
        # .strip() does not exist on int/float cells; without this conversion
        # a single numeric cell raised AttributeError outside the try block.
        address = str(address)
    if address.strip() == '':
        return pd.Series([None])

    try:
        result = gmaps.geocode(address)
        if result:
            # Only the first (best) match is used
            top_match = result[0]
            formatted_address = top_match['formatted_address']
            geometry = top_match['geometry']
            address_components = top_match['address_components']

            # Create a dictionary to store the extracted information
            data = {
                f'Google_Standard_address_{year}': formatted_address,
                f'latitude_{year}': geometry['location']['lat'],
                f'longitude_{year}': geometry['location']['lng']
            }

            # One entry per component type; later components overwrite
            # earlier ones that share a type
            for component in address_components:
                for type_ in component['types']:
                    data[f"{type_}_{year}"] = component['long_name']

            # Informational only: the result is returned whether or not the
            # match is in Lubbock County, Texas
            in_lubbock_county = any(
                component['long_name'] == 'Lubbock County'
                and 'administrative_area_level_2' in component['types']
                for component in address_components
            )
            if in_lubbock_county:
                print(f"Address found in Lubbock County: {address}")
            else:
                print(f"Address not in Lubbock County: {address}")

            return pd.Series(data)

    except Exception as e:
        # Best effort: one failing address must not abort the whole batch
        print(f"Error geocoding address: {address}. Error: {str(e)}")

    # Placeholder returned when there is no match or the lookup failed
    return pd.Series([None])

# Load the standardized 1945 addresses that will be geocoded
data = pd.read_excel('/content/1945_Standardized_Final_.xlsx')
df1945 = pd.DataFrame(data)

# Fail fast: both columns are required further down, and discovering that one
# is missing only after every (billed) API call has been made is wasteful.
_missing_columns = {'ID_1945', 'Standardized_Address_1945'} - set(df1945.columns)
if _missing_columns:
    raise KeyError(f"Input sheet is missing required columns: {sorted(_missing_columns)}")

# Initialize API call counter. It is shared by every worker thread, so all
# updates go through _api_call_lock.
import threading

api_call_count = 0
_api_call_lock = threading.Lock()

# Function to process a batch of addresses
def process_batch(batch_df):
    """Geocode every row of ``batch_df`` and return the results as a DataFrame.

    Args:
        batch_df: DataFrame slice containing a ``Standardized_Address_1945``
            column.

    Returns:
        A DataFrame with one row per input row, indexed like ``batch_df``.
        Rows whose address is missing or blank are not sent to the API and
        get the placeholder ``pd.Series([None])``.

    Side effects:
        Increments the global ``api_call_count`` once per geocoded address
        and sleeps 5 seconds each time the counter reaches a multiple of 50.
    """
    global api_call_count
    geocoded_data = []
    for _, row in batch_df.iterrows():
        address = row['Standardized_Address_1945']
        if pd.isna(address) or str(address).strip() == '':
            # Nothing to look up: keep the row for alignment, but make no API
            # call and do no throttling. Previously every blank row slept 5 s
            # while the counter sat at 0 or at a multiple of 50.
            geocoded_data.append(pd.Series([None]))
            continue

        geocoded_data.append(geocode_address(address, 1945))

        # "+=" on a global is not atomic across threads; update under the
        # lock and remember the value produced by this particular call.
        with _api_call_lock:
            api_call_count += 1
            calls_so_far = api_call_count

        # Add a delay of 5 seconds after every 50 API calls (sleep outside
        # the lock so other workers are not blocked from counting)
        if calls_so_far % 50 == 0:
            time.sleep(5)
    return pd.DataFrame(geocoded_data, index=batch_df.index)

# Split the sheet into batches of 50 rows; each batch is one task for the pool
batch_size = 50
batches = [df1945[i:i+batch_size] for i in range(0, len(df1945), batch_size)]

# Create a ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor() as executor:
    # Submit the batches for processing
    futures = [executor.submit(process_batch, batch) for batch in batches]

    # Collect the results as they finish (completion order is arbitrary)
    results = []
    for future in as_completed(futures):
        results.append(future.result())

# Concatenate the results into a single DataFrame.
# pd.concat raises on an empty list, so an empty input sheet produces an empty
# frame instead of a crash. sort_index() makes the row order independent of
# which thread finished first.
if results:
    geocoded_df = pd.concat(results).sort_index()
else:
    geocoded_df = pd.DataFrame(index=df1945.index)

# Rows without a usable result are built from pd.Series([None]), which shows up
# as a column labelled 0; it carries no data, so keep it out of the output.
geocoded_df = geocoded_df.drop(columns=[0], errors='ignore')

# Combine the ID_1945, Standardized_Address_1945 columns with the geocoded data
# (aligned on the row index)
result_df = pd.concat([df1945[['ID_1945', 'Standardized_Address_1945']], geocoded_df], axis=1)

# Save the updated dataframe to a new Excel sheet
output_file = 'geocoded_data_1945.xlsx'
result_df.to_excel(output_file, index=False, engine='openpyxl')
print(f"Data saved to {output_file}")

# Print the count of API calls
print(f"Total API calls: {api_call_count}")

Address found in Lubbock County: 2803, 27th Street, Tech Terrace U.N.I.T., Lubbock, Lubbock County, Texas, 79410, United States
Address found in Lubbock County: 2211, 14th Street, South Overton, Lubbock, Lubbock County, Texas, 79401, United States
Address found in Lubbock County: 1312, 26th Street, Slaton-Bean, Lubbock, Lubbock County, Texas, 79411, United States
Address found in Lubbock County: 2701, 29th Street, Tech Terrace U.N.I.T., Lubbock, Lubbock County, Texas, 79410, United States
Address found in Lubbock County: 1308, 26th Street, Slaton-Bean, Lubbock, Lubbock County, Texas, 79411, United States
Address found in Lubbock County: 2203, 29th Street, Heart of Lubbock, Lubbock, Lubbock County, Texas, 79411, United States
Address found in Lubbock County: 2613, 22nd Street, Tech Terrace U.N.I.T., Lubbock, Lubbock County, Texas, 79410, United States
Address found in Lubbock County: 2407, 22nd Street, Heart of Lubbock, Lubbock, Lubbock County, Texas, 79411, United States
Address found 

# Standardizing 1975 Addresses as per Google's official address format

In [None]:
import pandas as pd
import googlemaps
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Initialize the Google Maps client.
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable, with
# an interactive prompt as a fallback, so the secret is never stored in the
# notebook or its outputs.
# NOTE(review): an API key was previously hardcoded in this cell; it should be
# revoked/rotated in the Google Cloud console.
import getpass
import os

gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY') or getpass.getpass('Google Maps API key: ')
)

# Function to geocode an address and check if it's in Lubbock County, Texas
def geocode_address(address, year):
    """Geocode one address and flatten Google's first match into a ``pd.Series``.

    Args:
        address: Address to look up. Missing values (``None``/``NaN``) and
            blank strings are skipped without calling the API. Non-string
            values (e.g. a numeric spreadsheet cell) are converted with
            ``str`` before use.
        year: Suffix appended to every label of the returned Series.

    Returns:
        On success, a Series with ``Google_Standard_address_{year}``,
        ``latitude_{year}``, ``longitude_{year}`` and one ``{type}_{year}``
        entry per address-component type (value: the component's long name).
        When several components share a type (e.g. ``political``) the last
        one wins. When the address is missing/blank, the API returns no
        match, or any exception is raised, the one-element placeholder
        ``pd.Series([None])`` is returned instead.

    Side effects:
        Prints whether the match is in Lubbock County, or the error message.
    """
    if pd.isna(address):
        return pd.Series([None])
    if not isinstance(address, str):
        # .strip() does not exist on int/float cells; without this conversion
        # a single numeric cell raised AttributeError outside the try block.
        address = str(address)
    if address.strip() == '':
        return pd.Series([None])

    try:
        result = gmaps.geocode(address)
        if result:
            # Only the first (best) match is used
            top_match = result[0]
            formatted_address = top_match['formatted_address']
            geometry = top_match['geometry']
            address_components = top_match['address_components']

            # Create a dictionary to store the extracted information
            data = {
                f'Google_Standard_address_{year}': formatted_address,
                f'latitude_{year}': geometry['location']['lat'],
                f'longitude_{year}': geometry['location']['lng']
            }

            # One entry per component type; later components overwrite
            # earlier ones that share a type
            for component in address_components:
                for type_ in component['types']:
                    data[f"{type_}_{year}"] = component['long_name']

            # Informational only: the result is returned whether or not the
            # match is in Lubbock County, Texas
            in_lubbock_county = any(
                component['long_name'] == 'Lubbock County'
                and 'administrative_area_level_2' in component['types']
                for component in address_components
            )
            if in_lubbock_county:
                print(f"Address found in Lubbock County: {address}")
            else:
                print(f"Address not in Lubbock County: {address}")

            return pd.Series(data)

    except Exception as e:
        # Best effort: one failing address must not abort the whole batch
        print(f"Error geocoding address: {address}. Error: {str(e)}")

    # Placeholder returned when there is no match or the lookup failed
    return pd.Series([None])

# Load the first part (rows 1 to 25000) of the standardized 1975 addresses
data = pd.read_excel('/content/1975_Standardized_Final - 1 to 25000.xlsx')
df1975 = pd.DataFrame(data)

# Fail fast: both columns are required further down, and discovering that one
# is missing only after every (billed) API call has been made is wasteful.
_missing_columns = {'ID_1975', 'Standardized_Address_1975'} - set(df1975.columns)
if _missing_columns:
    raise KeyError(f"Input sheet is missing required columns: {sorted(_missing_columns)}")

# Initialize API call counter. It is shared by every worker thread, so all
# updates go through _api_call_lock.
import threading

api_call_count = 0
_api_call_lock = threading.Lock()

# Function to process a batch of addresses
def process_batch(batch_df):
    """Geocode every row of ``batch_df`` and return the results as a DataFrame.

    Args:
        batch_df: DataFrame slice containing a ``Standardized_Address_1975``
            column.

    Returns:
        A DataFrame with one row per input row, indexed like ``batch_df``.
        Rows whose address is missing or blank are not sent to the API and
        get the placeholder ``pd.Series([None])``.

    Side effects:
        Increments the global ``api_call_count`` once per geocoded address
        and sleeps 5 seconds each time the counter reaches a multiple of 50.
    """
    global api_call_count
    geocoded_data = []
    for _, row in batch_df.iterrows():
        address = row['Standardized_Address_1975']
        if pd.isna(address) or str(address).strip() == '':
            # Nothing to look up: keep the row for alignment, but make no API
            # call and do no throttling. Previously every blank row slept 5 s
            # while the counter sat at 0 or at a multiple of 50.
            geocoded_data.append(pd.Series([None]))
            continue

        geocoded_data.append(geocode_address(address, 1975))

        # "+=" on a global is not atomic across threads; update under the
        # lock and remember the value produced by this particular call.
        with _api_call_lock:
            api_call_count += 1
            calls_so_far = api_call_count

        # Add a delay of 5 seconds after every 50 API calls (sleep outside
        # the lock so other workers are not blocked from counting)
        if calls_so_far % 50 == 0:
            time.sleep(5)
    return pd.DataFrame(geocoded_data, index=batch_df.index)

# Split the sheet into batches of 50 rows; each batch is one task for the pool
batch_size = 50
batches = [df1975[i:i+batch_size] for i in range(0, len(df1975), batch_size)]

# Create a ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor() as executor:
    # Submit the batches for processing
    futures = [executor.submit(process_batch, batch) for batch in batches]

    # Collect the results as they finish (completion order is arbitrary)
    results = []
    for future in as_completed(futures):
        results.append(future.result())

# Concatenate the results into a single DataFrame.
# pd.concat raises on an empty list, so an empty input sheet produces an empty
# frame instead of a crash. sort_index() makes the row order independent of
# which thread finished first.
if results:
    geocoded_df = pd.concat(results).sort_index()
else:
    geocoded_df = pd.DataFrame(index=df1975.index)

# Rows without a usable result are built from pd.Series([None]), which shows up
# as a column labelled 0; it carries no data, so keep it out of the output.
geocoded_df = geocoded_df.drop(columns=[0], errors='ignore')

# Combine the ID_1975, Standardized_Address_1975 columns with the geocoded data
# (aligned on the row index)
result_df = pd.concat([df1975[['ID_1975', 'Standardized_Address_1975']], geocoded_df], axis=1)

# Save the updated dataframe to a new Excel sheet
output_file = 'geocoded_data_1975_1_to_25000.xlsx'
result_df.to_excel(output_file, index=False, engine='openpyxl')
print(f"Data saved to {output_file}")

# Print the count of API calls
print(f"Total API calls: {api_call_count}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Address found in Lubbock County: 3303, 67th Street, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 3303, 67th Street, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 3303, 67th Street, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 4918, 15th Street, Pheasant Ridge, Lubbock, Lubbock County, Texas, 79416, United States
Address found in Lubbock County: 3303, 67th Street, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 3303, 67th Street, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 1608, Bangor Avenue, Alford Terrace, Lubbock, Lubbock County, Texas, 79416, United States
Address found in Lubbock County: 2222, 3rd Street, Jackson-Mahon, Lubbock, Lubbock County, Texas, 79415, United States
Address found in Lubbock County: 1524, 28th Str

In [None]:
import pandas as pd
import googlemaps
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Initialize the Google Maps client.
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable, with
# an interactive prompt as a fallback, so the secret is never stored in the
# notebook or its outputs.
# NOTE(review): an API key was previously hardcoded in this cell; it should be
# revoked/rotated in the Google Cloud console.
import getpass
import os

gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY') or getpass.getpass('Google Maps API key: ')
)

# Function to geocode an address and check if it's in Lubbock County, Texas
def geocode_address(address, year):
    """Geocode one address and flatten Google's first match into a ``pd.Series``.

    Args:
        address: Address to look up. Missing values (``None``/``NaN``) and
            blank strings are skipped without calling the API. Non-string
            values (e.g. a numeric spreadsheet cell) are converted with
            ``str`` before use.
        year: Suffix appended to every label of the returned Series.

    Returns:
        On success, a Series with ``Google_Standard_address_{year}``,
        ``latitude_{year}``, ``longitude_{year}`` and one ``{type}_{year}``
        entry per address-component type (value: the component's long name).
        When several components share a type (e.g. ``political``) the last
        one wins. When the address is missing/blank, the API returns no
        match, or any exception is raised, the one-element placeholder
        ``pd.Series([None])`` is returned instead.

    Side effects:
        Prints whether the match is in Lubbock County, or the error message.
    """
    if pd.isna(address):
        return pd.Series([None])
    if not isinstance(address, str):
        # .strip() does not exist on int/float cells; without this conversion
        # a single numeric cell raised AttributeError outside the try block.
        address = str(address)
    if address.strip() == '':
        return pd.Series([None])

    try:
        result = gmaps.geocode(address)
        if result:
            # Only the first (best) match is used
            top_match = result[0]
            formatted_address = top_match['formatted_address']
            geometry = top_match['geometry']
            address_components = top_match['address_components']

            # Create a dictionary to store the extracted information
            data = {
                f'Google_Standard_address_{year}': formatted_address,
                f'latitude_{year}': geometry['location']['lat'],
                f'longitude_{year}': geometry['location']['lng']
            }

            # One entry per component type; later components overwrite
            # earlier ones that share a type
            for component in address_components:
                for type_ in component['types']:
                    data[f"{type_}_{year}"] = component['long_name']

            # Informational only: the result is returned whether or not the
            # match is in Lubbock County, Texas
            in_lubbock_county = any(
                component['long_name'] == 'Lubbock County'
                and 'administrative_area_level_2' in component['types']
                for component in address_components
            )
            if in_lubbock_county:
                print(f"Address found in Lubbock County: {address}")
            else:
                print(f"Address not in Lubbock County: {address}")

            return pd.Series(data)

    except Exception as e:
        # Best effort: one failing address must not abort the whole batch
        print(f"Error geocoding address: {address}. Error: {str(e)}")

    # Placeholder returned when there is no match or the lookup failed
    return pd.Series([None])

# Load the second part (rows 25001 to last) of the standardized 1975 addresses
data = pd.read_excel('/content/1975_Standardized_Final - 25001 to last.xlsx')
df1975 = pd.DataFrame(data)

# Fail fast: both columns are required further down, and discovering that one
# is missing only after every (billed) API call has been made is wasteful.
_missing_columns = {'ID_1975', 'Standardized_Address_1975'} - set(df1975.columns)
if _missing_columns:
    raise KeyError(f"Input sheet is missing required columns: {sorted(_missing_columns)}")

# Initialize API call counter. It is shared by every worker thread, so all
# updates go through _api_call_lock.
import threading

api_call_count = 0
_api_call_lock = threading.Lock()

# Function to process a batch of addresses
def process_batch(batch_df):
    """Geocode every row of ``batch_df`` and return the results as a DataFrame.

    Args:
        batch_df: DataFrame slice containing a ``Standardized_Address_1975``
            column.

    Returns:
        A DataFrame with one row per input row, indexed like ``batch_df``.
        Rows whose address is missing or blank are not sent to the API and
        get the placeholder ``pd.Series([None])``.

    Side effects:
        Increments the global ``api_call_count`` once per geocoded address
        and sleeps 5 seconds each time the counter reaches a multiple of 50.
    """
    global api_call_count
    geocoded_data = []
    for _, row in batch_df.iterrows():
        address = row['Standardized_Address_1975']
        if pd.isna(address) or str(address).strip() == '':
            # Nothing to look up: keep the row for alignment, but make no API
            # call and do no throttling. Previously every blank row slept 5 s
            # while the counter sat at 0 or at a multiple of 50.
            geocoded_data.append(pd.Series([None]))
            continue

        geocoded_data.append(geocode_address(address, 1975))

        # "+=" on a global is not atomic across threads; update under the
        # lock and remember the value produced by this particular call.
        with _api_call_lock:
            api_call_count += 1
            calls_so_far = api_call_count

        # Add a delay of 5 seconds after every 50 API calls (sleep outside
        # the lock so other workers are not blocked from counting)
        if calls_so_far % 50 == 0:
            time.sleep(5)
    return pd.DataFrame(geocoded_data, index=batch_df.index)

# Split the sheet into batches of 50 rows; each batch is one task for the pool
batch_size = 50
batches = [df1975[i:i+batch_size] for i in range(0, len(df1975), batch_size)]

# Create a ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor() as executor:
    # Submit the batches for processing
    futures = [executor.submit(process_batch, batch) for batch in batches]

    # Collect the results as they finish (completion order is arbitrary)
    results = []
    for future in as_completed(futures):
        results.append(future.result())

# Concatenate the results into a single DataFrame.
# pd.concat raises on an empty list, so an empty input sheet produces an empty
# frame instead of a crash. sort_index() makes the row order independent of
# which thread finished first.
if results:
    geocoded_df = pd.concat(results).sort_index()
else:
    geocoded_df = pd.DataFrame(index=df1975.index)

# Rows without a usable result are built from pd.Series([None]), which shows up
# as a column labelled 0; it carries no data, so keep it out of the output.
geocoded_df = geocoded_df.drop(columns=[0], errors='ignore')

# Combine the ID_1975, Standardized_Address_1975 columns with the geocoded data
# (aligned on the row index)
result_df = pd.concat([df1975[['ID_1975', 'Standardized_Address_1975']], geocoded_df], axis=1)

# Save the updated dataframe to a new Excel sheet
output_file = 'geocoded_data_1975_25000_to_last.xlsx'
result_df.to_excel(output_file, index=False, engine='openpyxl')
print(f"Data saved to {output_file}")

# Print the count of API calls
print(f"Total API calls: {api_call_count}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Address found in Lubbock County: 310, Temple Avenue, Jackson-Mahon, Lubbock, Lubbock County, Texas, 79415, United States
Address found in Lubbock County: 5206, 27th Street, Bowie, Lubbock, Lubbock County, Texas, 79407, United States
Address found in Lubbock County: 1914, 30th Street, Heart of Lubbock, Lubbock, Lubbock County, Texas, 79411, United States
Address found in Lubbock County: 1625, Avenue Y, South Overton, Lubbock, Lubbock County, Texas, 79401, United States
Address found in Lubbock County: 2417, 23rd Street, Heart of Lubbock, Lubbock, Lubbock County, Texas, 79411, United States
Address found in Lubbock County: 2417, 23rd Street, Heart of Lubbock, Lubbock, Lubbock County, Texas, 79411, United States
Address found in Lubbock County: 1934, East Colgate Street, Parkway and Cherry Point, Lubbock, Lubbock County, Texas, 79403, United States
Address found in Lubbock County: 2125, 71st Street, Bayless-Atkins, Lubbock, 

In [None]:
import pandas as pd

# Read the two partial 1975 results into DataFrames.
# The file names must match the `output_file` values written by the two 1975
# geocoding cells; the second part is saved as "..._25000_to_last.xlsx", not
# "..._25001_to_last.xlsx", so reading the latter fails on a fresh run.
df_1_to_25000 = pd.read_excel('/content/geocoded_data_1975_1_to_25000.xlsx')
df_25001_to_last = pd.read_excel('/content/geocoded_data_1975_25000_to_last.xlsx')

# Concatenate the two DataFrames vertically, renumbering the rows
combined_df = pd.concat([df_1_to_25000, df_25001_to_last], ignore_index=True)

# Save the combined DataFrame to a new Excel file
output_file = 'geocoded_data_1975_combined.xlsx'
combined_df.to_excel(output_file, index=False, engine='openpyxl')
print(f"Combined data saved to {output_file}")

Combined data saved to geocoded_data_1975_combined.xlsx


# Standardizing 1985 Addresses as per Google's official address format

In [None]:
import pandas as pd
import googlemaps
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Initialize the Google Maps client.
# The API key is read from the GOOGLE_MAPS_API_KEY environment variable, with
# an interactive prompt as a fallback, so the secret is never stored in the
# notebook or its outputs.
# NOTE(review): an API key was previously hardcoded in this cell; it should be
# revoked/rotated in the Google Cloud console.
import getpass
import os

gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY') or getpass.getpass('Google Maps API key: ')
)

# Function to geocode an address and check if it's in Lubbock County, Texas
def geocode_address(address, year):
    """Geocode one address and flatten Google's first match into a ``pd.Series``.

    Args:
        address: Address to look up. Missing values (``None``/``NaN``) and
            blank strings are skipped without calling the API. Non-string
            values (e.g. a numeric spreadsheet cell) are converted with
            ``str`` before use.
        year: Suffix appended to every label of the returned Series.

    Returns:
        On success, a Series with ``Google_Standard_address_{year}``,
        ``latitude_{year}``, ``longitude_{year}`` and one ``{type}_{year}``
        entry per address-component type (value: the component's long name).
        When several components share a type (e.g. ``political``) the last
        one wins. When the address is missing/blank, the API returns no
        match, or any exception is raised, the one-element placeholder
        ``pd.Series([None])`` is returned instead.

    Side effects:
        Prints whether the match is in Lubbock County, or the error message.
    """
    if pd.isna(address):
        return pd.Series([None])
    if not isinstance(address, str):
        # .strip() does not exist on int/float cells; without this conversion
        # a single numeric cell raised AttributeError outside the try block.
        address = str(address)
    if address.strip() == '':
        return pd.Series([None])

    try:
        result = gmaps.geocode(address)
        if result:
            # Only the first (best) match is used
            top_match = result[0]
            formatted_address = top_match['formatted_address']
            geometry = top_match['geometry']
            address_components = top_match['address_components']

            # Create a dictionary to store the extracted information
            data = {
                f'Google_Standard_address_{year}': formatted_address,
                f'latitude_{year}': geometry['location']['lat'],
                f'longitude_{year}': geometry['location']['lng']
            }

            # One entry per component type; later components overwrite
            # earlier ones that share a type
            for component in address_components:
                for type_ in component['types']:
                    data[f"{type_}_{year}"] = component['long_name']

            # Informational only: the result is returned whether or not the
            # match is in Lubbock County, Texas
            in_lubbock_county = any(
                component['long_name'] == 'Lubbock County'
                and 'administrative_area_level_2' in component['types']
                for component in address_components
            )
            if in_lubbock_county:
                print(f"Address found in Lubbock County: {address}")
            else:
                print(f"Address not in Lubbock County: {address}")

            return pd.Series(data)

    except Exception as e:
        # Best effort: one failing address must not abort the whole batch
        print(f"Error geocoding address: {address}. Error: {str(e)}")

    # Placeholder returned when there is no match or the lookup failed
    return pd.Series([None])

# Load the standardized 1985 addresses that will be geocoded
data = pd.read_excel('/content/1985_Standardized_Final.xlsx')
df1985 = pd.DataFrame(data)

# Fail fast: both columns are required further down, and discovering that one
# is missing only after every (billed) API call has been made is wasteful.
_missing_columns = {'ID_1985', 'Standardized_Address_1985'} - set(df1985.columns)
if _missing_columns:
    raise KeyError(f"Input sheet is missing required columns: {sorted(_missing_columns)}")

# Initialize API call counter. It is shared by every worker thread, so all
# updates go through _api_call_lock.
import threading

api_call_count = 0
_api_call_lock = threading.Lock()

# Function to process a batch of addresses
def process_batch(batch_df):
    """Geocode every row of ``batch_df`` and return the results as a DataFrame.

    Args:
        batch_df: DataFrame slice containing a ``Standardized_Address_1985``
            column.

    Returns:
        A DataFrame with one row per input row, indexed like ``batch_df``.
        Rows whose address is missing or blank are not sent to the API and
        get the placeholder ``pd.Series([None])``.

    Side effects:
        Increments the global ``api_call_count`` once per geocoded address
        and sleeps 5 seconds each time the counter reaches a multiple of 50.
    """
    global api_call_count
    geocoded_data = []
    for _, row in batch_df.iterrows():
        address = row['Standardized_Address_1985']
        if pd.isna(address) or str(address).strip() == '':
            # Nothing to look up: keep the row for alignment, but make no API
            # call and do no throttling. Previously every blank row slept 5 s
            # while the counter sat at 0 or at a multiple of 50.
            geocoded_data.append(pd.Series([None]))
            continue

        geocoded_data.append(geocode_address(address, 1985))

        # "+=" on a global is not atomic across threads; update under the
        # lock and remember the value produced by this particular call.
        with _api_call_lock:
            api_call_count += 1
            calls_so_far = api_call_count

        # Add a delay of 5 seconds after every 50 API calls (sleep outside
        # the lock so other workers are not blocked from counting)
        if calls_so_far % 50 == 0:
            time.sleep(5)
    return pd.DataFrame(geocoded_data, index=batch_df.index)

# Split the sheet into batches of 50 rows; each batch is one task for the pool
batch_size = 50
batches = [df1985[i:i+batch_size] for i in range(0, len(df1985), batch_size)]

# Create a ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor() as executor:
    # Submit the batches for processing
    futures = [executor.submit(process_batch, batch) for batch in batches]

    # Collect the results as they finish (completion order is arbitrary)
    results = []
    for future in as_completed(futures):
        results.append(future.result())

# Concatenate the results into a single DataFrame.
# pd.concat raises on an empty list, so an empty input sheet produces an empty
# frame instead of a crash. sort_index() makes the row order independent of
# which thread finished first.
if results:
    geocoded_df = pd.concat(results).sort_index()
else:
    geocoded_df = pd.DataFrame(index=df1985.index)

# Rows without a usable result are built from pd.Series([None]), which shows up
# as a column labelled 0; it carries no data, so keep it out of the output.
geocoded_df = geocoded_df.drop(columns=[0], errors='ignore')

# Combine the ID_1985, Standardized_Address_1985 columns with the geocoded data
# (aligned on the row index)
result_df = pd.concat([df1985[['ID_1985', 'Standardized_Address_1985']], geocoded_df], axis=1)

# Save the updated dataframe to a new Excel sheet
output_file = 'geocoded_data_1985.xlsx'
result_df.to_excel(output_file, index=False, engine='openpyxl')
print(f"Data saved to {output_file}")

# Print the count of API calls
print(f"Total API calls: {api_call_count}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Address found in Lubbock County: 3403, East 18th Street, Windmill, Lubbock, Lubbock County, Texas, 79403, United States
Address found in Lubbock County: 5520, 78th Street, Farrar, Lubbock, Lubbock County, Texas, 79424, United States
Address found in Lubbock County: 2503, Ash Avenue, Lubbock, Lubbock County, Texas, 79404, United States
Address found in Lubbock County: 3301, 60th Street, Caprock, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 3405, East 18th Street, Windmill, Lubbock, Lubbock County, Texas, 79403, United States
Address found in Lubbock County: 3313, East 17th Street, Windmill, Lubbock, Lubbock County, Texas, 79403, United States
Address found in Lubbock County: 3314, 62nd Street, Melonie Park, Lubbock, Lubbock County, Texas, 79413, United States
Address found in Lubbock County: 3407, East 18th Street, Windmill, Lubbock, Lubbock County, Texas, 79403, United States
Addre