In [None]:
import os
import time

import pandas as pd
import requests


# Documentation: 

### **Filling Missing State Names using Google Maps API**

**Objective**: Fill in missing state names for rows that have valid latitude and longitude coordinates.

**Caching Mechanism**: To minimize Google API costs and redundant requests, we implemented a caching mechanism. For each latitude-longitude pair, the state name result was cached. If the same coordinates were encountered again, we used the cached value instead of making another API call.

**Error Handling**: We used a `try-except` block to manage network issues and HTTP errors. If an API request fails, the error is printed and the lookup returns no state name, so processing continues with the next row.

**Rate Limiting**: Google Maps API has a rate limit, so we introduced a delay of 0.1 seconds between requests to prevent rate limit violations.

**Standardization of State Names**: Certain states have multiple valid names (e.g., "NCT of Delhi" vs. "Delhi"). To ensure consistency, state names were standardized after being fetched.


# Find State Name 

In [None]:

# Cache of completed lookups, keyed by (latitude, longitude).
# A cached value of None records a definitive "outside India / no state found"
# answer, so those coordinates are not sent to the (billed) API again either.
cache = {}

# Function to find the state name using Google Maps API
def find_indian_state_google_maps(latitude, longitude, api_key, timeout=10):
    """
    Finds the state name associated with a set of coordinates using the Google Maps
    Geocoding API and verifies that the coordinates are within India.

    Definitive answers (a state name, or None because the point is outside India or
    has no state) are stored in the module-level `cache`. Transient failures
    (network/HTTP errors, unparsable bodies, API error statuses such as
    OVER_QUERY_LIMIT or REQUEST_DENIED) are printed and NOT cached, so the same
    coordinates can be retried later.

    Args:
        latitude, longitude (float, float): Latitude and longitude coordinates
        api_key (str): Google Maps API key
        timeout (float): Seconds to wait for the HTTP request before giving up

    Returns:
        state_name (str): If coordinates are within India, return the state name
        None (None): If the coordinates are outside India, no state is found,
            or the request failed
    """
    key = (latitude, longitude)

    # Membership test (not .get) so that cached None answers count as hits
    if key in cache:
        return cache[key]

    # Send the request; `params` lets requests handle URL encoding, and the
    # timeout prevents one stalled connection from hanging the whole run.
    try:
        response = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={"latlng": f"{latitude},{longitude}", "key": api_key},
            timeout=timeout,
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()  # Raises a ValueError subclass on a malformed body
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Request failed: {e}")
        return None

    # The Geocoding API reports quota/auth problems inside the JSON body while
    # still answering HTTP 200, so the body status has to be checked separately.
    status = data.get("status")
    if status not in (None, "OK", "ZERO_RESULTS"):
        print(f"Geocoding API error: {status} {data.get('error_message', '')}")
        return None

    # Take the first result that has both country == India and a state component
    state_name = None
    for result in data.get("results") or []:
        country = None
        candidate = None
        for component in result.get("address_components", []):
            component_types = component.get("types", [])
            if "country" in component_types:
                country = component.get("long_name")
            if "administrative_area_level_1" in component_types:
                candidate = component.get("long_name")

        if country == "India" and candidate:
            state_name = candidate
            break

    # Cache the definitive answer, including None for "not in India / no state"
    cache[key] = state_name
    return state_name


# Fill in Missing State names

In [None]:
# Function to fill in missing state names in the DataFrame
def fill_missing_state_names(df, api_key, delay=0.1):
    """
    Fills missing values of df["State_name"] in place by reverse-geocoding each
    row's "Lat"/"Long" with find_indian_state_google_maps, then standardizes
    known state-name variants across the whole column.

    Only rows whose State_name is missing and whose Lat and Long are both present
    are looked up. Progress is printed every 1000 rows.

    Args:
        df (pd.DataFrame): Frame with "State_name", "Lat" and "Long" columns;
            modified in place
        api_key (str): Google Maps API key
        delay (float): Seconds to pause after each lookup that was not already in
            the cache, to stay under the API rate limit. Cache hits do not pause
            because no request is sent for them.

    Returns:
        None
    """
    start_time = time.time()  # Measure the time taken

    # Rows that lack a state name but have both coordinates
    has_coordinates = df["Lat"].notna() & df["Long"].notna()
    observations_missing_state_name = df.loc[
        df["State_name"].isna() & has_coordinates, ["Lat", "Long"]
    ]
    total = len(observations_missing_state_name)

    # Iterate over (index label, latitude, longitude) without building row Series
    lookups = zip(
        observations_missing_state_name.index,
        observations_missing_state_name["Lat"],
        observations_missing_state_name["Long"],
    )
    for counter, (idx, latitude, longitude) in enumerate(lookups):
        if (counter % 1000) == 0:  # Report time every 1000 observations
            print(f"{counter}: {time.time() - start_time} seconds elapsed; {counter / total * 100:.2f}% Done")

        # Remember whether this lookup will be served from the cache, i.e.
        # whether an API request is actually about to be made
        already_cached = (latitude, longitude) in cache

        # Call the Google Maps API function to find the state name based on coordinates
        state_name = find_indian_state_google_maps(latitude, longitude, api_key)

        if state_name:  # Update the state name if found
            df.at[idx, "State_name"] = state_name

        # Rate-limit only real API requests
        if not already_cached and delay > 0:
            time.sleep(delay)

    # Standardize the state names with known variations
    df['State_name'] = df['State_name'].replace({
        'Andaman and Nicobar': 'Andaman and Nicobar Islands',
        'NCT of Delhi': 'Delhi'
    })

    print(f"Finished in {time.time() - start_time} seconds")



In [None]:

# Load the data
df = pd.read_csv("alldata.csv")

# Read the API key from the environment so that it is never stored in the notebook.
# Fail fast if it is missing instead of sending a placeholder key for every row.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to your Google Maps API key"
    )

# Test Google Maps API function on one fixed coordinate pair before the full run
state = find_indian_state_google_maps(11.62616, 75.57842, api_key)
print("Detected State:", state)

# Fill in the missing state names in the dataset (modifies df in place)
fill_missing_state_names(df, api_key)

# Save the updated dataframe to a CSV file (optional)
df.to_csv("alldata_updated.csv", index=False)