In [None]:
# Takes historic school names (with their counties), looks each one up in GeoNames, and returns:
# Latitude, Longitude, GeoNames Name, Feature Code, Feature Class

##Test Input: 
##https://docs.google.com/spreadsheets/d/1VHF2QYGxQdMeTR1kGhoncK21sidmrk6Mis0h5VY4AiE/edit#gid=2140551881


In [None]:
import pandas as pd
import requests

def is_county_in_hierarchy(username, geonameId, target_county):
    """Report whether a GeoNames place lies inside the given county.

    Calls the GeoNames ``hierarchyJSON`` service for ``geonameId`` and checks
    whether ``target_county`` occurs, case-insensitively and as a substring,
    in the ``name`` of any entry of the returned ``geonames`` list.

    Args:
        username: GeoNames account name, sent as the ``username`` parameter.
        geonameId: GeoNames identifier of the place to look up.
        target_county: County name to search for in the hierarchy.

    Returns:
        True if some hierarchy entry's name contains the county name.
        False otherwise, including when the county is blank or not a string,
        the request fails or times out, the HTTP status is not 200, or the
        response body is not valid JSON.
    """
    # '' is a substring of every name, so a blank county would match every
    # place; a missing CSV cell arrives as a float NaN, which has no .lower().
    if not isinstance(target_county, str) or not target_county.strip():
        return False
    county_key = target_county.strip().lower()

    hierarchy_url = "http://api.geonames.org/hierarchyJSON"
    params = {
        'geonameId': geonameId,
        'username': username
    }
    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(hierarchy_url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"GeoNames hierarchy request failed for geonameId {geonameId}: {exc}")
        return False
    if response.status_code != 200:
        return False
    try:
        hierarchy_data = response.json()
    except ValueError:
        # Body was not JSON (the JSON decode errors subclass ValueError).
        return False

    # `or ''` / `or []` guard against explicit nulls in the payload.
    return any(
        county_key in (place.get('name') or '').lower()
        for place in (hierarchy_data.get('geonames') or [])
    )

def get_school_info(username, school, target_county, fuzzy):
    """Search GeoNames for a school and return the first match in the county.

    The school name is split into its first word and the remainder. Several
    queries are tried in order: first word + original remainder, then first
    word + "School", "College", "Academy", and finally the original name with
    " (historical)" appended. Each query is sent to the GeoNames
    ``searchJSON`` service restricted to country ``US`` / ``adminCode1``
    ``TN``. The first result whose feature code is ``SCH`` and for which
    ``is_county_in_hierarchy`` returns True is used.

    Args:
        username: GeoNames account name, sent as the ``username`` parameter.
        school: School name to look up.
        target_county: County name the match must belong to.
        fuzzy: Value sent as the ``fuzzy`` search parameter.

    Returns:
        A 5-tuple ``(lat, lng, name, fcode, fcl)`` taken from the matching
        GeoNames result, or ``(None, None, None, None, None)`` when nothing
        matched or ``school`` is blank / not a string.
    """
    # A missing CSV cell arrives as float NaN and '' has no first word;
    # both used to raise before any search was attempted.
    if not isinstance(school, str) or not school.strip():
        return None, None, None, None, None

    words = school.split()
    # First word of the school's name, used as the base of the broader searches
    base_school_name = words[0]
    school_name_extension = " ".join(words[1:])
    # Educational institution types to include in the search
    institution_types = [school_name_extension, "School", "College", "Academy", school_name_extension + " (historical)"]

    search_url = "http://api.geonames.org/searchJSON"
    tried_queries = set()

    # Attempt searches for each institution type with the base school name
    for institution_type in institution_types:
        # Collapse whitespace so a one-word name does not yield "Name " or
        # "Name  (historical)".
        query = " ".join(f"{base_school_name} {institution_type}".split())
        # e.g. "Lincoln School" would otherwise be searched twice.
        if query in tried_queries:
            continue
        tried_queries.add(query)

        print("Searching ", query, "\n")
        search_params = {
            'q': query,
            'country': 'US',
            'adminCode1': 'TN',
            'username': username,
            'fuzzy': fuzzy,
            'maxRows': 100
        }
        try:
            search_response = requests.get(search_url, params=search_params, timeout=30)
        except requests.RequestException as exc:
            # Keep going with the next query instead of aborting the batch.
            print(f"GeoNames search failed for {query!r}: {exc}")
            continue
        if search_response.status_code != 200:
            continue
        try:
            search_data = search_response.json()
        except ValueError:
            continue

        for result in (search_data.get('geonames') or []):
            # Cheap local check first: the county check costs one HTTP
            # request per result, so only spend it on actual schools.
            if result.get('fcode', '') != 'SCH':
                continue
            if is_county_in_hierarchy(username, result['geonameId'], target_county):
                return result['lat'], result['lng'], result.get('name', ''), result.get('fcode', ''), result.get('fcl', '')

    return None, None, None, None, None

# Load the input CSV into a DataFrame. The loop below reads its 'SCHOOL' and
# 'COUNTY' columns, so both must be present in the file.
csv_file_path = '...'  # Update with the actual path
df = pd.read_csv(csv_file_path)

geonames_username = '...'  # Update with your actual GeoNames username
# Passed to get_school_info as its `fuzzy` argument.
# NOTE(review): the output file name below says "0.75-Fuzzy" while this value
# is 0.8 — confirm which one is intended.
fuzzy_value = 0.8  # Adjust based on your needs

# Look up every row. Each call to get_school_info performs network requests,
# so this loop can take a long time on large inputs; nothing is written to
# disk until the loop has finished.
for index, row in df.iterrows():
    school = row['SCHOOL']
    county = row['COUNTY']
    
    # get_school_info returns five None values when no match was found.
    lat, lng, geoname_name, fcode, fcl = get_school_info(geonames_username, school, county, fuzzy_value)
    if lat and lng:
        # Record the match on this row; rows without a match get no values
        # written and are only reported in the else branch.
        df.at[index, 'Latitude'] = lat
        df.at[index, 'Longitude'] = lng
        df.at[index, 'GeoNames Name'] = geoname_name
        df.at[index, 'Feature Code'] = fcode
        df.at[index, 'Feature Class'] = fcl
    else:
        print(f"No valid results found for {school} in {county}")

# Save the updated DataFrame back to CSV (without the DataFrame index).
# NOTE(review): this is a hard-coded absolute path into one user's home
# directory — change it before running on another machine.
updated_csv_file_path = '/Users/matthewcook/Dropbox/DSP/Broadnax/Tennessee State_1938_April2_0.75-Fuzzy.csv'  # Update with the actual path
df.to_csv(updated_csv_file_path, index=False)
print(f"Updated CSV file saved to {updated_csv_file_path}.")


In [None]:
#TO DO
#Find Nearest Address:
##https://www.geonames.org/maps/us-reverse-geocoder.html#findNearestAddress
#Table image OCR?
#hmdb.org (Historical Marker Database) search automation
#other "State Association Black Schools"


In [None]:
##Cook 2024