In [15]:
# import pandas as pd
# import requests

# def geocode_city_state(city_state):
#     url = f"https://nominatim.openstreetmap.org/search?format=json&q={city_state}"
#     response = requests.get(url)
#     if response.status_code == 200:
#         data = response.json()
#         if data:
#             return data[0]['lat'], data[0]['lon']
#     return None, None

# # Read the original CSV file
# original_csv_file = 'cleaned_cham_api_1800_1963.csv'
# data = pd.read_csv(original_csv_file)

# # Function to extract latitude and longitude from place
# def extract_coordinates(row):
#     place = row['City']
#     longitude, latitude = geocode_city_state(place)
#     return longitude, latitude

# # Apply the function to the DataFrame
# data['reclong'], data['reclat'] = zip(*data.apply(extract_coordinates, axis=1))

# # Save the new CSV file
# new_csv_file = 'cham_geo_dataset_1800.csv'
# data.to_csv(new_csv_file, index=False)

# print("New CSV file has been created with latitude and longitude columns.")


In [16]:
# import pandas as pd
# import json

# def csv_to_geojson(csv_file):
#     data = pd.read_csv(csv_file)
    
#     features = []
#     for _, row in data.iterrows():
#         # Check if location coordinates are not NaN
#         if not pd.isna(row['reclong']) and not pd.isna(row['reclat']):
#             feature = {
#                 'type': 'Feature',
#                 'properties': {
#                     'date': row['date'],
#                     'title': row['title'],
#                     'id': row['id'],
#                     'place': row['place'],
#                 },
#                 'geometry': {
#                     'type': 'Point',
#                     'coordinates': [row['reclong'], row['reclat']]
#                 }
#             }
#             features.append(feature)
    
#     geojson_data = {
#         'type': 'FeatureCollection',
#         'features': features
#     }
    
#     return json.dumps(geojson_data)

# # Example usage:
# csv_file = 'cham_geo_dataset_1800.csv'
# geojson = csv_to_geojson(csv_file)

# # Save the GeoJSON to a file
# output_geojson_file = 'cham_1800_1963.geojson'
# with open(output_geojson_file, 'w') as f:
#     f.write(geojson)

# print("success")


In [17]:
import pandas as pd
import requests
import json
import time

def geocode_city_state(city_state, max_retries=3):
    # List of known problematic inputs to skip processing
    skip_list = ['City1, State1', 'City2, State2', 'City3, State3']
    
    if city_state in skip_list:
        print("Skipping processing for:", city_state)
        return None, None
    
    retries = 0
    while retries < max_retries:
        try:
            print("Geocoding:", city_state)
            url = f"https://nominatim.openstreetmap.org/search?format=json&q={city_state}"
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json()
                if data:
                    return data[0]['lat'], data[0]['lon']
        except Exception as e:
            print("Error:", e)
        
        # Increment retry count and wait before retrying
        retries += 1
        if retries < max_retries:
            print("Retrying after 3 seconds...")
            time.sleep(3)
    
    # Return None if all retries failed
    return None, None

# Read the original CSV file
original_csv_file = 'cleaned_cham_api_1800_1963.csv'
data = pd.read_csv(original_csv_file)

# Updated function to extract latitude and longitude from place
def extract_coordinates(row):
    if pd.isna(row['City']) or pd.isna(row['State']):
        return None, None
    
    city_state = row['City'] + ', ' + row['State']
    latitude, longitude = geocode_city_state(city_state)
    return latitude, longitude

# Add timestamps to track execution time
start_time = time.time()

# Apply the function to the DataFrame
data['reclat'], data['reclong'] = zip(*data.apply(extract_coordinates, axis=1))

# Check for NaN values
print("NaN values after geocoding:", data[['reclat', 'reclong']].isnull().sum())

# Drop rows with NaN values
data.dropna(subset=['reclat', 'reclong'], inplace=True)

# Save the new CSV file
new_csv_file = 'cham_geo_dataset_1800.csv'
data.to_csv(new_csv_file, index=False)
print("CSV file saved successfully.")

# Print execution time
end_time = time.time()
execution_time = end_time - start_time
print("Data processing completed in", execution_time, "seconds.")

def csv_to_geojson(csv_file):
    data = pd.read_csv(csv_file)
    
    features = []
    for _, row in data.iterrows():
        feature = {
            'type': 'Feature',
            'properties': {
                'date': row['date'],
                'title': row['title'],
                'id': row['id'],
                'place': row['place']
            },
            'geometry': {
                'type': 'Point',
                'coordinates': [row['reclong'], row['reclat']]
            }
        }
        features.append(feature)
    
    geojson_data = {
        'type': 'FeatureCollection',
        'features': features
    }
    
    return json.dumps(geojson_data)

# Example usage:
csv_file = 'cham_geo_dataset_1800.csv'
geojson = csv_to_geojson(csv_file)

# Save the GeoJSON to a file
output_geojson_file = 'cham_1800_1963.geojson'
with open(output_geojson_file, 'w') as f:
    f.write(geojson)

print("GeoJSON file has been created successfully.")


Geocoding: Omaha, Neb.
Geocoding: Washington, D.C.
Geocoding: Washington, D.C.
Geocoding: Salt Lake City, Utah
Geocoding: Washington, D.C.
Geocoding: Washington, D.C.
Geocoding: Great Falls, Mont.
Geocoding: Washington, D.C.
Geocoding: Washington, D.C.
Geocoding: Worcester, Mass.
Geocoding: Richmond, Va.
Geocoding: Washington, D.C.
Geocoding: Honolulu, Oahu
Geocoding: Montgomery, Ala.
Geocoding: New York, N.Y.
Geocoding: Birmingham, Ala.
Geocoding: Detroit, Mich
Geocoding: Red Lodge, Mont.
Geocoding: Cordele, Georgia
Geocoding: New York, N.Y.
Geocoding: Washington, D.C.
Geocoding: Minneapolis, Minn.
Geocoding: Sauk Centre, Stearns County
Geocoding: Jackson, Miss.
Geocoding: Detroit, Mich
Geocoding: New York, N.Y.
Geocoding: Seattle, Wash. Terr. [Wash.
Geocoding: Rock Island, Ill.
Geocoding: Las Vegas, Nev.
Geocoding: Nome, Alaska
Geocoding: Monroe City, Mo.
Geocoding: Seward, Alaska
Geocoding: Marietta, Ohio
Geocoding: Topeka, Kansas
Geocoding: Washington, D.C.
Geocoding: Tulsa, Okla.


KeyboardInterrupt: 