# Give a date range

Goal: Take dates from Karrow's index that are imprecise in some way and turn them into a date range.

In [None]:
import pandas as pd
import numpy as np
import logging

# Set up logging: the root logger defaults to WARNING, so lower it to INFO
# to make the logging.info progress messages in the cells below visible.
logging.basicConfig(level=logging.INFO)

In [None]:
# Read the CSV that combines the Index to Place and Date with the mapmakers' names.
mapmaker_index_csv = '../outputs/index_to_place_and_date_and_mapmaker_names.csv'
index_with_mapmakers = pd.read_csv(mapmaker_index_csv)

# Display the loaded table as the cell output.
index_with_mapmakers

In [None]:
# Create the two estimate columns (earliest / latest publication date of the map),
# initialised to NaN so that rows without an estimate yet are easy to spot.
for estimate_column in ('earliest_date_estimate', 'latest_date_estimate'):
    index_with_mapmakers[estimate_column] = np.nan

# Display the table with the new columns as the cell output.
index_with_mapmakers

In [None]:
# Fill in the easier date estimates: any row whose date is a single plain year
# gets that year as both its earliest and its latest estimate.
# NOTE: in general, iterating through rows is poor practice in pandas, but in this case it makes the code easier, and the data set is small, so I'm not worried about performance.

total_data_count = len(index_with_mapmakers)

for index, row in index_with_mapmakers.iterrows():
    # Normalise the raw cell to text first: a blank CSV cell is loaded by pandas as a
    # float NaN (and a mixed-type column can hold real ints), neither of which has
    # string methods. str(NaN) is 'nan', which simply fails the digit check below.
    date = str(row['date'])

    # isdecimal() (unlike isnumeric()) only accepts characters that int() can parse,
    # so values such as '½' or '²' are skipped instead of raising ValueError.
    if not date.isdecimal():
        continue

    # a single year between 1400 and 1900 (inclusive) is used as both the earliest
    # and the latest date estimate
    year = int(date)
    if 1400 <= year <= 1900:
        index_with_mapmakers.at[index, 'earliest_date_estimate'] = year
        index_with_mapmakers.at[index, 'latest_date_estimate'] = year

# report out the number of single year dates that we found
single_year_dates_count = index_with_mapmakers['earliest_date_estimate'].notnull().sum()

logging.info(f'Found {single_year_dates_count} single year dates out of {total_data_count} total data points')

# Spot-check one row whose estimate is expected to still be unset (see the log message).
# The position is hard-coded, so guard it: on a data set with fewer rows the lookup
# would otherwise raise IndexError.
sanity_check_position = 2318
if sanity_check_position < total_data_count:
    logging.info(f"Value that should be empty or NaN or not a number: {index_with_mapmakers['earliest_date_estimate'].iloc[sanity_check_position]}")
else:
    logging.warning(f'Skipping spot check: no row at position {sanity_check_position} (only {total_data_count} rows)')

# Display the updated table as the cell output.
index_with_mapmakers