In [7]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the Cliopatria GeoJSON (path is relative to this notebook) into a GeoDataFrame
cliopatria = gpd.read_file(filename="../cliopatria.geojson")
# Preview five randomly selected rows (unseeded, so the rows differ between runs)
cliopatria.sample(n=5)

In [None]:
# Build the span of years covered by the dataset: from the earliest FromYear
# through the latest ToYear.
first_year = cliopatria['FromYear'].min()
last_year = cliopatria['ToYear'].max()
# range() stops one short of its stop value, so add 1 to keep the final year
years = range(first_year, last_year + 1)
years

In [None]:
# Count the distinct entries in the Name column.
# dropna=False counts a missing value as an entry too, matching len(unique()).
cliopatria['Name'].nunique(dropna=False)

In [None]:
# Keep only the rows whose MemberOf value is an empty string, then count the
# distinct entries in the Name column of that subset.
# NOTE(review): rows where MemberOf is missing (null) rather than '' are
# excluded by this comparison - confirm that is intended.
has_blank_memberof = cliopatria['MemberOf'].eq('')
cliopatria_minus_subpolities = cliopatria.loc[has_blank_memberof]
# dropna=False counts a missing Name as an entry too, matching len(unique())
cliopatria_minus_subpolities['Name'].nunique(dropna=False)

In [18]:
# For every year, count the distinct names whose [FromYear, ToYear] interval
# (inclusive at both ends) contains that year.
start_years = cliopatria_minus_subpolities['FromYear']
end_years = cliopatria_minus_subpolities['ToYear']
names = cliopatria_minus_subpolities['Name']

polity_counts = [
    # dropna=False counts a missing Name as an entry too, matching len(unique())
    names[(start_years <= year) & (end_years >= year)].nunique(dropna=False)
    for year in years
]

# Collect the per-year counts into a two-column DataFrame
polity_counts_df = pd.DataFrame(
    {
        'Year': years,
        'Number of polities': polity_counts,
    }
)

In [None]:
# Spot-check the result by displaying five randomly chosen rows
polity_counts_df.sample(n=5)

In [None]:
# Line chart of the per-year polity count, drawn on an explicit Axes
fig, ax = plt.subplots()
ax.plot(polity_counts_df['Year'], polity_counts_df['Number of polities'])
ax.set(
    xlabel='Year CE',
    ylabel='Number of polities',
    title='Number of polities in Cliopatria dataset',
)
plt.show()