In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# load CSV file
df = pd.read_csv("mayors_core.csv")

# take a look
df.head()

Unnamed: 0,person,personLabel,birthDate,genderLabel,partyLabel,start,end,positionLabel,cityLabel
0,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18T00:00:00Z,male,Republican Party of Minnesota,1876-04-11T00:00:00Z,1877-04-10T00:00:00Z,Mayor of Minneapolis,Minneapolis
1,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18T00:00:00Z,male,Republican Party of Minnesota,1882-04-11T00:00:00Z,1884-04-08T00:00:00Z,Mayor of Minneapolis,Minneapolis
2,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18T00:00:00Z,male,Republican Party of Minnesota,1886-04-13T00:00:00Z,1889-01-07T00:00:00Z,Mayor of Minneapolis,Minneapolis
3,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18T00:00:00Z,male,Republican Party of Minnesota,1901-01-07T00:00:00Z,1902-08-27T00:00:00Z,Mayor of Minneapolis,Minneapolis
4,http://www.wikidata.org/entity/Q4668117,Abner C. Brownell,1813-01-01T00:00:00Z,male,Democratic Party,1852-01-01T00:00:00Z,1854-12-31T00:00:00Z,mayor of Cleveland,Cleveland


In [2]:
# Inspect structure
print("\n--- Dataset Info ---")
print(df.info())

print("\n--- First 5 Rows ---")
print(df.head())

print("\n--- Summary Statistics ---")
print(df.describe(include="all"))

# Convert date columns to datetime
for col in ["birthDate", "start", "end"]:
    df[col] = pd.to_datetime(df[col], errors="coerce")

# Create extra columns
df["start_year"] = df["start"].dt.year
df["end_year"] = df["end"].dt.year
df["birth_year"] = df["birthDate"].dt.year

# Age at start of mayorship
df["age_at_start"] = df["start_year"] - df["birth_year"]

print("\n--- Basic Properties ---")
print("Number of rows:", len(df))
print("Unique mayors:", df['personLabel'].nunique())
print("Cities covered:", df['cityLabel'].nunique())
print("Time range:", df['start_year'].min(), "-", df['end_year'].max())
print("Parties:", df['partyLabel'].nunique())
print("Genders:", df['genderLabel'].value_counts().to_dict())



--- Dataset Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 957 entries, 0 to 956
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   person         957 non-null    object
 1   personLabel    957 non-null    object
 2   birthDate      957 non-null    object
 3   genderLabel    957 non-null    object
 4   partyLabel     957 non-null    object
 5   start          957 non-null    object
 6   end            957 non-null    object
 7   positionLabel  957 non-null    object
 8   cityLabel      957 non-null    object
dtypes: object(9)
memory usage: 67.4+ KB
None

--- First 5 Rows ---
                                    person        personLabel  \
0  http://www.wikidata.org/entity/Q4647455         A. A. Ames   
1  http://www.wikidata.org/entity/Q4647455         A. A. Ames   
2  http://www.wikidata.org/entity/Q4647455         A. A. Ames   
3  http://www.wikidata.org/entity/Q4647455         A. A. Ames   
4  http:

### Conclusions:
1) We have full data coverage no null values anywhere
2) 838 unique mayors, 2 genders, 29 parties, 241 cities
3) 838 unique mayors out 957 total m, means that some mayors served more than once 

In [3]:
# Now let's have a look again
df.head()

Unnamed: 0,person,personLabel,birthDate,genderLabel,partyLabel,start,end,positionLabel,cityLabel,start_year,end_year,birth_year,age_at_start
0,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18 00:00:00+00:00,male,Republican Party of Minnesota,1876-04-11 00:00:00+00:00,1877-04-10 00:00:00+00:00,Mayor of Minneapolis,Minneapolis,1876,1877,1842,34
1,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18 00:00:00+00:00,male,Republican Party of Minnesota,1882-04-11 00:00:00+00:00,1884-04-08 00:00:00+00:00,Mayor of Minneapolis,Minneapolis,1882,1884,1842,40
2,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18 00:00:00+00:00,male,Republican Party of Minnesota,1886-04-13 00:00:00+00:00,1889-01-07 00:00:00+00:00,Mayor of Minneapolis,Minneapolis,1886,1889,1842,44
3,http://www.wikidata.org/entity/Q4647455,A. A. Ames,1842-01-18 00:00:00+00:00,male,Republican Party of Minnesota,1901-01-07 00:00:00+00:00,1902-08-27 00:00:00+00:00,Mayor of Minneapolis,Minneapolis,1901,1902,1842,59
4,http://www.wikidata.org/entity/Q4668117,Abner C. Brownell,1813-01-01 00:00:00+00:00,male,Democratic Party,1852-01-01 00:00:00+00:00,1854-12-31 00:00:00+00:00,mayor of Cleveland,Cleveland,1852,1854,1813,39


In [4]:
# Save cleaned dataset
df.to_csv("mayors_cleaned.csv", index=False)

## Add the cleaned version to Data

Now we will work with mayors_cleaned.csv file