# Ireland Age and Sex Analysis
## Author: Joanna Mnich


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load data
# Read the FY006A table (CSV export of the CSO web service) straight from
# its URL into a DataFrame.
url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/FY006A/CSV/1.0/en"
df = pd.read_csv(filepath_or_buffer=url)

# Peek at the first five rows to understand the structure
df.head(n=5)

In [None]:
# Keep only needed columns

# Rows whose "Statistic" text mentions "Population" (case-insensitive);
# missing values count as non-matching.
# NOTE(review): confirm the export has a column named exactly "Statistic"
# (check df.columns) - a KeyError here means the header is spelled differently.
population_rows = df["Statistic"].str.contains("Population", case=False, na=False)

# Select those rows and the three columns used by the rest of the analysis,
# as an independent copy of the loaded data.
df = df.loc[population_rows, ["Sex", "Single Year of Age", "VALUE"]].copy()

In [None]:
# Clean data

# Drop the "All ages" rows. The explicit .copy() makes the result an
# independent frame, so the column assignments below do not write into a
# filtered slice (which triggers pandas' SettingWithCopyWarning).
df = df[df["Single Year of Age"] != "All ages"].copy()

# Turn the age labels into integers:
#   1. map "Under 1 year" to "0" first - stripping non-digits from it
#      directly would leave "1" instead;
#   2. remove every non-digit character from the remaining labels;
#   3. convert the digit strings to int.
df["Single Year of Age"] = (
    df["Single Year of Age"]
    .replace("Under 1 year", "0")
    .str.replace(r"\D", "", regex=True)
    .astype(int)
)

# Population counts as integers
df["VALUE"] = df["VALUE"].astype(int)

In [None]:
# Keep only Male and Female

# Boolean mask: True where "Sex" is exactly "Male" or "Female"; all other
# categories are dropped.
male_or_female = df["Sex"].isin(["Male", "Female"])
df = df[male_or_female]

# Heading and preview, each on its own line
print("\nCleaned dataset preview:", df.head(), sep="\n")

In [None]:
# Split dataset by sex

# One sub-frame per sex, selected by exact match on the "Sex" column
male_df, female_df = (df[df["Sex"] == sex] for sex in ("Male", "Female"))

In [None]:
# Weighted mean age for each sex

def weighted_mean_age(df_sex):
    """Return the mean of "Single Year of Age" weighted by "VALUE".

    Computed as sum(age * count) / sum(count) over the rows of *df_sex*,
    where the count for each age is taken from the "VALUE" column.
    """
    counts = df_sex["VALUE"]
    ages = df_sex["Single Year of Age"]
    weighted_total = ages.mul(counts).sum()
    return weighted_total / counts.sum()

# Re-derive the per-sex frames so this cell can run on its own
male_df = df[df["Sex"].eq("Male")]
female_df = df[df["Sex"].eq("Female")]

# Weighted mean age for each sex
male_mean, female_mean = map(weighted_mean_age, (male_df, female_df))

# Report both means rounded to two decimals
print("\nWeighted Mean Age:")
for label, mean_age in (("Male:", male_mean), ("Female:", female_mean)):
    print(label, round(mean_age, 2))

In [None]:
# Calculate difference between sexes by age

# Sort by age ascending and by sex DESCENDING, so that within each age the
# "Male" row comes first and the "Female" row second. With an ascending sort
# "Female" would precede "Male": diff() would then leave NaN on the Female
# row and put Male - Female on the Male row, and every reported difference
# would come out as 0.
df_sorted = df.sort_values(
    ["Single Year of Age", "Sex"], ascending=[True, False]
).reset_index(drop=True)

# Compute difference (Female - Male): within each age group diff() subtracts
# the previous row from the current one, so the Female row receives
# Female - Male. The Male row (first in its group) gets NaN, replaced by 0.
# NOTE(review): this relies on exactly one Male and one Female row per age -
# confirm the data carries no further dimension at this point.
df_sorted["Difference (Female - Male)"] = (
    df_sorted.groupby("Single Year of Age")["VALUE"].diff().fillna(0)
)

# Keep one row per age (the Female one, which now holds the difference)
df_diff = df_sorted[df_sorted["Sex"] == "Female"][["Single Year of Age", "Difference (Female - Male)"]]

print("\nDifference between sexes by age:")
print(df_diff.head())