# Population

#### By Ignacio Riboldi

## PART 1

In [31]:
import pandas as pd

url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/FY006A/CSV/1.0/en"
df = pd.read_csv(url)
df.tail(3)

Unnamed: 0,STATISTIC,Statistic Label,TLIST(A1),CensusYear,C02199V02655,Sex,C02076V03371,Single Year of Age,C03789V04537,Administrative Counties,UNIT,VALUE
9789,FY006AC01,Population,2022,2022,2,Female,650,100 years and over,2ae19629-149d-13a3-e055-000000000001,Cavan County Council,Number,12
9790,FY006AC01,Population,2022,2022,2,Female,650,100 years and over,2ae19629-14a4-13a3-e055-000000000001,Donegal County Council,Number,31
9791,FY006AC01,Population,2022,2022,2,Female,650,100 years and over,2ae19629-1495-13a3-e055-000000000001,Monaghan County Council,Number,7


In [32]:
df = df[df["Sex"] != "Both sexes"]

In [33]:
headers = df.columns.tolist()
headers

['STATISTIC',
 'Statistic Label',
 'TLIST(A1)',
 'CensusYear',
 'C02199V02655',
 'Sex',
 'C02076V03371',
 'Single Year of Age',
 'C03789V04537',
 'Administrative Counties',
 'UNIT',
 'VALUE']

In [34]:
drop_col_list = ['STATISTIC', 'Statistic Label','TLIST(A1)','CensusYear','C02199V02655','C02076V03371','C03789V04537','UNIT']
df.drop(columns=drop_col_list, inplace=True)
df = df[df["Single Year of Age"] != "All ages"]
df['Single Year of Age'] = df['Single Year of Age'].str.replace('Under 1 year', '0')
df['Single Year of Age'] = df['Single Year of Age'].str.replace('\D', '', regex=True)
df['Single Year of Age']=df['Single Year of Age'].astype('int64')
df['VALUE']=df['VALUE'].astype('int64')

#print (df.head(3))
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6464 entries, 3296 to 9791
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Sex                      6464 non-null   object
 1   Single Year of Age       6464 non-null   int64 
 2   Administrative Counties  6464 non-null   object
 3   VALUE                    6464 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 252.5+ KB


In [35]:
# Calculate weighted age
df["weighted_age"] = df["Single Year of Age"] * df["VALUE"]

# Group by sex and compute total weighted ages and total population
weighted_means = (
    df.groupby("Sex", as_index=False)
      .agg(
          total_weighted_age=("weighted_age", "sum"),
          total_population=("VALUE", "sum")
      )
)

# Weighted mean age for each sex
weighted_means["weighted_mean_age"] = (
    weighted_means["total_weighted_age"] / weighted_means["total_population"]
)

# If there are exactly two sexes, calculate the difference between their mean ages
if weighted_means["Sex"].nunique() == 2:
    # Get the difference (absolute value in case the results is negative)
    diff = abs(weighted_means.loc[0, "weighted_mean_age"] - weighted_means.loc[1, "weighted_mean_age"])
    
    # Create a new row with the difference value
    diff_row = pd.DataFrame({
        "Sex": ["Difference_between_sexes"],
        "total_weighted_age": [None],
        "total_population": [None],
        "weighted_mean_age": [diff]
    })
    
    # Append the new row
    weighted_means = pd.concat([weighted_means, diff_row], ignore_index=True)

# Print
print(weighted_means[["Sex", "weighted_mean_age"]])


                        Sex  weighted_mean_age
0                    Female          38.939796
1                      Male          37.739448
2  Difference_between_sexes           1.200348


## PART 2

In [38]:
# Age number
num = 35

# Range within 5 years
lower = num - 5
upper = num + 5

# Filter
people_in_range = df[(df["Single Year of Age"] >= lower) & (df["Single Year of Age"] <= upper)]

# Count by sex
people_by_sex = (
    people_in_range.groupby("Sex", as_index=False)["VALUE"].sum()
    .rename(columns={"VALUE": "people_in_range"})
)

# Total
total_people = people_by_sex["people_in_range"].sum()

# Difference by sex
male_value = people_by_sex.loc[people_by_sex["Sex"] == "Male", "people_in_range"].values[0]
female_value = people_by_sex.loc[people_by_sex["Sex"] == "Female", "people_in_range"].values[0]
diff = abs(male_value - female_value)


# Result

print(people_by_sex.to_string(index=False))
print(f"\nTotal number of people in that age range (both sexes): {total_people:,}")
print(f"Difference between male and female is {diff:,} people.")


   Sex  people_in_range
Female           829012
  Male           768060

Total number of people in that age range (both sexes): 1,597,072
Difference between male and female is 60,952 people.


# PART 3

In [42]:
# Age range
lower, upper = 30, 40

# Filter by age
subset = df[(df["Single Year of Age"] >= lower) & (df["Single Year of Age"] <= upper) & (df["Administrative Counties"] != "Ireland")  # <- excluye total
]

# Grouping by region and sex
region_sex = subset.groupby(["Administrative Counties", "Sex"], as_index=False)["VALUE"].sum()

# Absolute difference
region_pivot["sex_difference"] = abs(region_pivot["Male"] - region_pivot["Female"])

# Region with the biggest difference
max_region = region_pivot["sex_difference"].idxmax()
max_diff = region_pivot.loc[max_region, "sex_difference"]

# Results
print(f"Region with biggest population difference between sexes (ages 30–40):")
print(f"{max_region} — {max_diff:,} people difference")


Region with biggest population difference between sexes (ages 30–40):
Fingal County Council — 2,942 people difference


# END