# Population

#### By Ignacio Riboldi

In [10]:
import pandas as pd

# CSO PxStat REST endpoint that serves dataset FY006A as CSV
# (population by sex, single year of age and administrative county).
url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/FY006A/CSV/1.0/en"

# pandas downloads and parses the CSV straight from the URL.
df = pd.read_csv(filepath_or_buffer=url)

# Show the last few rows as a quick check of the download and column layout.
df.tail(n=3)

Unnamed: 0,STATISTIC,Statistic Label,TLIST(A1),CensusYear,C02199V02655,Sex,C02076V03371,Single Year of Age,C03789V04537,Administrative Counties,UNIT,VALUE
9789,FY006AC01,Population,2022,2022,2,Female,650,100 years and over,2ae19629-149d-13a3-e055-000000000001,Cavan County Council,Number,12
9790,FY006AC01,Population,2022,2022,2,Female,650,100 years and over,2ae19629-14a4-13a3-e055-000000000001,Donegal County Council,Number,31
9791,FY006AC01,Population,2022,2022,2,Female,650,100 years and over,2ae19629-1495-13a3-e055-000000000001,Monaghan County Council,Number,7


In [11]:
# Keep only the rows for the individual sex categories by excluding the
# "Both sexes" rows.
# .copy() makes the result an independent DataFrame: later cells modify `df`
# in place, and doing that on a filtered slice can raise SettingWithCopyWarning
# on pandas versions without Copy-on-Write.
df = df[df["Sex"] != "Both sexes"].copy()

In [12]:
# Column names as a plain Python list, displayed for inspection.
headers = list(df.columns)
headers

['STATISTIC',
 'Statistic Label',
 'TLIST(A1)',
 'CensusYear',
 'C02199V02655',
 'Sex',
 'C02076V03371',
 'Single Year of Age',
 'C03789V04537',
 'Administrative Counties',
 'UNIT',
 'VALUE']

In [13]:
# Identifier, code and metadata columns that the age analysis below does not use.
# NOTE(review): 'Administrative Counties' is dropped as well, so every later sum
# pools all areas together. If the table also contains a national-total area,
# absolute counts would include it on top of the individual areas - confirm.
drop_col_list = [
    'STATISTIC', 'Statistic Label', 'TLIST(A1)', 'CensusYear',
    'Administrative Counties', 'C02199V02655', 'C02076V03371',
    'C03789V04537', 'UNIT',
]

# Read the age labels as text so the cell gives the same result when it is
# re-run after the column has already been converted to integers.
age_labels = df['Single Year of Age'].astype(str)
is_single_age = age_labels != 'All ages'  # exclude the all-ages aggregate rows

df = (
    df.loc[is_single_age]
      # Not in place: no mutation of a filtered slice, and errors='ignore'
      # lets a re-run succeed once the columns are already gone.
      .drop(columns=drop_col_list, errors='ignore')
      .assign(**{
          'Single Year of Age': lambda frame: (
              frame['Single Year of Age']
              .astype(str)
              # Map the only label below one year to 0 before stripping text,
              # otherwise 'Under 1 year' would become 1.
              .str.replace('Under 1 year', '0', regex=False)
              # Strip every non-digit, e.g. '100 years and over' -> '100'.
              # Raw string: '\D' is an invalid escape in a normal literal.
              .str.replace(r'\D', '', regex=True)
              .astype('int64')
          ),
          'VALUE': lambda frame: frame['VALUE'].astype('int64'),
      })
)

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6464 entries, 3296 to 9791
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Sex                 6464 non-null   object
 1   Single Year of Age  6464 non-null   int64 
 2   VALUE               6464 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 202.0+ KB


In [14]:
# Contribution of each row to the mean: age multiplied by the head-count.
# .assign returns a new frame, so nothing is written into a filtered slice
# of the data (which can raise SettingWithCopyWarning).
df = df.assign(weighted_age=df["Single Year of Age"] * df["VALUE"])

# Per sex: the sum of age * count and the total head-count.
weighted_means = (
    df.groupby("Sex", as_index=False)
      .agg(
          total_weighted_age=("weighted_age", "sum"),
          total_population=("VALUE", "sum"),
      )
)

# Weighted mean age for each sex = sum(age * count) / sum(count).
weighted_means["weighted_mean_age"] = (
    weighted_means["total_weighted_age"] / weighted_means["total_population"]
)

# If there are exactly two sexes, append a row with the gap between their mean ages.
if weighted_means["Sex"].nunique() == 2:
    mean_ages = weighted_means["weighted_mean_age"]

    # Absolute value, so the result does not depend on which sex is listed first.
    diff = abs(mean_ages.iloc[0] - mean_ages.iloc[1])

    # The totals have no meaning for the difference row. NaN (not None) keeps
    # those columns numeric after the concat and avoids pandas' deprecated
    # handling of all-NA object columns when it picks the result dtype.
    diff_row = pd.DataFrame({
        "Sex": ["Difference_between_sexes"],
        "total_weighted_age": [float("nan")],
        "total_population": [float("nan")],
        "weighted_mean_age": [diff],
    })

    weighted_means = pd.concat([weighted_means, diff_row], ignore_index=True)

# Show the mean age per sex and, when present, the difference row.
print(weighted_means[["Sex", "weighted_mean_age"]])


                        Sex  weighted_mean_age
0                    Female          38.939796
1                      Male          37.739448
2  Difference_between_sexes           1.200348


#END