In [1]:
import pandas as pd
import csv

young_lower_bound = 18
prof_lower_bound = 30
old_lower_bound = 65


csv_filepath = "constituency_population_by_age.csv"
data_list = []

# read in data
with open(csv_filepath, newline='', encoding='utf-8') as csvfile:
    csv_reader = csv.reader(csvfile)
    
    for row in csv_reader:
        new_row = [row[1], row[6], row[7]]
        data_list.append(new_row)

data_list.remove(data_list[0])

# print (data_list[:10])
for item in data_list:
    constituency, age, count = item


df = pd.DataFrame(data_list, columns=['Constituency', 'Age', 'Count'])

df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
df['Count'] = pd.to_numeric(df['Count'], errors='coerce')

# print (df.head(10))
mask_young = (df['Age'] >= young_lower_bound) & (df['Age'] <= prof_lower_bound)
mask_prof = (df['Age'] > prof_lower_bound) & (df['Age'] <= old_lower_bound)
mask_old = (df['Age'] > old_lower_bound)

# this calculates the population for each age range within each constituency
young_counts = df[mask_young].groupby('Constituency')['Count'].sum().reset_index(name='Young_Count')
prof_counts = df[mask_prof].groupby('Constituency')['Count'].sum().reset_index(name='Prof_Count')
old_counts = df[mask_old].groupby('Constituency')['Count'].sum().reset_index(name='Old_Count')

# this merges the results into a single DataFrame
result_df = pd.merge(young_counts, prof_counts, on='Constituency', how='outer')
result_df = pd.merge(result_df, old_counts, on='Constituency', how='outer')

#   this fills empty values with 0 (in case some constituencies don't have data for all age ranges)
result_df.fillna(0, inplace=True)

# this calculates the total relevant population for each constituency
result_df['Total_Count'] = result_df['Young_Count'] + result_df['Prof_Count'] + result_df['Old_Count']


# this calculates the percentages
result_df['Young_Perc'] = (result_df['Young_Count'] / result_df['Total_Count']) * 100
result_df['Prof_Perc'] = (result_df['Prof_Count'] / result_df['Total_Count']) * 100
result_df['Old_Perc'] = (result_df['Old_Count'] / result_df['Total_Count']) * 100


#print (result_df.head(10))

# this calculates mean and standard deviation of each age
mean_young_perc = result_df['Young_Perc'].mean()
std_young_perc = result_df['Young_Perc'].std()


mean_prof_perc = result_df['Prof_Perc'].mean()
std_prof_perc = result_df['Prof_Perc'].std()

mean_old_perc = result_df['Old_Perc'].mean()
std_old_perc = result_df['Old_Perc'].std()

print("Mean and Standard Deviation of Percentages:")
print(f"Young_Perc: Mean = {mean_young_perc:.2f}, Std Deviation = {std_young_perc:.2f}")
print(f"Prof_Perc: Mean = {mean_prof_perc:.2f}, Std Deviation = {std_prof_perc:.2f}")
print(f"Old_Perc: Mean = {mean_old_perc:.2f}, Std Deviation = {std_old_perc:.2f}")
















Mean and Standard Deviation of Percentages:
Young_Perc: Mean = 19.93, Std Deviation = 6.30
Prof_Perc: Mean = 57.27, Std Deviation = 3.75
Old_Perc: Mean = 22.80, Std Deviation = 5.97
