In [3]:
import pandas as pd
import seaborn as sns
from sklearn import linear_model
from sklearn.metrics import r2_score
import numpy as np

In [6]:
df_population = pd.read_csv("population.csv")

In [7]:
df_population.shape

(88977, 8)

In [8]:
df_population.head() # forma de la tabla

Unnamed: 0,Statistic Label,CensusYear,General Health,Sex,Age Group,Towns,UNIT,VALUE
0,Population,2022,General health - All,Both sexes,All ages,"Tullow, Co Carlow",Number,5138
1,Population,2022,General health - All,Both sexes,All ages,"Carlow, Counties Carlow & Laois",Number,27351
2,Population,2022,General health - All,Both sexes,All ages,"Muinebeag (Bagenalstown), Co Carlow",Number,2945
3,Population,2022,General health - All,Both sexes,All ages,"Dublin city and suburbs, Counties Dublin & Meath",Number,1263219
4,Population,2022,General health - All,Both sexes,All ages,"Saggart, Co Dublin",Number,4573


In [10]:
df_population[['Towns', 'County']] = df_population['Towns'].str.split(', ', expand=True) #Agregue la columna County 

In [13]:
df_population.tail()

Unnamed: 0,Statistic Label,CensusYear,General Health,Sex,Age Group,Towns,UNIT,VALUE,County
88972,Population,2022,Not stated,Female,85 years and over,State,Number,4449,
88973,Population,2022,Not stated,Female,85 years and over,"Other towns with a population between 1,000 - ...",Number,911,
88974,Population,2022,Not stated,Female,85 years and over,Other towns with a population under 500,Number,152,
88975,Population,2022,Not stated,Female,85 years and over,Other towns with a population between 500 - 999,Number,72,
88976,Population,2022,Not stated,Female,85 years and over,Remainder of country,Number,66,


In [26]:
df_population_total = df_population[(df_population['General Health'] != 'not started') & 
                                    (df_population['General Health'] == 'General health - All') & 
                                    (df_population['Sex'] == 'Both sexes') & 
                                    (df_population['Age Group'] == 'All ages')]
#filtrado de tabla para obtener los totales por County

In [27]:
df_population_total.shape

(223, 9)

In [28]:
df_population_total.tail()

Unnamed: 0,Statistic Label,CensusYear,General Health,Sex,Age Group,Towns,UNIT,VALUE,County
218,Population,2022,General health - All,Both sexes,All ages,State,Number,5149139,
219,Population,2022,General health - All,Both sexes,All ages,"Other towns with a population between 1,000 - ...",Number,1518638,
220,Population,2022,General health - All,Both sexes,All ages,Other towns with a population under 500,Number,132170,
221,Population,2022,General health - All,Both sexes,All ages,Other towns with a population between 500 - 999,Number,110857,
222,Population,2022,General health - All,Both sexes,All ages,Remainder of country,Number,107434,


In [23]:
Counties = df_population_total['County'].value_counts()
print(Counties)

Co Cork                           27
Co Kildare                        17
Co Meath                          14
Co Dublin                         13
Co Galway                         12
Co Tipperary                      11
Co Wicklow                        10
Co Donegal                         9
Co Cavan                           8
Co Kerry                           8
Co Louth                           7
Co Wexford                         7
Co Mayo                            6
Co Sligo                           6
Co Clare                           6
Co Limerick                        6
Co Offaly                          5
Co Westmeath                       5
Co Roscommon                       4
Co Waterford                       4
Co Monaghan                        4
Co Laois                           4
Co Longford                        3
Co Kilkenny                        3
Co Carlow                          2
Counties Dublin & Meath            1
Counties Kildare & Meath           1
C

In [30]:
population_by_County = df_population_total.groupby('County')['VALUE'].sum()
print(population_by_County) # total por County, Todos los sexos, todas las edades, todas las cat de salud

#Se excluyen los no iniciados y Towns con menos de 1500 habitantes

County
Co Carlow                            8083
Co Cavan                            28915
Co Clare                            47213
Co Cork                            369317
Co Donegal                          48603
Co Dublin                          159248
Co Galway                          131551
Co Kerry                            55839
Co Kildare                         164720
Co Kilkenny                         32167
Co Laois                            32366
Co Leitrim                           1667
Co Limerick                         18631
Co Longford                         14865
Co Louth                            58216
Co Mayo                             40260
Co Meath                           123688
Co Monaghan                         19450
Co Offaly                           33522
Co Roscommon                        14205
Co Sligo                            30152
Co Tipperary                        52130
Co Waterford                        24970
Co Westmeath               

In [55]:
dist_population = df_population[(df_population['Sex'] != 'Both sexes') & 
                 (df_population['Age Group'] != 'All ages')]

In [56]:
dist_population.head(50)

Unnamed: 0,Statistic Label,CensusYear,General Health,Sex,Age Group,Towns,UNIT,VALUE,County
4460,Population,2022,General health - All,Male,0 - 4 years,Tullow,Number,167,Co Carlow
4461,Population,2022,General health - All,Male,0 - 4 years,Carlow,Number,812,Counties Carlow & Laois
4462,Population,2022,General health - All,Male,0 - 4 years,Muinebeag (Bagenalstown),Number,86,Co Carlow
4463,Population,2022,General health - All,Male,0 - 4 years,Dublin city and suburbs,Number,34988,Counties Dublin & Meath
4464,Population,2022,General health - All,Male,0 - 4 years,Saggart,Number,361,Co Dublin
4465,Population,2022,General health - All,Male,0 - 4 years,Rathcoole,Number,287,Co Dublin
4466,Population,2022,General health - All,Male,0 - 4 years,Newcastle,Number,186,Co Dublin
4467,Population,2022,General health - All,Male,0 - 4 years,Rush,Number,346,Co Dublin
4468,Population,2022,General health - All,Male,0 - 4 years,Skerries,Number,319,Co Dublin
4469,Population,2022,General health - All,Male,0 - 4 years,Portmarnock,Number,360,Co Dublin


In [67]:
dist_population = df_population[df_population['Sex'].isin(['male', 'female'])]

In [71]:
dist_population = dist_population.groupby(['Sex', 'Age Group', 'County']).agg({'VALUE': 'sum'})

In [72]:
dist_population.shape

(2394, 1)

In [73]:
dist_population.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,VALUE
Sex,Age Group,County,Unnamed: 3_level_1
Both sexes,0 - 4 years,Co Carlow,1002
Both sexes,0 - 4 years,Co Cavan,3996
Both sexes,0 - 4 years,Co Clare,5788
Both sexes,0 - 4 years,Co Cork,40592
Both sexes,0 - 4 years,Co Donegal,6540
