In [1]:
# Import the libraries used for data handling, plotting and modelling
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from scipy.optimize import curve_fit
from matplotlib.ticker import ScalarFormatter

In [2]:
# Loading the dataset.
# The path uses forward slashes because they are accepted as separators on
# Windows, Linux and macOS alike; the previous backslash-separated string was
# only a valid relative path on Windows.
df1 = pd.read_csv('../Datasets_data.cso.ie/PEA11.20231004T131042.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'Datasets_data.cso.ie\\PEA11.20231004T131042.csv'

In [None]:
# Preview the first five rows of the raw frame
df1.head(5)

In [None]:
# Remove the 'STATISTIC Label' and 'UNIT' columns, which are not used in the
# analysis below
df1_col_drop = ['STATISTIC Label', 'UNIT']
df1 = df1.drop(columns=df1_col_drop)

In [None]:
# Give the age and value columns shorter names for the rest of the notebook
df1 = df1.rename(
    columns={
        'Single Year of Age': 'Age_Group',
        'VALUE': 'Population',
    }
)

In [None]:
# Display df1 to check the renamed columns
df1

In [None]:
# Keep only rows that are neither 'Both sexes' nor 'All ages'
# (presumably those rows are aggregates of the others - confirm against the source table)
df1 = df1[(df1['Sex'] != 'Both sexes') & (df1['Age_Group'] != 'All ages')]

# Show the filtered frame
df1

In [None]:
# Total population per year across all remaining rows
df1_Yearly_Sum = df1.groupby('Year', as_index=False)['Population'].sum()
# Replace the totals with display strings in millions, e.g. 2972000 -> '2.972M'
# (after this step the 'Population' column holds text, not numbers)
df1_Yearly_Sum['Population'] = df1_Yearly_Sum['Population'].map(
    lambda total: f'{total / 1000000:.3f}M'
)

df1_Yearly_Sum

In [None]:
# Line plot of total population per year.
# df1_Yearly_Sum['Population'] holds display strings such as '2.972M'; passing
# strings to plt.plot makes matplotlib treat the y-axis as categorical, so the
# points are spaced by order of appearance rather than by value. The numeric
# totals (in millions) are therefore recomputed from df1 for plotting.
population_millions = df1.groupby('Year')['Population'].sum() / 1000000

plt.figure(figsize=(10, 10))
plt.plot(population_millions.index, population_millions.values, marker='o')
plt.xlabel('Year')
plt.ylabel('Population (millions)')
plt.title('Population Republic of Ireland Period: 1926-2023 ')
# No explicit yticks: one tick per data point would overlap on a numeric axis
plt.show()

In [None]:
# Bar chart of total population per year.
# A histogram of the 'Year' column only counts rows per year bin, so its bar
# heights are not populations even though the axis is labelled that way.
# The numeric totals (in millions) are recomputed from df1 because
# df1_Yearly_Sum['Population'] holds formatted strings.
yearly_population = (df1.groupby('Year')['Population'].sum() / 1000000).reset_index()

ax = sns.barplot(data=yearly_population, x='Year', y='Population', color='steelblue')
ax.set(xlabel='Year', ylabel='Population (millions)', title ='Republic of Ireland Population 1926-2023')
# Years are drawn as categories (evenly spaced), so rotate the labels to keep them legible
ax.tick_params(axis='x', labelrotation=90);

In [None]:
# Yearly population totals, in millions, used as input for the regression
data = {
    'Year': [1926, 1936, 1946, 1951, 1961, 1966, 1971, 1979, 1981, 1986, 1991, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
    'Population': [2.972, 2.968, 2.955, 2.961, 2.818, 2.884, 2.978, 3.368, 3.443, 3.541, 3.526, 3.626, 3.664, 3.703, 3.742, 3.79, 3.847, 3.917, 3.98, 4.045, 4.134, 4.233, 4.376, 4.485, 4.533, 4.555, 4.575, 4.594, 4.615, 4.645, 4.688, 4.74, 4.811, 4.885, 4.958, 5.03, 5.075, 5.184, 5.282]
}

df = pd.DataFrame(data)

# Feature matrix (one column: the year) and target vector (population)
X = df[['Year']].to_numpy()
y = df['Population'].to_numpy()

# Fit a straight line: population as a linear function of year
model = LinearRegression().fit(X, y)

# Forecast the ten years 2024-2033, one single-feature row per year
future_years = [[year] for year in range(2024, 2034)]
predicted_population = model.predict(future_years)

# Report each forecast alongside its year
print("Predicted population for the next 10 years:")
for (year,), population in zip(future_years, predicted_population):
    print(f"Year {year}: {population:.3f}M")

In [None]:
# Display df1 again for reference
df1

In [None]:
# Creating the DataFrame used for the logistic fit.
# 'Population' is expressed in millions (the plot below labels it that way).
# These are the same hard-coded values as in the linear-regression cell above.
data = {
    'Year': [1926, 1936, 1946, 1951, 1961, 1966, 1971, 1979, 1981, 1986, 1991, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023],
    'Population': [2.972, 2.968, 2.955, 2.961, 2.818, 2.884, 2.978, 3.368, 3.443, 3.541, 3.526, 3.626, 3.664, 3.703, 3.742, 3.79, 3.847, 3.917, 3.98, 4.045, 4.134, 4.233, 4.376, 4.485, 4.533, 4.555, 4.575, 4.594, 4.615, 4.645, 4.688, 4.74, 4.811, 4.885, 4.958, 5.03, 5.075, 5.184, 5.282]
}

# Build a two-column frame (Year, Population) from the dictionary
df = pd.DataFrame(data)

# Logistic (S-shaped) growth curve used as the model for curve fitting
def logistic_function(x, L, k, x0):
    """Evaluate the logistic curve L / (1 + exp(-k * (x - x0))).

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the curve.
    L : float
        Upper asymptote of the curve.
    k : float
        Steepness of the curve.
    x0 : float
        Value of x at which the curve reaches L / 2.

    Returns
    -------
    Scalar or numpy array with the same shape as ``x``.
    """
    decay = np.exp(-k * (x - x0))
    return L / (1 + decay)

x_data = df['Year'].values
y_data = df['Population'].values

# Parameter bounds, ordered as [L, k, x0]:
#   L  - upper asymptote, between the largest observation and twice that value
#   k  - steepness, between 0 and 1
#   x0 - midpoint year, between 0 and the last observed year
lower_bounds = [max(y_data), 0, 0]
upper_bounds = [2 * max(y_data), 1, max(x_data)]

# Explicit starting point for the optimiser (inside the bounds above).
# Without p0 the optimiser has to pick its own start, and with bounds this wide
# (x0 anywhere in [0, last year]) that start can lie where the curve is flat
# over the observed years, leaving k and x0 with no gradient to follow.
initial_guess = [1.5 * max(y_data), 0.03, np.median(x_data)]

# Fitting the data to the logistic function
popt, pcov = curve_fit(
    logistic_function,
    x_data,
    y_data,
    p0=initial_guess,
    bounds=(lower_bounds, upper_bounds),
)

# Getting the parameters of the logistic function
L, k, x0 = popt

# Printing the parameters of the logistic function
print(f"L: {L}, k: {k}, x0: {x0}")

# Plotting the original data and the fitted logistic curve
plt.figure(figsize=(10, 6))
plt.scatter(x_data, y_data, label='Original Data')
plt.plot(x_data, logistic_function(x_data, *popt), 'r-', label='Logistic Fit')
plt.xlabel('Year')
plt.ylabel('Population (in millions)')
plt.title('Logistic Fit for Population Growth')
plt.legend()
plt.show()


# df2: population by area (PEA07 dataset)

In [None]:
# Loading the dataset.
# The path previously mixed '/' and '\\' separators; forward slashes are used
# throughout because they work on Windows, Linux and macOS alike.
df2 = pd.read_csv('../Datasets_data.cso.ie/PEA07.20231004T131011.csv')

In [None]:
# Preview the first five rows of the raw frame
df2.head(5)

In [None]:
# Remove the 'Statistic Label' and 'UNIT' columns, which are not used in the
# analysis below
df2_col_drop = ['Statistic Label', 'UNIT']
df2 = df2.drop(columns=df2_col_drop)

In [None]:
# Scale VALUE by 1000 (presumably the source reports thousands - confirm
# against the UNIT column). The column is overwritten, so running this cell
# a second time multiplies the values again.
df2['VALUE'] = df2['VALUE'] * 1000

In [None]:
# Display df2 to check the rescaled VALUE column
df2

In [None]:
# Give the age, area and value columns shorter names for the rest of the notebook
df2 = df2.rename(
    columns={
        'Age Group': 'Age_Group',
        'Regional Authority Area': 'Area',
        'VALUE': 'Population',
    }
)

In [None]:
# Keep only rows that are none of 'Both sexes', 'State' or 'All ages'
# (presumably those rows are aggregates of the others - confirm against the source table)
df2 = df2[
    (df2['Sex'] != 'Both sexes')
    & (df2['Area'] != 'State')
    & (df2['Age_Group'] != 'All ages')
]

# Show the filtered frame
df2

In [None]:
# Total population for every (Year, Area) pair, summed over the remaining
# sex and age-group rows; the result is already a DataFrame with the columns
# Year, Area and Population
df2_Sum_Area_Year = df2.groupby(['Year', 'Area'], as_index=False)['Population'].sum()

df2_Sum_Area_Year

In [None]:
# Small multiples: one population-over-time line chart per area, four per row,
# each with its own y-scale (ScalarFormatter comes from the import cell at the top)
g = sns.FacetGrid(df2_Sum_Area_Year, col="Area", col_wrap=4, height=4, sharey=False)
g.map(sns.lineplot, "Year", "Population")
g.set_titles("{col_name}")
g.set_axis_labels("Year", "Population")
g.fig.suptitle("Population by Area Republic of Ireland 1995-2017", y=1.02)

# Show y-axis tick labels as full numbers instead of scientific notation
for ax in g.axes.flat:
    plain_formatter = ScalarFormatter(useMathText=True)
    plain_formatter.set_scientific(False)
    ax.yaxis.set_major_formatter(plain_formatter)

plt.tight_layout()
plt.show()


In [None]:
# Box plot of the yearly population totals, one box (and colour) per area
box_ax = sns.boxplot(data=df2_Sum_Area_Year, x='Area', y='Population', hue='Area')
box_ax.set_title('Box plot Administration', fontsize=15)
plt.show()

In [None]:
# Display the per-year, per-area population totals again for reference
df2_Sum_Area_Year

# Area breakdown:

1. **Border**: Cavan, Donegal, Leitrim, Monaghan, Sligo.

2. **Midland**: Laois, Longford, Offaly, Westmeath.

3. **West**: Galway, Mayo, Roscommon.

4. **Dublin**: Dublin City, Dún Laoghaire-Rathdown, Fingal, South Dublin.

5. **Mid-East**: Kildare, Louth, Meath, Wicklow.

6. **Mid-West**: Clare, Limerick, Tipperary.

7. **South-East**: Carlow, Kilkenny, Waterford, Wexford.

8. **South-West**: Cork, Kerry.

