In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one (the IPython default for this setting is "last_expr").
InteractiveShell.ast_node_interactivity = "all"

In [74]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Path of the input CSV, relative to the notebook's working directory.
# The In [78] cell reads it with pd.read_csv and expects an 'Area' column; every
# other column is treated as a month label whose first four characters are the year.
data_file = "Population_By_Region.csv"


In [76]:
import pandas as pd

# Build a small wide-format table: one row per student, one column per subject.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Math': [85, 92, 78],
    'Science': [90, 88, 95],
}
df = pd.DataFrame(data)

print("Original DataFrame (df):\n", df)

# Reshape wide -> long. 'Name' is kept as the identifier column; every other
# column name moves into 'Subject' and its value into 'Score', giving one row
# per (student, subject) pair.
df_melted = df.melt(id_vars=['Name'], var_name='Subject', value_name='Score')

# Label fixed: the previous text read "Melded".
print("\nMelted DataFrame (df_melted):\n", df_melted)

Original DataFrame (df):
       Name  Math  Science
0    Alice    85       90
1      Bob    92       88
2  Charlie    78       95

Melded DataFrame (df_melted):
       Name  Subject  Score
0    Alice     Math     85
1      Bob     Math     92
2  Charlie     Math     78
3    Alice  Science     90
4      Bob  Science     88
5  Charlie  Science     95


In [78]:
# Load the wide table: an 'Area' column plus one column per month label.
df = pd.read_csv(data_file)

# Reshape to long form (one row per area and month label), then derive the
# year from the first four characters of the label and coerce the population
# values to numbers; cells that cannot be parsed become NaN.
df_long = pd.melt(df, id_vars=['Area'], var_name='Month', value_name='Population')
df_long = df_long.assign(
    Year=df_long['Month'].str.slice(stop=4),
    Population=pd.to_numeric(df_long['Population'], errors='coerce'),
)

# Mean population per (Area, Year), pivoted so that each year is a column.
yearly_mean = df_long.groupby(['Area', 'Year'])['Population'].mean()
average_population_by_year = yearly_mean.unstack('Year')

# Show the resulting table
print(average_population_by_year)

# Scratch values kept from a manual sanity check:
# 4961754.58333333 = Average  (appears to match the 1990 mean printed for MA1 MAINLAND FINLAND)
# 4998478  (appears to match the December 1990 figure for MA0 WHOLE COUNTRY)

Year                               1990          1991          1992  \
Area                                                                  
MA0 WHOLE COUNTRY          4.986176e+06  5.014060e+06  5.042555e+06   
MA1 MAINLAND FINLAND       4.961755e+06  4.989332e+06  5.017609e+06   
MA2 Åland                  2.442142e+04  2.472800e+04  2.494608e+04   
MK01 Uusimaa               1.225528e+06  1.240052e+06  1.254742e+06   
MK02 Southwest Finland     4.244067e+05  4.261853e+05  4.280048e+05   
MK04 Satakunta             2.409112e+05  2.408876e+05  2.407640e+05   
MK05 Kanta-Häme            1.617175e+05  1.628823e+05  1.639878e+05   
MK06 Pirkanmaa             4.296902e+05  4.315994e+05  4.332809e+05   
MK07 Päijät-Häme           2.046147e+05  2.053284e+05  2.058858e+05   
MK08 Kymenlaakso           1.862302e+05  1.861618e+05  1.864516e+05   
MK09 South Karelia         1.401872e+05  1.402107e+05  1.400637e+05   
MK10 South Savo            1.635423e+05  1.634713e+05  1.634326e+05   
MK11 N

In [80]:
import pandas as pd

# Load the CSV file from the path configured in `data_file`, so this cell reads
# the same input as the yearly-average cell instead of repeating the file name.
df = pd.read_csv(data_file)

# Melt the DataFrame to long format: one row per (Area, month column)
df_long = df.melt(id_vars=['Area'], var_name='Month', value_name='Population')

# Extract the year: the first four characters of the 'Month' label
df_long['Year'] = df_long['Month'].str[:4]

# Extract the month number: the rest of the label with the 'M' removed.
# astype(int) raises if a label does not reduce to plain digits, which surfaces
# unexpected column names instead of silently mis-grouping them.
df_long['Month_Num'] = df_long['Month'].str[4:].str.replace('M', '', regex=False).astype(int)

# Convert 'Population' column to numeric; unparseable cells become NaN
df_long['Population'] = pd.to_numeric(df_long['Population'], errors='coerce')

# Keep December rows only. drop() already returns a new frame, so no explicit
# copy is needed before removing the helper column.
december_population = df_long[df_long['Month_Num'] == 12].drop(columns='Month_Num')

# Group by Area and Year and take the first non-null population value
# (one December column per year is expected), then pivot years into columns.
december_population_by_year = december_population.groupby(['Area', 'Year'])['Population'].first().unstack()

# Display the result
print(december_population_by_year)

Year                          1990     1991     1992     1993     1994  \
Area                                                                     
MA0 WHOLE COUNTRY          4998478  5029002  5054982  5077912  5098754   
MA1 MAINLAND FINLAND       4973874  5004155  5029989  5052810  5073596   
MA2 Åland                    24604    24847    24993    25102    25158   
MK01 Uusimaa               1232236  1248072  1261807  1277609  1293573   
MK02 Southwest Finland      425282   427158   428864   430409   432603   
MK04 Satakunta              240777   240767   240744   240519   240028   
MK05 Kanta-Häme             162248   163442   164363   164767   164957   
MK06 Pirkanmaa              430651   432391   434066   435789   438114   
MK07 Päijät-Häme            204901   205613   206107   206233   206171   
MK08 Kymenlaakso            186030   186322   186382   186054   185462   
MK09 South Karelia          140244   140150   139907   139569   139297   
MK10 South Savo             163462   1

In [86]:
import pandas as pd

# Load the CSV file from the path configured in `data_file` (single source of
# truth for the input file name).
df = pd.read_csv(data_file)

# Melt the DataFrame to long format: one row per (Area, month column)
df_long = df.melt(id_vars=['Area'], var_name='Month', value_name='Population')

# Extract the year (first four characters) and the month number (the rest of
# the label with the 'M' removed)
df_long['Year'] = df_long['Month'].str[:4]
df_long['Month_Num'] = df_long['Month'].str[4:].str.replace('M', '', regex=False).astype(int)

# Convert 'Population' to numeric; unparseable cells become NaN
df_long['Population'] = pd.to_numeric(df_long['Population'], errors='coerce')

# Rows whose 'Area' is missing or empty. groupby drops missing keys by default,
# so such rows would otherwise vanish from the result without any notice.
missing_area = df_long['Area'].isnull() | (df_long['Area'] == '')

if missing_area.any():
    print("WARNING: There are still rows with missing or empty 'Area' values. Please check your CSV file.")
    print(df_long[missing_area])  # Show the rows with missing Area
else:
    # Filter for December and remove the helper column
    december_population = df_long[df_long['Month_Num'] == 12].drop(columns='Month_Num')

    # Group and unstack: one row per Area, one column per Year
    december_population_by_year = december_population.groupby(['Area', 'Year'])['Population'].first().unstack()

    # Display every row. option_context restores the previous setting when the
    # block exits, even if printing raises.
    with pd.option_context('display.max_rows', None):
        print(december_population_by_year)

    # Export only the table built in this run. Previously the export sat outside
    # the if/else, so the warning branch either raised NameError or wrote a
    # stale table left over from an earlier cell.
    december_population_by_year.to_csv('december_population_by_year.csv')


Year                          1990     1991     1992     1993     1994  \
Area                                                                     
MA0 WHOLE COUNTRY          4998478  5029002  5054982  5077912  5098754   
MA1 MAINLAND FINLAND       4973874  5004155  5029989  5052810  5073596   
MA2 Åland                    24604    24847    24993    25102    25158   
MK01 Uusimaa               1232236  1248072  1261807  1277609  1293573   
MK02 Southwest Finland      425282   427158   428864   430409   432603   
MK04 Satakunta              240777   240767   240744   240519   240028   
MK05 Kanta-Häme             162248   163442   164363   164767   164957   
MK06 Pirkanmaa              430651   432391   434066   435789   438114   
MK07 Päijät-Häme            204901   205613   206107   206233   206171   
MK08 Kymenlaakso            186030   186322   186382   186054   185462   
MK09 South Karelia          140244   140150   139907   139569   139297   
MK10 South Savo             163462   1