## Preparation

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Keep wide DataFrames on a single row block when printed (no wrapping across lines).
pd.options.display.expand_frame_repr = False

Load dataset

In [None]:
# Read the raw dataset from its relative path.
orig_df = pd.read_csv('../dataset/team_A_dataset.csv')

# List the columns that contain at least one missing value.
has_nan = orig_df.isna().any()
print(orig_df.columns[has_nan].tolist())

# Zero-filled copy of the raw data; orig_df itself keeps its NaNs.
# NOTE(review): `df` is reassigned by the aggregation cell, which reads orig_df.
df = orig_df.fillna(0)

## Aggregate over regions (kraje)

In [None]:
# Columns averaged over all rows that share the same (month, year).
avg_cols = ['avg_monthly_salary', 'monthly_min_wage', 'monthly_inflation_rate_wrt_last_year', 'reer', 'bilance',
            'avg_energy_price', 'avg_gasoline_price', 'avg_natural_gas_price']

# Columns summed over all rows that share the same (month, year).
sum_cols = ['general_thefts', 'break_in_thefts', 'uchazeciOZamestnaniUoZ', 'uchazeciOZamestnaniUoZZeny',
            'noveHlaseniUchazeci', 'noveHlasenaAUvolnenaVPM', 'obsazenaAZrusenaVPM', 'absolventiSkolAMladistvi',
            'z_do_65', 'm_do_65', 'celkem']

# Aggregate the raw (non zero-filled) frame; mean() and sum() skip NaNs by default.
avg_df = orig_df.groupby(['month', 'year'], as_index=False)[avg_cols].mean()
sum_df = orig_df.groupby(['month', 'year'], as_index=False)[sum_cols].sum()

df = pd.merge(avg_df, sum_df, on=['month', 'year'])

# Assemble the timestamp from numeric components (first day of each month).
# This avoids concatenating unpadded strings such as "2009" + "1", which
# breaks as soon as the columns are read as floats ("2009.0" + "1.0").
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

# Chronological DatetimeIndex; the raw month/year columns are no longer needed.
df = df.set_index('date').sort_index().drop(columns=['month', 'year'])

df.tail(5)

## Time plots

In [None]:
plot_years = True  # toggle: when True, the time-plot cells draw dashed vertical lines to mark years

In [None]:
# Average monthly salary over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['avg_monthly_salary'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Average wage")
ax.set_xlabel("Date")
ax.set_ylabel("Average monthly wage [Kč]");

An obvious seasonal pattern with an apparent upward trend. There appears to be no cyclical behavior.

In [None]:
# Minimum monthly wage over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['monthly_min_wage'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Minimum wage")
ax.set_xlabel("Date")
ax.set_ylabel("Minimum monthly wage [Kč]");

There is an obvious upward trend with a seasonal pattern in years since 2015. There appears to be no cyclical behavior.

In [None]:
# Year-over-year inflation rate over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['monthly_inflation_rate_wrt_last_year'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Inflation")
ax.set_xlabel("Date")
ax.set_ylabel("Inflation rate wrt last year [%]");

Inflation appears to follow a cyclical pattern with a period of about 4 years, with neither a trend nor seasonal behavior. There has been a big spike in inflation since 2021.

In [None]:
# Real effective exchange rate over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['reer'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Real effective exchange rate")
ax.set_xlabel("Date")
ax.set_ylabel("REER");

There is no seasonal pattern in the development of REER, however, there seems to be an overall upward trend. There are no clear cycles in the observed time period. 

In [None]:
# Average electricity price over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['avg_energy_price'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Average electricity price")
ax.set_xlabel("Date")
ax.set_ylabel("Avg electricity price [USD / 1MWh]");

There is no evident trend in electricity prices, nor any cyclical or seasonal patterns. There is a big spike in prices in 2022 and 2023. 

In [None]:
# Average gasoline price over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['avg_gasoline_price'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Average gasoline price")
ax.set_xlabel("Date")
ax.set_ylabel("Avg gasoline price [USD / 1l]");

There are no seasonal effects on the price of gasoline. There also appears to be no trend. There might be a cyclical pattern with a period of about 6 years, however, the observed time period is not long enough to be completely sure. 

In [None]:
# Average natural gas price over time.
plt.figure(figsize=(16, 8))
plt.plot(df.index, df['avg_natural_gas_price'])
if plot_years:
    # Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
    for x in df.index[::12]:
        plt.axvline(x=x, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
plt.title("Average natural gas price")
plt.xlabel("Date")
# The y-axis label previously said "gasoline" (copied from the gasoline cell).
plt.ylabel("Avg natural gas price [USD / 1MMBtu]");

There is a cyclical pattern with a period of about 4 years. The development of natural gas prices displays no trend, nor any apparent seasonal patterns. There is a big spike in natural gas prices in 2022 and 2023.

In [None]:
# Number of common thefts over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['general_thefts'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Common thefts")
ax.set_xlabel("Date")
ax.set_ylabel("Number of thefts");

The number of thefts displays a strong seasonal dependency with a downward trend. There are no perceivable cycles in the observed time period.

In [None]:
# Number of break-in thefts over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['break_in_thefts'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Break ins")
ax.set_xlabel("Date")
ax.set_ylabel("Number of break ins");

There appears to be some seasonal pattern in the number of break-ins. There are no perceivable cycles in the observed time period. The overall development exhibits a downward trend.

In [None]:
# Import/export balance ('bilance' column) over time.
plt.figure(figsize=(16, 8))
plt.plot(df.index, df['bilance'])
if plot_years:
    # Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
    for x in df.index[::12]:
        plt.axvline(x=x, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
# Replaces a leftover placeholder title; wording matches the y-axis label.
plt.title("Import/export balance")
plt.xlabel("Date")
plt.ylabel("Import/export balance");

The balance is close to constant with no obvious or marginal upward or downward trend; major spikes in certain years are attributed to impactful socioeconomic events.

In [None]:
# Jobseekers over time: overall count and the women-only count.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['uchazeciOZamestnaniUoZ'], label='Overall')
ax.plot(df.index, df['uchazeciOZamestnaniUoZZeny'], label='Women')
# The legend is created before the year markers, so their label stays out of it.
ax.legend()
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Jobseekers")
ax.set_xlabel("Date")
ax.set_ylabel("Number of jobseekers");

A strong seasonal dependency with a downward trend can be observed; the trend was interrupted in 2020, which we attribute to the start of the global COVID-19 pandemic. Another hike is present in the recent data - 2022 saw an increase, which we attribute to the heightened refugee immigration stemming from the Russian-Ukrainian war.

In [None]:
# Newly announced jobseekers over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['noveHlaseniUchazeci'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Newly announced jobseekers")
ax.set_xlabel("Date")
ax.set_ylabel("Number of newly announced jobseekers");

Similar to the previous graph, there is both a seasonal pattern and a downward trend. The data here increases our confidence in our hypothesized cause of the recent upward tendency - 2020 had many waves of newly announced jobseekers, which corresponds to the actual events, where many businesses closed down throughout the year, letting go of all of their employees. 2022 instead has two big waves in its second half, which is around the time the waves of refugees arrived.

In [None]:
# Open positions over time: newly announced/freed vs. taken/closed.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['noveHlasenaAUvolnenaVPM'], label='Newly announced and freed open positions')
ax.plot(df.index, df['obsazenaAZrusenaVPM'], label='Already taken and closed open positions')
# The legend is created before the year markers, so their label stays out of it.
ax.legend()
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Open positions")
ax.set_xlabel("Date")
ax.set_ylabel("Number of positions");

As expected, these two predictors have inverse characteristics - there is an upward trend. Seasonal pattern is not as prominent here, however. Spikes in the data correspond with what was previously established. 

In [None]:
# Job-seeking students and youth over time.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df.index, df['absolventiSkolAMladistvi'])
# Dashed marker at every 12th timestamp (one per year for gap-free monthly data).
for year_start in (df.index[::12] if plot_years else []):
    ax.axvline(x=year_start, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
ax.set_title("Job-seeking students and youth")
ax.set_xlabel("Date")
ax.set_ylabel("Number of job-seeking students and youth");

COVID-19 seems to have affected this group as well. The war, however, has not impacted the data as much. As with the rest, there is a downward trend, with some hikes in the most recent years due to economic instabilities. A seasonal pattern is present.

In [None]:
# Ukrainian refugees over time, split into women and men.
# Only rows from position 158 onward are plotted.
# NOTE(review): presumably earlier rows hold no refugee data — confirm against the dataset.
refugee_df = df.iloc[158:]

plt.figure(figsize=(16, 8))
plt.plot(refugee_df.index, refugee_df['z_do_65'], label='Women')
plt.plot(refugee_df.index, refugee_df['m_do_65'], label='Men')
# The legend is created before the year markers, so their label stays out of it.
plt.legend()
if plot_years:
    # Mark each January in the plotted window. Stepping by 12 from row 158
    # would anchor the markers to the window's first month, not to year starts.
    for x in refugee_df.index[refugee_df.index.month == 1]:
        plt.axvline(x=x, color='black', linestyle='dashed', linewidth=1.2, alpha=0.9, label='axvline - full height')
plt.title("Ukrainian refugees")
plt.xlabel("Date")
# The y-axis label previously read "students and youth" (copied from the preceding cell).
plt.ylabel("Number of refugees");

The plot shows a major influx of refugees at the start of 2022, which supports our previous claims. Due to the reasons of migration, the vast majority of migrants are women.

# TODO FOR SLEEKY VIKI

'bilance', 'uchazeciOZamestnaniUoZ',
'noveHlaseniUchazeci', 'noveHlasenaAUvolnenaVPM', 'obsazenaAZrusenaVPM',
'absolventiSkolAMladistvi', 'z_do_65_w19', 'm_do_65_w19', 'celkem_w19'

## Seasonal plots

Seasonal plots for features which displayed signs of seasonal patterns.

In [None]:
def get_values(colum, years=range(2009, 2024), data=None):
    """Split one column of a date-indexed frame into per-year month/value series.

    Args:
        colum: Name of the column to extract.
        years: Iterable of calendar years to include. Defaults to 2009-2023.
        data: DataFrame with a DatetimeIndex. Defaults to the notebook-level
            ``df`` when omitted.

    Returns:
        dict mapping each requested year to ``[months, values]``, where
        ``months`` is a list of month numbers and ``values`` is a NumPy array
        of the column's values for those months. Years with no rows map to an
        empty list and an empty array.
    """
    frame = df if data is None else data
    series = frame[colum]
    index_years = series.index.year  # computed once, reused for every year

    ret = dict()
    for year in years:
        in_year = series[index_years == year]
        ret[year] = [in_year.index.month.tolist(), in_year.to_numpy()]
    return ret

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('monthly_min_wage').items():
    ax.plot(months, values, label=year)
ax.set_title("Minimum wage")
ax.set_xlabel("Month")
ax.set_ylabel("Minimum monthly wage [Kč]")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('avg_monthly_salary').items():
    ax.plot(months, values, label=year)
ax.set_title("Average wage")
ax.set_xlabel("Month")
ax.set_ylabel("Average monthly wage [Kč]")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('general_thefts').items():
    ax.plot(months, values, label=year)
ax.set_title("Common thefts")
ax.set_xlabel("Month")
ax.set_ylabel("Number of thefts")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('break_in_thefts').items():
    ax.plot(months, values, label=year)
ax.set_title("Break ins")
ax.set_xlabel("Month")
ax.set_ylabel("Number of break ins")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('bilance').items():
    ax.plot(months, values, label=year)
ax.set_title("Balance")
ax.set_xlabel("Month")
ax.set_ylabel("Balance in millions")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('uchazeciOZamestnaniUoZ').items():
    ax.plot(months, values, label=year)
ax.set_title("Jobseekers")
ax.set_xlabel("Month")
ax.set_ylabel("Number of jobseekers")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('noveHlaseniUchazeci').items():
    ax.plot(months, values, label=year)
ax.set_title("Newly announced jobseekers")
ax.set_xlabel("Month")
ax.set_ylabel("Number of newly announced jobseekers")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('noveHlasenaAUvolnenaVPM').items():
    ax.plot(months, values, label=year)
ax.set_title("Newly announced and freed open positions")
ax.set_xlabel("Month")
ax.set_ylabel("Number of newly announced and freed open positions")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('obsazenaAZrusenaVPM').items():
    ax.plot(months, values, label=year)
ax.set_title("Already taken and closed open positions")
ax.set_xlabel("Month")
ax.set_ylabel("Number of already taken and closed open positions")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('absolventiSkolAMladistvi').items():
    ax.plot(months, values, label=year)
ax.set_title("Job-seeking students and youth")
ax.set_xlabel("Month")
ax.set_ylabel("Number of job-seeking students and youth")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('z_do_65').items():
    ax.plot(months, values, label=year)
ax.set_title("Ukrainian female refugees")
ax.set_xlabel("Month")
ax.set_ylabel("Number of ukrainian female refugees")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();

In [None]:
# Seasonal plot: one line per year, months on the x-axis.
fig, ax = plt.subplots(figsize=(16, 8))
for year, (months, values) in get_values('m_do_65').items():
    ax.plot(months, values, label=year)
ax.set_title("Ukrainian male refugees")
ax.set_xlabel("Month")
ax.set_ylabel("Number of ukrainian male refugees")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
plt.show();