In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from src.DataLoaders import T01DataLoader, T08DataLoader
from src.DataFunctions import get_months, transform_monthly_data_to_list, transform_df_to_list, get_yearly_cases_by_key, create_x_labels
%config InlineBackend.figure_format = 'retina'

In [None]:
# Notebook-wide settings: the reporting years and one loader per PKS table.
years = range(2012, 2023)  # 2012..2022, the end of the range is exclusive
db_t01, db_t08 = T01DataLoader(), T08DataLoader()

# The same transformation is applied to both loaders for all years. Later cells
# iterate df_years_monthly as (year, DataFrame) pairs.
df_yearly, df_years_monthly = (
    transform_df_to_list(loader, years) for loader in (db_t01, db_t08)
)

### T01 with crime scene distribution

In [None]:
# Federal T01 case table for 2022; the first three sheet rows are skipped on read.
R_BU_T01_F = 'Datasets/PKS/2022/BU-T01-Faelle.xlsx'
df = pd.read_excel(R_BU_T01_F, skiprows=3, thousands=',', decimal='.')

# Give every sub-column a self-describing name. 'Unnamed: N' is the name pandas
# assigns to a column whose header cell is empty in the sheet.
df = df.rename(columns={
    'erfasste Fälle davon:\nVersuche': 'erfasste Fälle davon: Anzahl Versuche',
    'Unnamed: 5': 'erfasste Fälle davon: Versuche in %',
    'Tatortverteilung': 'Tatortverteilung: bis unter 20.000 Einwohner',
    'Unnamed: 7': 'Tatortverteilung: 20.000 bis unter 100.000',
    'Unnamed: 8': 'Tatortverteilung: 100.000 bis unter 500.000',
    # Spelled like its sibling 'Tatortverteilung' columns so prefix-based
    # column selection finds all five of them.
    'Unnamed: 9': 'Tatortverteilung: 500.000 und mehr',
    'Unnamed: 10': 'Tatortverteilung: unbekannt',
    'mit Schusswaffe': 'mit Schusswaffe: gedroht',
    'Unnamed: 12': 'mit Schusswaffe: geschossen',
    'Aufklärung': 'Aufklärung: Anzahl Fälle',
    'Unnamed: 14': 'Aufklärung: in % (AQ)',
    'Tatverdächtige': 'Tatverdächtige: insgesamt',
    'Unnamed: 16': 'Tatverdächtige: männlich',
    'Unnamed: 17': 'Tatverdächtige: weiblich',
    'Nichtdeutsche Tatverdächtige': 'Nichtdeutsche Tatverdächtige: Anzahl',
    'Unnamed: 19': 'Nichtdeutsche Tatverdächtige: Anteil an TV insg. in %',
})

# Drop the first four rows (presumably leftover sub-header rows -- TODO confirm)
# and renumber the remaining rows from 0.
df = df.drop(range(4)).reset_index(drop=True)
df.head()

### T01 with crime scene distribution in percent

In [None]:
# Federal T01 table with the crime-scene distribution expressed in percent.
R_BU_T01_T = 'Datasets/PKS/2022/Räumliche-Gliederung/Bund/BU-T01-Tatortverteilung-Prozent.xlsx'
df = pd.read_excel(
    R_BU_T01_T, skiprows=3, thousands=',', decimal='.'
).rename(
    columns={
        'Tatortverteilung': 'Tatortverteilung: bis unter 20.000 Einwohner',
        'Unnamed: 4': 'Tatortverteilung: 20.000 bis unter 100.000',
        'Unnamed: 5': 'Tatortverteilung: 100.000 bis unter 500.000',
        'Unnamed: 6': 'Tatortverteilung: 500.000 und mehr',
        'Unnamed: 7': 'Tatortverteilung: unbekannt',
    }
)

# Row 4 (columns 3 onward) is kept as its own Series before the first five
# rows are removed from the table.
inhabitant_distribution = df.iloc[4, 3:].rename('Einwohnerverteilung (01.01.2022) in %')
df = df.drop(index=range(5)).reset_index(drop=True)
df.head()

### T01 with crime scene distributions of crimes which were fully completed

In [None]:
# Federal T01 table restricted to completed cases; the first three sheet rows
# are skipped on read.
R_BU_T01_F = 'Datasets/PKS/2022/Räumliche-Gliederung/Bund/BU-T01-vollendete-Faelle.xlsx'
df = pd.read_excel(R_BU_T01_F, skiprows=3, thousands=',', decimal='.')

# Give every sub-column a self-describing name. 'Unnamed: N' is the name pandas
# assigns to a column whose header cell is empty in the sheet.
df = df.rename(columns={
    'erfasste Fälle davon:\nVersuche': 'erfasste Fälle davon: Anzahl Versuche',
    'Unnamed: 5': 'erfasste Fälle davon: Versuche in %',
    'Tatortverteilung': 'Tatortverteilung: bis unter 20.000 Einwohner',
    'Unnamed: 7': 'Tatortverteilung: 20.000 bis unter 100.000',
    'Unnamed: 8': 'Tatortverteilung: 100.000 bis unter 500.000',
    # Spelled like its sibling 'Tatortverteilung' columns so prefix-based
    # column selection finds all five of them.
    'Unnamed: 9': 'Tatortverteilung: 500.000 und mehr',
    'Unnamed: 10': 'Tatortverteilung: unbekannt',
    'mit Schusswaffe': 'mit Schusswaffe: gedroht',
    'Unnamed: 12': 'mit Schusswaffe: geschossen',
    'Aufklärung': 'Aufklärung: Anzahl Fälle',
    'Unnamed: 14': 'Aufklärung: in % (AQ)',
    'Tatverdächtige': 'Tatverdächtige: insgesamt',
    'Unnamed: 16': 'Tatverdächtige: männlich',
    'Unnamed: 17': 'Tatverdächtige: weiblich',
    'Nichtdeutsche Tatverdächtige': 'Nichtdeutsche Tatverdächtige: Anzahl',
    'Unnamed: 19': 'Nichtdeutsche Tatverdächtige: Anteil an TV insg. in %',
})

# Drop the first four rows (presumably leftover sub-header rows -- TODO confirm)
# and renumber the remaining rows from 0.
df = df.drop(range(4)).reset_index(drop=True)
df.head()

# Zeitliche Gliederung

In [None]:
# Federal T08 table (time of offence) for 2022; four sheet rows are skipped.
BU_T08_T = 'Datasets/PKS/2022/BU-T08-Tatzeit.xlsx'
df = pd.read_excel(BU_T08_T, skiprows=4, thousands=',', decimal='.')

# Data row 0 holds the labels for columns 3..14. Assign a fresh list of names
# rather than writing into `df.columns.values`, which mutates the Index buffer
# behind pandas' back.
column_names = list(df.columns)
column_names[3:15] = df.iloc[0, 3:15].tolist()
df.columns = column_names
df = df.drop([0, 1]).reset_index(drop=True)

# Reshape the "Straftaten insgesamt" row into one (Monat, value) pair per row.
total = df[df.Straftat == 'Straftaten insgesamt']
total = pd.melt(total.iloc[:, 3:-3], var_name='Monat', value_name='Straftaten insgesamt')

# Plot the counts against the month column; `xlabel` takes a plain string.
ax = total.plot(x='Monat', y='Straftaten insgesamt', xlabel='Monat')
ax.set_title('Straftaten in Deutschland 2022')
ax.set_ylim(0, 5e5)

total

## Yearly Data

In [None]:
# TODO: crime vs. cybercrime
# One value per year for each offence key, read from the yearly T01 tables.
# The arrays are sized from `years` and fully written by the loop below.
cybercrime, crime, computer_betrug, betrug = (np.empty(len(years)) for _ in range(4))

# NOTE(review): a later cell reads '897000' as the cyber-crime key and '897100'
# as Computerbetrug, while this cell uses '543000' and '897000' -- confirm
# which keys are intended here.
series_by_key = {
    '------': crime,
    '543000': cybercrime,
    '897000': computer_betrug,
    '510000': betrug,
}
for i, year in enumerate(years):
    df_t01_year = db_t01[year]
    for schluessel, series in series_by_key.items():
        # First matching row of the key; raises IndexError if the key is absent.
        series[i] = df_t01_year.loc[
            df_t01_year['Schlüssel'] == schluessel, 'Anzahl erfasste Fälle'
        ].iloc[0]

fig, ax = plt.subplots()
ax.plot(years, cybercrime, label='Cybercrime')
ax.plot(years, crime - cybercrime, label='"Analog" crime')
ax.set_title('"Analog" Crime vs. Cybercrime')
ax.legend()

fig, ax = plt.subplots()
ax.plot(years, betrug, c='black')
ax.fill_between(years, betrug, betrug - computer_betrug, alpha=.5)
ax.fill_between(years, betrug - computer_betrug, 0, alpha=.5)
# NOTE(review): a scalar `ylim` sets only the lower axis limit (to 6e5) --
# confirm this is intended and not meant to be ylim=(0, 6e5).
ax.set(title='Analog vs. Digital Fraud', ylim=6e5)
ax.legend(['Betrug ges.', 'digital', 'analog'])
plt.xticks(rotation=0)
plt.show()

In [None]:
# Load the T01 time-series table used below to relate crimes in general to
# cybercrimes. The following cells read its 'Schluessel', 'Jahr' and
# 'erfasste Faelle' columns.
Z_T01_F = 'Datasets/PKS/2022/Zeitliche-Gliederung/T01-Faelle.csv'
# The encoding name is given without the stray leading space so it does not
# depend on codec-name normalization.
df_z_t01_f = pd.read_csv(Z_T01_F, sep=';', encoding='iso8859-1', skiprows=1, thousands=',', decimal='.')

def get_data_times_series(key, data=None):
    """Return the time series of one offence key.

    Parameters
    ----------
    key : str
        Value to match in the 'Schluessel' column.
    data : pandas.DataFrame, optional
        Table with 'Schluessel', 'Jahr' and 'erfasste Faelle' columns.
        Defaults to the module-level ``df_z_t01_f`` so existing one-argument
        calls keep working.

    Returns
    -------
    tuple
        ``(rows, years, cases)``: the matching rows as a DataFrame, their
        'Jahr' values as a list and their 'erfasste Faelle' values as a list.
        Both lists are empty when the key does not occur.
    """
    if data is None:
        data = df_z_t01_f
    crime_data = data[data['Schluessel'] == key]
    crime_data_years = list(crime_data['Jahr'])
    crime_data_cases = list(crime_data['erfasste Faelle'])

    return crime_data, crime_data_years, crime_data_cases

# Yearly series for all offences ('------') and for key '897000'.
all_crimes, all_crimes_x, all_crimes_y = get_data_times_series('------')
cyber_crimes, cyber_crimes_x, cyber_crimes_y = get_data_times_series('897000')

# Figure 1: both series side by side on a shared y axis.
fig, axs = plt.subplots(1, 2, figsize=(12, 6), sharey=True, layout='constrained')
side_by_side = (
    (cyber_crimes_x, cyber_crimes_y, 'Cyber Crimes'),
    (all_crimes_x, all_crimes_y, 'Overall Crimes'),
)
for panel, (xs, ys, heading) in zip(axs, side_by_side):
    panel.plot(xs, ys)
    panel.set_title(heading)
plt.show()

# Figure 2: the same two quantities as filled bands on a log scale, once with
# the digital share drawn on top and once with it drawn at the bottom.
total_cases = np.asarray(all_crimes_y)
cyber_cases = np.asarray(cyber_crimes_y)
analog_cases = total_cases - cyber_cases

fig, axs = plt.subplots(1, 2, figsize=(12, 6), sharey=True, layout='constrained')
left, right = axs

left.fill_between(all_crimes_x, all_crimes_y, analog_cases, alpha=.3)
left.fill_between(all_crimes_x, analog_cases, 0, alpha=.3)
left.set(yscale='log', title='Digital vs. Analog Crime')
left.legend(['digital', 'analog'])

right.fill_between(all_crimes_x, total_cases, cyber_cases, alpha=.3)
right.fill_between(all_crimes_x, cyber_cases, 0, alpha=.3)
right.set(yscale='log', title='Digital vs. Analog Crime')
right.legend(['analog', 'digital'])
fig.suptitle('Same data & scale, different order :D')


What we could say using this type of plot:
1. Overall crime has been almost constant in its magnitude
2. Cybercrime, although very low in comparison, has grown almost exponentially (linear in log scale) for many years

In [None]:
# Pearson product-moment correlation matrix of the two yearly series
pearson_all_vs_cyber = np.corrcoef(all_crimes_y, cyber_crimes_y)
print(pearson_all_vs_cyber)

In [None]:
# Yearly series for Computerbetrug (key 897100) and Betrug (key 510000).
computerbetrug, computerbetrug_x, computerbetrug_y = get_data_times_series('897100')
betrug, betrug_x, betrug_y = get_data_times_series('510000')

# Side by side, each on its own y axis.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

axs[0].plot(betrug_x, betrug_y)
axs[0].set_title('Betrug (inkl. Computerbetrug)')

axs[1].plot(computerbetrug_x, computerbetrug_y)
axs[1].set_title('Computerbetrug')
plt.show()

# Both series on one axis. Explicit axes and labelled lines keep the figure
# readable on its own and independent of the pyplot "current figure" state.
fig, ax = plt.subplots()
ax.plot(betrug_x, betrug_y, label='Betrug')
ax.plot(computerbetrug_x, computerbetrug_y, label='Computerbetrug')
ax.set_title('Betrug und Computerbetrug')
ax.legend()
plt.show()


### Relationship between Betrug and Computerbetrug

There seems to be a strong negative correlation between Betrug and Computerbetrug.  
Data for Computerbetrug is only available from 2016


In [None]:
# Compare only the most recent n years of Betrug, where n is the number of
# years for which Computerbetrug values exist.
n = len(computerbetrug_y)
betrug_recent = betrug_y[-n:]

print('Pearson product-moment correlation coefficients of Betrug and Computerbetrug')
print(np.corrcoef(betrug_recent, computerbetrug_y))

# Both series over the Computerbetrug years, one panel each.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (betrug_recent, 'Betrug (inkl. Computerbetrug)'),
    (computerbetrug_y, 'Computerbetrug'),
)
for panel, (cases, heading) in zip(axs, panels):
    panel.plot(computerbetrug_x, cases)
    panel.set_title(heading)
plt.show()
plt.close()


In [None]:
# Both yearly series on one axis, restricted to the last n years of Betrug
# (n is set in the previous cell to the number of Computerbetrug years).
fig, ax = plt.subplots()
ax.plot(computerbetrug_x, computerbetrug_y, label='Computerbetrug')
ax.plot(computerbetrug_x, betrug_y[-n:], label='Betrug')
ax.legend()
ax.set_title('Betrug and Computerbetrug')
# plt.show() instead of fig.show(): the latter only warns on the inline
# (non-GUI) notebook backend.
plt.show()

# Exploration of monthly data

In the Table 08 of 2017 there is a change in the key 510000 (Betrug). Therefore the key is there twice in two rows  
&rarr; Which one do we have to use?/does one include the other?/ Can we just add them up?
    (In excel table row 516&517)

Same Problem in 2013

### Getting data for Betrug and Computerbetrug

#### Running into some issues with the keys used for computerfraud in T-08 from different years

517500 - Computerbetrug key until 2015  
897100 - Computerbetrug key after 2015

However, key 517500 is still used later for sonstiger Computerbetrug

In [None]:
# Betrug (key 510000), one list of monthly values per year.
# NOTE(review): the third argument looks like the "add rows with the same key"
# switch discussed further down in the notebook -- confirm against
# transform_monthly_data_to_list.
betrug_cases = transform_monthly_data_to_list(df_years_monthly, '510000', True)

# Computerbetrug
# The key changed in 2016, so the years up to 2015 are read with the old key
# and the years from 2016 on with the new one. df_years_monthly was built from
# `years`, so the split position is the position of 2016 in `years` (the
# variable keeps its historical name).
ind_2015 = list(years).index(2016)
df_years_2012_to_2015 = df_years_monthly[:ind_2015]
df_years_2016_to_2022 = df_years_monthly[ind_2015:]

computerbetrug_cases = transform_monthly_data_to_list(df_years_2012_to_2015, '517500', True)
computerbetrug_cases_after_2015 = transform_monthly_data_to_list(df_years_2016_to_2022, '897100', True)

# Append the later years so the list covers the whole period in order.
computerbetrug_cases.extend(computerbetrug_cases_after_2015)

### Visualization: Fraud and Computerfraud From 2016 to 2022

In [None]:
# x-axis labels built from `years`; the plots below address single labels by
# strings such as 'Jan. 2016'.
x_labels_monthly = create_x_labels(years)

# Concatenate the per-year lists into one continuous monthly series each.
flat_betrug_cases = []
for cases_of_year in betrug_cases:
    flat_betrug_cases.extend(cases_of_year)

flat_computerbetrug_cases = []
for cases_of_year in computerbetrug_cases:
    flat_computerbetrug_cases.extend(cases_of_year)

print(flat_betrug_cases)
print(flat_computerbetrug_cases)

# One panel per offence, stacked vertically.
fig, axs = plt.subplots(2, 1, figsize=(18, 18))
fraud_ax, computerfraud_ax = axs

fraud_ax.plot(x_labels_monthly, flat_betrug_cases)
fraud_ax.set_title('Fraud')
fraud_ax.tick_params(axis='x', rotation=90)

computerfraud_ax.plot(x_labels_monthly, flat_computerbetrug_cases)
computerfraud_ax.set_title('Computerfraud')
computerfraud_ax.tick_params(axis='x', rotation=90)

# Red marker at the month where the Computerbetrug key changes.
computerfraud_ax.axvline(x='Jan. 2016', color='red')

for panel in axs:
    # Show only every third x tick label.
    tick_labels = panel.xaxis.get_ticklabels()
    for position in range(len(tick_labels)):
        if position % 3:
            tick_labels[position].set_visible(False)

    # Shade every second block of twelve months in light grey.
    for block_start in range(0, len(x_labels_monthly), 24):
        panel.axvspan(block_start, block_start + 12, facecolor='grey', alpha=0.1)
            #axs.axvspan(ind+12, ind+12+12, facecolor='grey', alpha=0.2)



**Key 517500:**  
Jan. 2012 to Dec. 2015 &rarr; Computerbetrug  
Jan. 2016 to Dec. 2022 &rarr; Computerbetrug (sonstiger)

**Key 897100:**  
Jan. 2015 to Dec 2022 &rarr; Computerbetrug

**We can see that in the year 2015 two different keys (with different case numbers) were used for Computerbetrug.**

For now the old key is used till the end of 2015 for the Computerbetrug plots

In [None]:
# Both monthly series as filled areas on a single axis.
fig, axs = plt.subplots(sharey=True, layout='constrained', figsize=(18, 9))
for monthly_series in (flat_betrug_cases, flat_computerbetrug_cases):
    axs.fill_between(x_labels_monthly, monthly_series)

axs.set(yscale='linear', title='Fraud vs. Computerfraud')
axs.legend(['Fraud', 'Computerfraud'])
axs.tick_params(axis='x', rotation=90)
# Red marker at the month where the Computerbetrug key changes.
axs.axvline(x='Jan. 2016', color='red')

# Show only every third x tick label.
tick_labels = axs.xaxis.get_ticklabels()
for position in range(len(tick_labels)):
    if position % 3:
        tick_labels[position].set_visible(False)

# Shade every second block of twelve months in light grey.
for block_start in range(0, len(x_labels_monthly), 24):
    axs.axvspan(block_start, block_start + 12, facecolor='grey', alpha=0.1)
        




Using yearly data, we saw a strong negative correlation between fraud and computer fraud between 2016 and 2022.  
Using monthly data, we can see a moderate to strong positive correlation.

In [None]:
# Correlation between 2012 and 2022 (monthly data)
print('Correlation between fraud and cyberfraud between 2012 and 2022 (monthly data):',
      '{:.5f}'.format(np.corrcoef(flat_betrug_cases, flat_computerbetrug_cases)[0, 1]))

# Correlation for the Covid years 2020 & 2021 (monthly data).
# The flat lists hold twelve values per year starting with years[0], so the
# slice is computed from the year offsets. Taking the last 24 entries would
# select 2021 and 2022 instead.
covid_start = (2020 - years[0]) * 12
covid_end = (2022 - years[0]) * 12  # exclusive: first month of 2022
print('Correlation between fraud and cyberfraud between 2020 and 2021 (monthly data):',
      '{:.5f}'.format(np.corrcoef(flat_betrug_cases[covid_start:covid_end],
                                  flat_computerbetrug_cases[covid_start:covid_end])[0, 1]))

In [None]:
# Development of the initial Computerbetrug key 517500 over the whole period,
# compared with the spliced series (517500 until 2015, 897100 afterwards).

# TODO: changing the third argument (add_same_key) changes the data in 2015 -> look into it
data_517500 = transform_monthly_data_to_list(df_years_monthly, '517500', True)
data_517500_flat = [element for year in data_517500 for element in year]

fig, axs = plt.subplots(figsize=(18, 9))
axs.plot(x_labels_monthly, data_517500_flat)
# Give the second line the same x values as the first. Without them it is
# drawn against 0..N-1 and only lines up with the month labels by coincidence.
axs.plot(x_labels_monthly, flat_computerbetrug_cases)
axs.set_title('Cyberfraud - differnet keys')
axs.tick_params(axis='x', rotation=90)
axs.legend(('Key 517500', 'Key 517500 till end of 2015, then key 897100'))

# Show only every third x tick label.
for ind, label in enumerate(axs.xaxis.get_ticklabels()):
    if ind % 3 != 0:
        label.set_visible(False)

# Shade every second block of twelve months in light grey.
for ind in range(0, len(x_labels_monthly), 24):
    axs.axvspan(ind, ind + 12, facecolor='grey', alpha=0.1)

In [None]:
# Read key 517500 twice, once with the third argument True and once with
# False, and compare the flattened monthly values element by element.
# (The original author's finding: the flag makes no difference for this key.)
data_517500 = transform_monthly_data_to_list(df_years_monthly, '517500', True)
data_517500_flat = []
for cases_of_year in data_517500:
    data_517500_flat.extend(cases_of_year)

data_517500_add = transform_monthly_data_to_list(df_years_monthly, '517500', False)
data_517500_flat_add = []
for cases_of_year in data_517500_add:
    data_517500_flat_add.extend(cases_of_year)

# Number of positions at which the two series disagree.
diff_count = sum(
    1 for with_flag, without_flag in zip(data_517500_flat, data_517500_flat_add)
    if with_flag != without_flag
)
same_data = diff_count == 0

print('Same data: ' + str(same_data))

print(data_517500_flat)
print(data_517500_flat_add)
print(diff_count)

### Looking at general development of crimes based on monthly data

Vertical lines used to mark the end of a year &rarr; highlights cyclic pattern of crime development

In [None]:
# '------' is the key of the overall total row.
key = '------'

# Per-year monthly values, then one continuous monthly series.
overall_crimes = transform_monthly_data_to_list(df_years_monthly, key, False)
flat_overall_crimes = []
for cases_of_year in overall_crimes:
    flat_overall_crimes.extend(cases_of_year)

fig, axs = plt.subplots(figsize=(18, 9))
axs.plot(x_labels_monthly, flat_overall_crimes)
axs.tick_params(axis='x', rotation=90)
axs.set_title('Overall crimes in Germany')

# Show only every third x tick label.
tick_labels = axs.xaxis.get_ticklabels()
for position in range(len(tick_labels)):
    if position % 3:
        tick_labels[position].set_visible(False)

# Shade every second block of twelve months in light grey.
for block_start in range(0, len(x_labels_monthly), 24):
    axs.axvspan(block_start, block_start + 12, facecolor='grey', alpha=0.1)

In [None]:
months = get_months()

# One row per year, one column per month. reshape fails loudly if the series
# does not hold exactly twelve values for every year, instead of leaving
# uninitialized cells in a pre-allocated array.
mat = np.asarray(flat_overall_crimes, dtype=float).reshape(len(years), 12)

# Proof of concept: show the matrix as a colour map.
fig, ax = plt.subplots()
ax.matshow(mat, cmap='RdGy_r')
ax.set_yticks(range(len(years)))
ax.set_xticks(range(12))
ax.set_xticklabels(months)
ax.set_yticklabels(years)
plt.xticks(rotation=45)
plt.show()

**It can be seen that the numbers from T01 and T08 do not add up**

Inside T08 the sum of the months + 'Tatzeit unbekannt' match 'Anzahl Fälle insgesamt'. However, in 2013 this is not the case as there is no column 'Tatzeit unbekannt'

In [None]:
# Yearly totals computed three ways: T01, the T08 total column, and the sum of
# the T08 months plus the cases with unknown time.

# Cases with unknown month per year; 0 for tables without that column.
time_unknown = []
for year, df_t08_year in df_years_monthly:
    if 'Tatzeit unbekannt' in df_t08_year:
        # .iloc[0] picks the scalar explicitly; int() on a one-element Series
        # is deprecated in recent pandas.
        unknown = df_t08_year.loc[df_t08_year['Schlüssel'] == '------', 'Tatzeit unbekannt'].iloc[0]
    else:
        unknown = 0
    time_unknown.append(int(unknown))

# Sum of the twelve monthly values per year (without the unknown-time cases).
yearly_totals_T08_mis_val = [np.sum(monthly_cases) for monthly_cases in overall_crimes]

yearly_totals_T08 = [months_sum + unknown
                     for months_sum, unknown in zip(yearly_totals_T08_mis_val, time_unknown)]

# Yearly totals from the T08 column 'Anzahl erfasste Fälle'
col_anz_faelle_ins_T08 = []
for year, df_t08_year in df_years_monthly:
    total_cases = df_t08_year.loc[df_t08_year['Schlüssel'] == '------', 'Anzahl erfasste Fälle'].iloc[0]
    col_anz_faelle_ins_T08.append(int(total_cases))

# Yearly totals from T01 (`crime` is filled in the "Yearly Data" cell)
yearly_totals_T01 = [int(c) for c in crime]

# Visualization
print('Totals from T01:                ', yearly_totals_T01)
print('Anzahl Fälle insgesamt from T08:', col_anz_faelle_ins_T08)
print('Summed monthly cases from T08:  ', yearly_totals_T08)

# Line graph
x_labels = list(years)

fig, axs = plt.subplots()
axs.plot(x_labels, yearly_totals_T01)
axs.plot(x_labels, col_anz_faelle_ins_T08)
axs.plot(x_labels, yearly_totals_T08)

axs.set(title='Overall Crimes per Year')
axs.legend(('T01', 'T08 "Anzahl Fälle insgesamt"', 'T08 self-calculated sum'))

# Grouped bar chart: three bars per year, each 0.2 wide and shifted by 0.2.
x = range(len(yearly_totals_T01))
fig, axs = plt.subplots(figsize=(12, 6))

axs.bar(x, yearly_totals_T01, width=0.2, label='T01')
axs.bar([i + 0.2 for i in x], col_anz_faelle_ins_T08, width=0.2, label='T08 "Anzahl Fälle insgesamt"')
axs.bar([i + 0.4 for i in x], yearly_totals_T08, width=0.2, label='T08 self-calculated sum')
axs.legend(loc='lower left')
axs.set(title='Overall crimes per year')
# Centre the year labels under the middle bar of each group.
axs.set_xticks([i + 0.2 for i in x], x_labels)


In [None]:
# Attempted cases per year, copied by hand from T01-Faelle.
tries = [450708, 457911, 461117, 481103, 533647, 454461, 423408, 416594, 399759, 371382, 414197]

# T01 totals minus the attempts, year by year.
T01_without_tries = []
for total_cases, attempted_cases in zip(yearly_totals_T01, tries):
    T01_without_tries.append(total_cases - attempted_cases)

print('T01 without tries:               ', T01_without_tries)
print('Anzahl Fälle insgesamt from T08: ', col_anz_faelle_ins_T08)

# Both yearly series in one figure with vertical grid lines.
fig, axs = plt.subplots()
series_to_draw = (
    (T01_without_tries, 'T01 without "Erfasste Fälle davon Versuche"'),
    (col_anz_faelle_ins_T08, 'T08 "Anzahl Fälle insgesamt"'),
)
for yearly_values, line_label in series_to_draw:
    axs.plot(x_labels, yearly_values, label=line_label)
axs.grid(axis='x')

axs.set(title='Overall Crimes per Year')
axs.legend()

# KFZ Diebstahl 263a (<- Cybercrime) vs KFZ Diebstahl 263 (herkömmlich)

In [None]:
def _flatten(nested):
    """Concatenate an iterable of per-year sequences into one flat list."""
    return [element for year in nested for element in year]


def _keep_every_nth_ticklabel(ax, n):
    """Hide all x tick labels of ``ax`` except every ``n``-th one."""
    for i, label in enumerate(ax.xaxis.get_ticklabels()):
        if i % n != 0:
            label.set_visible(False)


def _mark_year_ends(ax, years):
    """Draw a red vertical line at the 'Dez. <year>' position of every year."""
    for year in years:
        ax.axvline(x='Dez. ' + str(year), color='red')


def create_plots(c1_key, c2_key, sum_key_of_c1_and_c2, y_ticks, y_ticks_combined, args):
    """Plot and correlate two offence keys on yearly and monthly data.

    Shows four figures (yearly comparison, one monthly panel per key, a
    monthly overlay, and the two monthly series stacked) and prints the
    Pearson correlation matrices of the yearly and of the monthly series.

    Parameters
    ----------
    c1_key, c2_key
        Offence keys handed to the data helpers.
    sum_key_of_c1_and_c2
        Key whose monthly series is drawn as a black line over the stacked
        areas, or None to omit that line.
    y_ticks
        Step between y ticks in the overlay figure.
    y_ticks_combined
        Step between y ticks in the stacked figure.
    args
        Sequence of exactly five items: monthly data, yearly data, display
        name of c1, display name of c2, display name of the sum key. The
        first item is accepted for compatibility but not used.

    Notes
    -----
    Reads the module-level ``years`` and ``db_t08``; the monthly series are
    always loaded from ``db_t08``.
    NOTE(review): the ``True`` passed to the data helpers is presumably the
    "add rows with the same key" switch -- confirm against src.DataFunctions.
    """
    _, df_yearly, c1_key_name, c2_key_name, sum_key_name = args
    label_step = 3  # keep every 3rd x tick label

    # Monthly series (flattened over all years) and yearly series per key
    flat_c1_cases_monthly = _flatten(transform_monthly_data_to_list(db_t08, c1_key, years, True))
    flat_c2_cases_monthly = _flatten(transform_monthly_data_to_list(db_t08, c2_key, years, True))
    if sum_key_of_c1_and_c2 is not None:
        flat_sum_cases_monthly = _flatten(
            transform_monthly_data_to_list(db_t08, sum_key_of_c1_and_c2, years, True))
    x_labels_monthly = create_x_labels(years)

    c1_years, c1_cases_yearly = get_yearly_cases_by_key(c1_key, df_yearly, True, years)
    c2_years, c2_cases_yearly = get_yearly_cases_by_key(c2_key, df_yearly, True, years)

    # Figure 1: yearly data, each key alone and both together
    fig, axs = plt.subplots(1, 3, figsize=(15, 6))
    axs[0].plot(c1_years, c1_cases_yearly)
    axs[0].set_title(c1_key_name)
    axs[1].plot(c2_years, c2_cases_yearly, color='red')
    axs[1].set_title(c2_key_name)
    axs[2].plot(c1_years, c1_cases_yearly)
    axs[2].plot(c2_years, c2_cases_yearly, color='red')
    axs[2].set_title("Direct comparison")
    axs[2].legend([c1_key_name, c2_key_name])
    plt.show()
    plt.close()

    # Figure 2: monthly data, one panel per key
    fig, axs = plt.subplots(2, 1, figsize=(15, 18))
    monthly_panels = (
        (axs[0], flat_c1_cases_monthly, c1_key_name),
        (axs[1], flat_c2_cases_monthly, c2_key_name),
    )
    for ax, monthly_cases, key_name in monthly_panels:
        ax.plot(x_labels_monthly, monthly_cases)
        ax.set_title(f'{key_name} from {years[0]} to {years[-1]}')
        ax.tick_params(axis='x', rotation=90)
        ax.grid(True)
    for ax in axs:
        _mark_year_ends(ax, years)
        _keep_every_nth_ticklabel(ax, label_step)
    plt.show()

    # Figure 3: both monthly series overlaid in one plot
    fig, axs = plt.subplots(sharey=True, layout='constrained', figsize=(15, 9))
    axs.fill_between(x_labels_monthly, flat_c1_cases_monthly)
    axs.fill_between(x_labels_monthly, flat_c2_cases_monthly)
    axs.set(yscale='linear', title=f'{c1_key_name} vs. {c2_key_name}')
    axs.legend([c1_key_name, c2_key_name])
    axs.tick_params(axis='x', rotation=90)
    axs.grid(True)
    monthly_max = max(np.max(flat_c2_cases_monthly), np.max(flat_c1_cases_monthly))
    # Ticks from 0 up to (not including) the maximum rounded up to the next 1000
    axs.set_yticks(np.arange(0, np.ceil(monthly_max / 1000) * 1000, y_ticks))
    _keep_every_nth_ticklabel(axs, label_step)
    _mark_year_ends(axs, years)
    plt.show()

    # Figure 4: c2 stacked on top of c1, optionally with the sum key as a line.
    # Zero-pad the shorter series at the end so both have the same length.
    max_length = max(len(flat_c1_cases_monthly), len(flat_c2_cases_monthly))
    flat_c1_cases_padded = np.pad(flat_c1_cases_monthly, (0, max_length - len(flat_c1_cases_monthly)))
    flat_c2_cases_padded = np.pad(flat_c2_cases_monthly, (0, max_length - len(flat_c2_cases_monthly)))
    stacked_cases = flat_c2_cases_padded + flat_c1_cases_padded

    fig, axs = plt.subplots(sharey=True, layout='constrained', figsize=(15, 9))
    axs.fill_between(x_labels_monthly, flat_c1_cases_padded, alpha=0.5)
    axs.fill_between(x_labels_monthly, stacked_cases, flat_c1_cases_padded, alpha=0.5)
    if sum_key_of_c1_and_c2 is not None:
        axs.plot(x_labels_monthly, flat_sum_cases_monthly, c='black')
        # The legend lists lines before filled areas, hence the sum name first.
        axs.legend([sum_key_name, c1_key_name, c2_key_name])
        axs.set(yscale='linear', title=f'{sum_key_name} of {c1_key_name} + {c2_key_name}')
    else:
        axs.legend([c1_key_name, c2_key_name])
        axs.set(yscale='linear', title=f'{c1_key_name} + {c2_key_name}')

    axs.tick_params(axis='x', rotation=90)
    axs.grid(True)
    monthly_max_combined = np.max(stacked_cases)
    axs.set_yticks(np.arange(0, np.ceil(monthly_max_combined / 1000) * 1000, y_ticks_combined))
    _keep_every_nth_ticklabel(axs, label_step)
    _mark_year_ends(axs, years)
    plt.show()

    # Correlation of the two keys on yearly and on monthly data
    print(f"Pearson product-moment correlation coefficients of {c1_key_name} and {c2_key_name} with yearly data: \n{np.corrcoef(c1_cases_yearly, c2_cases_yearly)}") 
    print(f"Pearson product-moment correlation coefficients of {c1_key_name} and {c2_key_name} with monthly data: \n{np.corrcoef(flat_c1_cases_monthly, flat_c2_cases_monthly)}")

In [None]:
# The two offence keys to compare; no key for their sum is passed.
c1_key, c2_key = '897000', '632079'
sum_key_of_c1_and_c2 = None

# Positional bundle unpacked by create_plots as (monthly data, yearly data,
# name of c1, name of c2, name of the sum key). The raw keys are passed in the
# three name slots, so they also appear as plot titles and legend entries.
processed_data_for_keys = [
    db_t08,
    db_t01,
    c1_key,
    c2_key,
    sum_key_of_c1_and_c2,
]


In [None]:
# Draw all comparison figures for the two keys chosen above.
create_plots(
    c1_key=c1_key,
    c2_key=c2_key,
    sum_key_of_c1_and_c2=sum_key_of_c1_and_c2,
    y_ticks=2000,
    y_ticks_combined=5000,
    args=processed_data_for_keys,
)