In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tueplots.constants.color import rgb
from tueplots import figsizes, bundles, fontsizes, fonts
from src.DataLoaders import T01DataLoader, T08DataLoader
from src.DataFunctions import get_months, transform_monthly_data_to_list, transform_df_to_list, create_x_labels, get_yearly_cases_by_key
from src.PlotFuncitons import create_temporal_plots
%config InlineBackend.figure_format = 'retina'

# Variables

In [None]:
# Crime keys (values of the 'Schlüssel' column) referenced throughout the notebook
all_crimes_key = '------'
cybercrime_key = '897000'
fraud_key = '510000'
computerfraud_key_after_2015 = '897100'
computerfraud_key_until_2015 = '517500'

# Crime tables: one loader per table type, covering 2012 through 2022 inclusive
years = range(2012, 2023)
db_t01 = T01DataLoader()
db_t08 = T08DataLoader()

# crimes with yearly granularity (T01)
df_yearly = transform_df_to_list(db_t01, years)
# crimes with monthly granularity (T08)
df_years_monthly = transform_df_to_list(db_t08, years)

# Data Preparation

In [None]:
# Yearly case counts per crime category, read from the T01 tables.
# One slot per entry in `years`; every slot is overwritten in the loop below,
# so the buffers are allocated with np.empty and sized from `years` instead of
# a hard-coded length.
n_years = len(years)
cybercrime, overall_crime, computerfraud, fraud = (np.empty(n_years) for _ in range(4))

cases_column = 'Anzahl erfasste Fälle'
for i, year in enumerate(years):
    df = db_t01[year]
    keys = df['Schlüssel']

    overall_crime[i] = df.loc[keys == all_crimes_key, cases_column].values[0]
    cybercrime[i] = df.loc[keys == cybercrime_key, cases_column].values[0]
    fraud[i] = df.loc[keys == fraud_key, cases_column].values[0]

    # Computer fraud is looked up under a different key from 2016 onwards
    if year <= 2015:
        computerfraud_key = computerfraud_key_until_2015
    else:
        computerfraud_key = computerfraud_key_after_2015
    computerfraud[i] = df.loc[keys == computerfraud_key, cases_column].values[0]

# Monthly overall-crime counts from T08: one list per year, plus a flat month-by-month series
overall_crimes = transform_monthly_data_to_list(df_years_monthly, all_crimes_key, False)
flat_overall_crimes = [element for year in overall_crimes for element in year]

# Checking out the structure of some of the datasets

We used these blocks of code to get a rough feeling for our data.

### T01 with Crime Scene Distribution

In [None]:
# Load the 2022 T01 "Faelle" sheet and give the multi-level header columns
# (which pandas reads as 'Unnamed: N') descriptive names.
R_BU_T01_F = 'Datasets/PKS/2022/BU-T01-Faelle.xlsx'
df = pd.read_excel(R_BU_T01_F, skiprows=3, thousands=',', decimal='.')
df = df.rename(columns={
    'erfasste Fälle davon:\nVersuche': 'erfasste Fälle davon: Anzahl Versuche',
    'Unnamed: 5': 'erfasste Fälle davon: Versuche in %',
    'Tatortverteilung': 'Tatortverteilung: bis unter 20.000 Einwohner',
    'Unnamed: 7': 'Tatortverteilung: 20.000 bis unter 100.000',
    'Unnamed: 8': 'Tatortverteilung: 100.000 bis unter 500.000',
    # Fixed typo ('Tatortveteilung') so all crime-scene columns share one prefix
    'Unnamed: 9': 'Tatortverteilung: 500.000 und mehr',
    'Unnamed: 10': 'Tatortverteilung: unbekannt',
    'mit Schusswaffe': 'mit Schusswaffe: gedroht',
    'Unnamed: 12': 'mit Schusswaffe: geschossen',
    'Aufklärung': 'Aufklärung: Anzahl Fälle',
    'Unnamed: 14': 'Aufklärung: in % (AQ)',
    'Tatverdächtige': 'Tatverdächtige: insgesamt',
    'Unnamed: 16': 'Tatverdächtige: männlich',
    'Unnamed: 17': 'Tatverdächtige: weiblich',
    'Nichtdeutsche Tatverdächtige': 'Nichtdeutsche Tatverdächtige: Anzahl',
    'Unnamed: 19': 'Nichtdeutsche Tatverdächtige: Anteil an TV insg. in %',
})
# Drop the first four rows of the sheet body (presumably leftover header rows — verify against the file)
df = df.drop(range(4)).reset_index(drop=True)
df.head()

### T01 with crime scene distribution in percent

In [None]:
# Load the 2022 T01 crime-scene distribution (in percent) and name the
# 'Unnamed: N' columns produced by the multi-level header.
R_BU_T01_T = 'Datasets/PKS/2022/BU-T01-Tatortverteilung-Prozent.xlsx'
tatort_column_names = {
    'Tatortverteilung': 'Tatortverteilung: bis unter 20.000 Einwohner',
    'Unnamed: 4': 'Tatortverteilung: 20.000 bis unter 100.000',
    'Unnamed: 5': 'Tatortverteilung: 100.000 bis unter 500.000',
    'Unnamed: 6': 'Tatortverteilung: 500.000 und mehr',
    'Unnamed: 7': 'Tatortverteilung: unbekannt',
}
df = pd.read_excel(
    R_BU_T01_T, skiprows=3, thousands=',', decimal='.'
).rename(columns=tatort_column_names)

# Row 4 (columns 3 onwards) is kept separately under its own label before
# the first five rows are removed from the frame.
inhabitant_distribution = df.iloc[4, 3:].rename('Einwohnerverteilung (01.01.2022) in %')
df = df.drop(range(5)).reset_index(drop=True)
df.head()

### T01 with crime scene distributions of crimes which were fully completed

In [None]:
# Load the 2022 T01 sheet for completed cases ("vollendete Faelle") and give
# the multi-level header columns (read as 'Unnamed: N') descriptive names.
R_BU_T01_F = 'Datasets/PKS/2022/BU-T01-vollendete-Faelle.xlsx'
df = pd.read_excel(R_BU_T01_F, skiprows=3, thousands=',', decimal='.')
df = df.rename(columns={
    'erfasste Fälle davon:\nVersuche': 'erfasste Fälle davon: Anzahl Versuche',
    'Unnamed: 5': 'erfasste Fälle davon: Versuche in %',
    'Tatortverteilung': 'Tatortverteilung: bis unter 20.000 Einwohner',
    'Unnamed: 7': 'Tatortverteilung: 20.000 bis unter 100.000',
    'Unnamed: 8': 'Tatortverteilung: 100.000 bis unter 500.000',
    # Fixed typo ('Tatortveteilung') so all crime-scene columns share one prefix
    'Unnamed: 9': 'Tatortverteilung: 500.000 und mehr',
    'Unnamed: 10': 'Tatortverteilung: unbekannt',
    'mit Schusswaffe': 'mit Schusswaffe: gedroht',
    'Unnamed: 12': 'mit Schusswaffe: geschossen',
    'Aufklärung': 'Aufklärung: Anzahl Fälle',
    'Unnamed: 14': 'Aufklärung: in % (AQ)',
    'Tatverdächtige': 'Tatverdächtige: insgesamt',
    'Unnamed: 16': 'Tatverdächtige: männlich',
    'Unnamed: 17': 'Tatverdächtige: weiblich',
    'Nichtdeutsche Tatverdächtige': 'Nichtdeutsche Tatverdächtige: Anzahl',
    'Unnamed: 19': 'Nichtdeutsche Tatverdächtige: Anteil an TV insg. in %',
})
# Drop the first four rows of the sheet body (presumably leftover header rows — verify against the file)
df = df.drop(range(4)).reset_index(drop=True)
df.head()

In [None]:
# Load the 2022 T08 sheet (crimes by time of offence) and reshape the
# 'Straftaten insgesamt' row into a long (month, cases) table.
BU_T08_T = 'Datasets/PKS/2022/BU-T08-Tatzeit.xlsx'
df = pd.read_excel(BU_T08_T, skiprows=4, thousands=',', decimal='.')

# Columns 3..14 take their names from the first data row.
# A new column list is assigned instead of writing into `df.columns.values`:
# an Index is meant to be immutable and in-place writes to its buffer are unsupported.
column_names = df.columns.to_list()
column_names[3:15] = df.iloc[0, 3:15].to_list()
df.columns = column_names

df = df.drop([0, 1]).reset_index(drop=True)
total = df.loc[df['Straftat'] == 'Straftaten insgesamt']
# Keep only the columns from index 3 up to (excluding) the last three and melt them to long format
total = pd.melt(total.iloc[:, 3:-3], var_name='Monat', value_name='Straftaten insgesamt')
# Code to plot all crimes in 2022
# ax = total.plot(xlabel=total.Monat)
# plt.title('Straftaten in Deutschland 2022')
# ax.set_ylim(0,5e5)

total

# Sanity Check: Searching for Inconsistencies Between Our Tables!

**It can be seen that the numbers from T01 and T08 do not add up**

Inside T08 the sum of the months + 'Tatzeit unbekannt' match 'Anzahl Fälle insgesamt'. However, in 2013 this is not the case as there is no column 'Tatzeit unbekannt'

In [None]:
# Compare three ways of obtaining the yearly total of all crimes:
#   1. the T01 yearly value,
#   2. the T08 'Anzahl erfasste Fälle' column,
#   3. the sum of the T08 monthly columns plus 'Tatzeit unbekannt'.

# Sum of the monthly values per year (cases with unknown month still missing)
yearly_totals_T08_mis_val = [np.sum(year) for year in overall_crimes]

# Cases with unknown month; tables without a 'Tatzeit unbekannt' column contribute 0
time_unknown = []
for year, df in df_years_monthly:
    if 'Tatzeit unbekannt' in df:
        # .iloc[0] pulls out the scalar explicitly: calling int() on a
        # one-element Series is deprecated in pandas.
        unknown = df.loc[df['Schlüssel'] == all_crimes_key, 'Tatzeit unbekannt'].iloc[0]
    else:
        unknown = 0
    time_unknown.append(int(unknown))

# Combining monthly cases and cases with unknown month
yearly_totals_T08 = [
    monthly + unknown
    for monthly, unknown in zip(yearly_totals_T08_mis_val, time_unknown)
]

# Yearly totals as reported in the T08 column 'Anzahl erfasste Fälle'
col_anz_faelle_ins_T08 = [
    int(df.loc[df['Schlüssel'] == all_crimes_key, 'Anzahl erfasste Fälle'].iloc[0])
    for year, df in df_years_monthly
]

# Yearly totals from T01
yearly_totals_T01 = [int(c) for c in overall_crime]

# Printing raw numbers
print('Totals from T01:                ', yearly_totals_T01)
print('Anzahl Fälle insgesamt from T08:', col_anz_faelle_ins_T08)
print('Summed monthly cases from T08:  ', yearly_totals_T08)

x_labels = list(years)

# Visualization - Line Graph
# This was removed as it didn't look as well as the Bar Plot
# plt.rcParams.update(bundles.icml2022(column="half", nrows=1, ncols=1))
# fig, axs = plt.subplots()
# axs.plot(x_labels, yearly_totals_T01, label='T01 yearly totals', color=rgb.tue_red)
# axs.plot(x_labels, col_anz_faelle_ins_T08,label='T08 "Anzahl Fälle insgesamt"', color=rgb.tue_blue)
# axs.plot(x_labels, yearly_totals_T08, label='T08 summed columns', color=rgb.tue_green)
# axs.grid(axis='y')
# axs.set(title='Overall Crimes per Year')
# axs.legend(loc='lower left')

# Visualization - Bar Plot (three bars per year, 0.2 wide, placed side by side)
plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=2))

x = range(len(yearly_totals_T01))
fig, axs = plt.subplots()

# zorder keeps the grid lines behind the bars
axs.grid(axis='y', zorder=0)
axs.bar(x, yearly_totals_T01, width=0.2, label='T01 yearly totals', color=rgb.tue_red, zorder=3)
axs.bar([i + 0.2 for i in x], col_anz_faelle_ins_T08, width=0.2, label='T08 "Anzahl Fälle insgesamt"', color=rgb.tue_blue, zorder=3)
axs.bar([i + 0.4 for i in x], yearly_totals_T08, width=0.2, label='T08 summed columns', color=rgb.tue_green, zorder=3)
axs.legend(loc='lower left')
#axs.set_yscale('log')
axs.set(title='Overall Crimes per Year')
# Tick under the middle bar of each group
axs.set_xticks([i + 0.2 for i in x], x_labels)

plt.show()

## Checking if Adding Rows With the Same Key In T08 Changes Anything Compared to When It Is Not Done

There is a difference in the 12 months of 2015, which can be attributed to the overlap of keys for cyberfraud described above.

In [None]:
# Fetch the monthly series for key 517500 twice, once with the last flag set
# to True and once to False, and count the months in which the two differ.
data_517500 = transform_monthly_data_to_list(df_years_monthly, '517500', True)
data_517500_flat = [value for yearly in data_517500 for value in yearly]

data_517500_add = transform_monthly_data_to_list(df_years_monthly, '517500', False)
data_517500_flat_add = [value for yearly in data_517500_add for value in yearly]

diff_count = sum(
    1
    for plain, added in zip(data_517500_flat, data_517500_flat_add)
    if plain != added
)
same_data = diff_count == 0

print('Data is always the same: ' + str(same_data))
print('Number of months with different data: ' + str(diff_count))

print('Not adding rows:', data_517500_flat)
print('Adding rows:    ', data_517500_flat_add)

## Assess Whether Excluding Attempted Crimes in T01 Data Aligns With T08 Data

Attempted crimes are labelled as "Erfasste Fälle davon Versuche" (Column 14 in the BU-T01-Faelle.xlsx files)

This is not the case, as it can be seen in the plot.

In [None]:
# Attempted crimes per year, copied from Table T01-Faelle
tries = [450708, 457911, 461117, 481103, 533647, 454461, 423408, 416594, 399759, 371382, 414197]

# T01 totals with the attempted crimes subtracted
T01_without_tries = [total - attempted for total, attempted in zip(yearly_totals_T01, tries)]
print('T01 without tries:       ', T01_without_tries)
print('Overall crimes from T08: ', col_anz_faelle_ins_T08)

# Both series on one axis, one tick per year
fig, axs = plt.subplots()
for series, series_label, series_color in (
    (T01_without_tries, 'T01 without attempted crimes', rgb.tue_red),
    (col_anz_faelle_ins_T08, 'T08 overall crimes', rgb.tue_blue),
):
    axs.plot(x_labels, series, label=series_label, color=series_color)
axs.set_xticks(x_labels)
axs.grid(axis='y')
axs.set_title('Overall Crimes per Year')
axs.legend()

plt.show()

# Assess the correlation or cyclic pattern between months and years.

In [None]:
# Arrange the flat monthly series as a (year x month) matrix: element i of the
# flat list lands in row i // 12, column i % 12 (12 months in a year).
# Reshaping the data directly avoids an uninitialised buffer and raises a
# ValueError if the series does not contain 12 values for every year.
months = get_months()
mat = np.asarray(flat_overall_crimes, dtype=float).reshape(len(years), 12)

# not the right way to do this, but it works as proof of concept
fig, ax = plt.subplots()
ax.matshow(mat, cmap='RdGy_r')
ax.set_yticks(range(mat.shape[0]))
ax.set_xticks(range(mat.shape[1]))
ax.set_xticklabels(months)
ax.set_yticklabels(years)
plt.xticks(rotation=45)
plt.show()

# Temporal Exploration

## Exploration of Yearly Data (T01 Table)

In [None]:
# Figure 1: cybercrime next to all remaining crime, on a log-scaled y axis
fig, ax = plt.subplots()
non_cyber_crime = overall_crime - cybercrime
ax.plot(years, cybercrime, color=rgb.tue_dark)
ax.plot(years, non_cyber_crime)
ax.set_title('Overall Crime vs. Cybercrime')
ax.set_yscale('log')

# Figure 2: total fraud as a line, split into a digital band (between the
# total and the analog share) and an analog band (from the analog share down to 0)
plt.rcParams.update(bundles.icml2022(column="half", nrows=1, ncols=1))
fig, ax = plt.subplots()
analog_fraud = fraud - computerfraud
ax.plot(years, fraud, c='black')
ax.fill_between(years, fraud, analog_fraud, alpha=.5)
ax.fill_between(years, analog_fraud, 0, alpha=.5)
ax.set_title('Analog vs. Digital Fraud')
# a single value only sets the lower y limit
ax.set_ylim(6e5)
ax.legend(['Fraud total', 'digital', 'analog'])
plt.xticks(rotation=0)
plt.show()

In [None]:
# Creating some plots to visualize the relation between crimes in general and cybercrimes.
# The T01 time-series CSV is ';'-separated and Latin-1 encoded; its first line is skipped.
Z_T01_F = 'Datasets/PKS/2022/T01-Faelle.csv'
# Encoding name written without the stray leading space the original literal carried
df_z_t01_f = pd.read_csv(Z_T01_F, sep=';', encoding='iso8859-1', skiprows=1, thousands=',', decimal='.')

def get_data_times_series(key):
    """Select the time series of one crime key from the T01 CSV frame.

    Reads the module-level frame ``df_z_t01_f``.

    Args:
        key: Value to match against the 'Schluessel' column.

    Returns:
        A tuple ``(rows, years, cases)``: the matching rows of the frame, the
        values of their 'Jahr' column as a list, and the values of their
        'erfasste Faelle' column as a list.
    """
    matches_key = df_z_t01_f['Schluessel'] == key
    rows = df_z_t01_f.loc[matches_key]
    return rows, list(rows['Jahr']), list(rows['erfasste Faelle'])

# Long-running yearly series for all crimes and for cybercrime
all_crimes, all_crimes_x, all_crimes_y = get_data_times_series(all_crimes_key)
cyber_crimes, cyber_crimes_x, cyber_crimes_y = get_data_times_series(cybercrime_key)

# Figure 1: both series as lines, side by side on a shared y axis
plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=1))
fig, axs = plt.subplots(1, 2, sharey=True, layout='constrained')
line_panels = (
    (axs[0], cyber_crimes_x, cyber_crimes_y, 'Cyber Crimes'),
    (axs[1], all_crimes_x, all_crimes_y, 'Overall Crimes'),
)
for panel, panel_x, panel_y, panel_title in line_panels:
    panel.plot(panel_x, panel_y)
    panel.set_title(panel_title)
    panel.grid(axis='y')

plt.show()

# Figure 2: the same totals drawn as two stacked bands on a log scale,
# once with the cybercrime band on top and once with it at the bottom
total_cases = np.asarray(all_crimes_y)
cyber_cases = np.asarray(cyber_crimes_y)
analog_cases = total_cases - cyber_cases

fig, axs = plt.subplots(1, 2, sharey=True, layout='constrained')
left, right = axs

left.fill_between(all_crimes_x, all_crimes_y, analog_cases, alpha=.3)
left.fill_between(all_crimes_x, analog_cases, 0, alpha=.3)
left.set_yscale('log')
left.set_title('Digital vs. Analog Crime')
left.legend(['digital', 'analog'])

right.fill_between(all_crimes_x, total_cases, cyber_cases, alpha=.3)
right.fill_between(all_crimes_x, cyber_cases, 0, alpha=.3)
right.set_yscale('log')
right.set_title('Digital vs. Analog Crime')
right.legend(['analog', 'digital'])
#fig.suptitle('Same data & scale, different order :D')

What we could say using this type of plot:
1. Overall crime has been almost constant in its magnitude
2. Cybercrime, although very low in comparison, has grown almost exponentially (linear in log scale) for many years

In [None]:
# Pearson product-moment correlation between overall crimes and cybercrimes:
# the off-diagonal entry of the 2x2 correlation matrix
correlation_matrix = np.corrcoef(all_crimes_y, cyber_crimes_y)
print(correlation_matrix[0, 1])

### Fraud vs Computerfraud

In [None]:
# Yearly series for computer fraud (key 897100) and fraud (key 510000).
# The fraud frame is bound to `betrug`, not `fraud`: `fraud` already names the
# yearly NumPy array used by the "Analog vs. Digital Fraud" plot, and rebinding
# it to a DataFrame would break that cell when it is re-run.
computerbetrug, computerbetrug_x, computerbetrug_y = get_data_times_series(computerfraud_key_after_2015)
betrug, betrug_x, betrug_y = get_data_times_series(fraud_key)

# Figure 1: the two series in separate panels
plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=2))
fig, axs = plt.subplots(1, 2)

axs[0].plot(betrug_x, betrug_y, color=rgb.tue_red)
axs[0].set_title('Fraud')
axs[0].grid(axis='y')

axs[1].plot(computerbetrug_x, computerbetrug_y, color=rgb.tue_red)
axs[1].set_title('Computerfraud (Key 897100)')
axs[1].grid(axis='y')

plt.show()
plt.close()

# Figure 2: both series on one axis
plt.rcParams.update(bundles.icml2022(column="half", nrows=1, ncols=1))
fig, axs = plt.subplots()
axs.set_title('Fraud vs. Computerfraud (Key 897100)')
axs.grid(axis='y')
axs.plot(betrug_x, betrug_y)
axs.plot(computerbetrug_x, computerbetrug_y)

plt.show()

### Relationship between Betrug and Computerbetrug

There seems to be a strong negative correlation between Betrug and Computerbetrug.  
Data for Computerbetrug is only available from 2016


In [None]:
# Restrict the fraud series to its last n entries so it lines up with the
# (shorter) computer fraud series, then correlate and plot both.
n = len(computerbetrug_y)
betrug_recent_y = betrug_y[-n:]

print('Pearson product-moment correlation coefficients of Fraud and Cyberfraud with key 897100 (2016 - 2022):')
print('%.3f' % np.corrcoef(betrug_recent_y, computerbetrug_y)[0, 1])

plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=2))

fig, axs = plt.subplots(1, 2)
panels = (
    (axs[0], betrug_recent_y, 'Fraud'),
    (axs[1], computerbetrug_y, 'Computerfraud (Key 897100)'),
)
for panel, panel_y, panel_title in panels:
    panel.plot(computerbetrug_x, panel_y, color=rgb.tue_red)
    panel.grid(axis='y')
    panel.set_title(panel_title)

plt.show()

### Also add the % of solved crimes to the graph

In [None]:
# Committed cases and share of solved cases for fraud and computer fraud,
# using the yearly tables from index 4 of df_yearly onwards.
betrug_2_x, betrug_2_y = get_yearly_cases_by_key(fraud_key, df_yearly[4:], True)
betrug_solved_x, betrug_solved_y = get_yearly_cases_by_key(fraud_key, df_yearly[4:], True, 'Aufklärung: Anzahl Fälle')
# solved cases / committed cases, per year
betrug_solved_y = [_solved / _crimes for _solved, _crimes in zip(betrug_solved_y, betrug_2_y)]
computerbetrug_2_x, computerbetrug_2_y = get_yearly_cases_by_key(computerfraud_key_after_2015, df_yearly[4:], True)
computerbetrug_solved_x, computerbetrug_solved_y = get_yearly_cases_by_key(computerfraud_key_after_2015, df_yearly[4:], True, 'Aufklärung: Anzahl Fälle')
computerbetrug_solved_y = [_solved / _crimes for _solved, _crimes in zip(computerbetrug_solved_y, computerbetrug_2_y)]

n = len(computerbetrug_2_y)
plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=2))
fig, axs = plt.subplots(1, 2)

# Left panel: fraud. Cases on the left axis, solved share on a twin right axis.
betrug_line_cases, = axs[0].plot(computerbetrug_2_x, betrug_2_y[-n:], label = 'committed')
axs[0].set_title('Fraud')
axs[0].grid(axis='y')
betrug_axis_solved = axs[0].twinx()
# Raw string: '\%' is not a valid Python escape sequence, so the backslash is
# kept literally via r'...' (same text as before, without the syntax warning).
betrug_line_solved, = betrug_axis_solved.plot(computerbetrug_2_x, betrug_solved_y[-n:], label = r'\% solved', color=rgb.tue_gray)
# One combined legend for the lines of both y axes
lines = [betrug_line_cases, betrug_line_solved]
labels = [line.get_label() for line in lines]
betrug_axis_solved.legend(lines, labels, loc='upper right')
betrug_axis_solved.set_ylim((0.2, 0.8))

# Right panel: computer fraud, same layout
axs[1].set_title('Computerfraud (Key 897100)')
axs[1].grid(axis='y')
computerbetrug_line_cases, = axs[1].plot(computerbetrug_2_x, computerbetrug_2_y, label = 'committed')
computerbetrug_axis_solved = axs[1].twinx()
computerbetrug_line_solved, = computerbetrug_axis_solved.plot(computerbetrug_2_x, computerbetrug_solved_y[-n:], label = r'\% solved', color=rgb.tue_gray)
lines = [computerbetrug_line_cases, computerbetrug_line_solved]
labels = [line.get_label() for line in lines]
computerbetrug_axis_solved.legend(lines, labels, loc='upper left')
# Same solved-share range as the left panel so the two are comparable
computerbetrug_axis_solved.set_ylim(betrug_axis_solved.get_ylim())

plt.show()
plt.close()

print('Pearson product-moment correlation coefficients of Fraud and Cyberfraud with key 897100 (2016 - 2022):')
print('%.3f' % np.corrcoef(betrug_2_y[-n:], computerbetrug_2_y)[0][1])

In [None]:
# Plot fraud next to the old computer fraud key (517500) and correlate the two series.
# Both series are drawn against the fraud x values, so they are assumed to
# cover the same years — TODO confirm.
_, _, data_517500_y = get_data_times_series('517500')

fig, axs = plt.subplots(1, 2)
axs[0].plot(betrug_x, betrug_y, color=rgb.tue_red)
axs[0].set_title('Fraud')
axs[0].grid(axis='y')

axs[1].plot(betrug_x, data_517500_y, color=rgb.tue_red)
axs[1].set_title('Computerfraud (Key 517500)')
axs[1].grid(axis='y')

# Message corrected: the key is 517500 (the original text said 51700)
print('Pearson correlation Fraud and Computerfraud with key 517500 (1987 - 2022):')
print('%.3f' % np.corrcoef(betrug_y, data_517500_y)[0][1])

## Exploration of Monthly Data (T08 Table)

In the Table 08 of 2017 there is a change in the key 510000 (Betrug). Therefore the key is there twice in two rows  
&rarr; Which one do we have to use?/does one include the other?/ Can we just add them up?
    (In excel table row 516&517)

Same Problem in 2013

### Running into some issues with the keys used for computerfraud in T-08 from different years

517500 - Computerbetrug key until 2015  
897100 - Computerbetrug key after 2015

However, key 517500 is still used later for sonstiger Computerbetrug

In [None]:
# Fraud ("Betrug"): monthly cases, one list per year
betrug_cases = transform_monthly_data_to_list(df_years_monthly, fraud_key, True)

# Computer fraud ("Computerbetrug"): the key changed in 2016, so the monthly
# data is fetched with the old key for 2012-2015 and with the new key for
# 2016-2022, and the two parts are joined into one series.
# ind_2015 is the number of yearly tables up to and including 2015, i.e. the
# split position. The previous value 3 put 2015 into the second part, which
# contradicted the names below and the yearly data (old key while year <= 2015).
ind_2015 = years.index(2015) + 1
df_years_2012_to_2015 = df_years_monthly[:ind_2015]
df_years_2016_to_2022 = df_years_monthly[ind_2015:]

computerbetrug_cases = transform_monthly_data_to_list(df_years_2012_to_2015, computerfraud_key_until_2015, True)
computerbetrug_cases_after_2015 = transform_monthly_data_to_list(df_years_2016_to_2022, computerfraud_key_after_2015, True)

# Append the 2016-2022 years to the 2012-2015 years
computerbetrug_cases.extend(computerbetrug_cases_after_2015)

### Visualization: Fraud and Computerfraud From 2016 to 2022

In [None]:
# One x label per month
x_labels_monthly = create_x_labels(years)

# Flatten the per-year lists into month-by-month series
flat_betrug_cases = [cases for yearly in betrug_cases for cases in yearly]
flat_computerbetrug_cases = [cases for yearly in computerbetrug_cases for cases in yearly]

print(flat_betrug_cases)
print(flat_computerbetrug_cases)

# Two stacked panels: fraud on top, computer fraud below
plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=1))

fig, axs = plt.subplots(2, 1)
panel_series = (
    (axs[0], flat_betrug_cases, 'Fraud'),
    (axs[1], flat_computerbetrug_cases, 'Computerfraud'),
)
for panel, series, panel_title in panel_series:
    panel.plot(x_labels_monthly, series, color=rgb.tue_red)
    panel.set_title(panel_title)
    panel.tick_params(axis='x', rotation=90)
    panel.grid(axis='y')

# Vertical marker at January 2016 in the computer fraud panel
axs[1].axvline(x='Jan. 2016', color=rgb.tue_blue)

for panel in axs:
    # keep only every sixth x label visible
    for position, tick_label in enumerate(panel.xaxis.get_ticklabels()):
        if position % 6:
            tick_label.set_visible(False)

    # grey background for every other block of twelve months
    for block_start in range(0, len(x_labels_monthly), 24):
        panel.axvspan(block_start, block_start + 12, facecolor='grey', alpha=0.3)



**Key 517500:**  
Jan. 2012 to Dec. 2015 &rarr; Computerbetrug  
Jan. 2016 to Dec. 2022 &rarr; Computerbetrug (sonstiger)

**Key 897100:**  
Jan. 2015 to Dec 2022 &rarr; Computerbetrug

**We can see that in the year 2015 two different keys (with different case numbers) were used for Computerbetrug.**

For now the old key is used till the end of 2015 for the Computerbetrug plots

In [None]:
# Overlay of the monthly fraud and computer fraud series as filled areas.
# The plotting bundle is applied BEFORE the figure is created (as in the other
# cells): rcParams changed after plt.subplots() no longer affect the size of
# the figure that already exists.
plt.rcParams.update(bundles.icml2022(column="full"))
fig, axs = plt.subplots(sharey=True, layout='constrained')


# # background shading
# for ind, month in enumerate(x_labels_monthly):
#     if ind % 24 == 0:
#         axs.axvspan(ind, ind+12, facecolor='grey', alpha=0.3)

# zorder keeps the grid lines behind the filled areas
axs.fill_between(x_labels_monthly, flat_betrug_cases, color=rgb.tue_red, alpha=0.5, label='Fraud', zorder=3)
axs.fill_between(x_labels_monthly, flat_computerbetrug_cases, color=rgb.tue_blue, alpha=0.5, label='Computerfraud', zorder=3)

axs.set(yscale='linear', title='Fraud vs. Computerfraud')
axs.legend(loc='upper right')
axs.tick_params(axis='x', rotation=90)
axs.grid(axis='y', zorder=0)

# filtering x-labels: keep only every sixth label visible
for ind, label in enumerate(axs.xaxis.get_ticklabels()):
    if ind % 6 != 0:
        label.set_visible(False)

Using yearly data, we saw a strong negative correlation between fraud and computerfraud between 2016 and 2022.  
Using monthly data, we can see a moderate to strong positive correlation.

In [None]:
# Correlation between 2012 and 2022 (monthly data)
print('Correlation between fraud and cyberfraud between 2012 and 2022 (monthly data):', 
     '{:.3f}'.format(np.corrcoef(flat_betrug_cases, flat_computerbetrug_cases)[0,1]))

# Correlation for Covid Years: 2020 & 2021 (monthly data).
# The flat series start in January of years[0] with 12 values per year, so the
# window runs from Jan. 2020 up to (excluding) Jan. 2022. The previous slice
# took the LAST 24 months, i.e. 2021 and 2022, which did not match the label.
n = len(flat_betrug_cases)
covid_start = (2020 - years[0]) * 12
covid_end = (2022 - years[0]) * 12
assert 0 <= covid_start < covid_end <= n, 'monthly series does not cover 2020-2021'
print('Correlation between fraud and cyberfraud between 2020 and 2021 (monthly data):', 
     '{:.3f}'.format(np.corrcoef(flat_betrug_cases[covid_start:covid_end],
                                 flat_computerbetrug_cases[covid_start:covid_end])[0,1]))

In [None]:
# Looking at the development of the initial Computerbetrug key 517500 next to
# the newer key 897100.

# Key 517500 over the whole period
data_517500 = transform_monthly_data_to_list(df_years_monthly, '517500', True)
data_517500_flat = [element for year in data_517500 for element in year]

# Key 897100 is available from the 2015 table onwards. It is fetched directly
# (instead of being sliced out of the spliced computer fraud series) so this
# plot does not depend on where that series switches keys. Earlier months are
# padded with NaN so the series lines up with the monthly x labels.
first_897100_year_idx = years.index(2015)
data_897100 = transform_monthly_data_to_list(df_years_monthly[first_897100_year_idx:], '897100', True)
data_897100_flat = [np.nan] * (first_897100_year_idx * 12) + [element for year in data_897100 for element in year]

plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=1))

fig, axs = plt.subplots()
axs.set_yscale('linear')
axs.plot(x_labels_monthly, data_517500_flat, color=rgb.tue_blue)
# Same explicit x values for both lines
axs.plot(x_labels_monthly, data_897100_flat, color=rgb.tue_red)
axs.set_title('Cyberfraud - Different Keys')
axs.tick_params(axis='x', rotation=90)
axs.legend(('Key 517500', 'Key 897100'))
axs.grid(axis='y')

# filtering x-labels: keep only every sixth label visible
for ind, label in enumerate(axs.xaxis.get_ticklabels()):
    if ind % 6 != 0:
        label.set_visible(False)

# background shading: grey for every other block of twelve months
for ind in range(0, len(x_labels_monthly), 24):
    axs.axvspan(ind, ind + 12, facecolor='grey', alpha=0.3)

### Looking at the general development of crimes based on monthly data

Vertical lines used to mark the end of a year &rarr; highlights cyclic pattern of crime development

In [None]:
# Monthly overall crimes as a single line, with alternating year shading
plt.rcParams.update(bundles.icml2022(column="full", nrows=1, ncols=1))

# This visualizes the cyclic pattern better, but the format is not compatible with our report layout
#fig, axs = plt.subplots(figsize=(18,9))

fig, axs = plt.subplots()
axs.plot(x_labels_monthly, flat_overall_crimes, color=rgb.tue_red)
axs.set_title('Overall Crimes in Germany')
axs.grid(axis='y')
axs.tick_params(axis='x', rotation=90)

# keep only every sixth x label visible
for position, tick_label in enumerate(axs.xaxis.get_ticklabels()):
    if position % 6:
        tick_label.set_visible(False)

# grey background for every other block of twelve months
for block_start in range(0, len(x_labels_monthly), 24):
    axs.axvspan(block_start, block_start + 12, facecolor='grey', alpha=0.3)


# KFZ Diebstahl 263a/ Vehicle robbery (<- Cybercrime) vs KFZ Diebstahl 263/ Vehicle robbery (<- Non-Cybercrime)

In [None]:
# The two crime keys to compare, with the names used in the plots
c1_key, c1_name = '511120', "Vehicle robbery (cybercrime variant)"
c2_key, c2_name = '511110', "Vehicle robbery"

# Yearly cases and flattened monthly cases for the first key ...
c1_years, c1_cases_yearly = get_yearly_cases_by_key(c1_key, df_yearly, True)
c1_monthly_per_year = transform_monthly_data_to_list(df_years_monthly, c1_key, True)
c1_cases_monthly = [cases for yearly in c1_monthly_per_year for cases in yearly]

# ... and for the second key
c2_years, c2_cases_yearly = get_yearly_cases_by_key(c2_key, df_yearly, True)
c2_monthly_per_year = transform_monthly_data_to_list(df_years_monthly, c2_key, True)
c2_cases_monthly = [cases for yearly in c2_monthly_per_year for cases in yearly]

create_temporal_plots(
    c1_cases_yearly=c1_cases_yearly,
    c1_years=c1_years,
    c2_cases_yearly=c2_cases_yearly,
    c2_years=c2_years,
    flat_c1_cases_monthly=c1_cases_monthly,
    flat_c2_cases_monthly=c2_cases_monthly,
    x_labels_monthly=create_x_labels(years),
    c1_key_name=c1_name,
    c2_key_name=c2_name,
    years=years,
)

There is a strong negative correlation between cyber vehicle robbery and vehicle robbery when taking the data with yearly granularity.