In [1]:
import pandas as pd
import numpy as np

In [2]:
years = ["2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021",
         "2022", "2023"]

months_czech = ["leden", "unor", "brezen", "duben", "kveten", "cerven", "cervenec", "srpen", "zari", "rijen",
                "listopad", "prosinec"]

months_nums = [
    "01",  # January
    "02",  # February
    "03",  # March
    "04",  # April
    "05",  # May
    "06",  # June
    "07",  # July
    "08",  # August
    "09",  # September
    "10",  # October
    "11",  # November
    "12"  # December
]

month2days = {
    "01": 31,  # January
    "02": 28,  # February (29 in leap years)
    "03": 31,  # March
    "04": 30,  # April
    "05": 31,  # May
    "06": 30,  # June
    "07": 31,  # July
    "08": 31,  # August
    "09": 30,  # September
    "10": 31,  # October
    "11": 30,  # November
    "12": 31,  # December
}

NUTS3_name2abbrev = {
    "Praha": "PHA",
    "Středočeský": "STC",
    "Jihočeský": "JHC",
    "Plzeňský": "PLK",
    "Karlovarský": "KVK",
    "Ústecký": "ULK",
    "Liberecký": "LBK",
    "Královéhradecký": "HKK",
    "Pardubický": "PAK",
    "Vysočina": "VYS",
    "Jihomoravský": "JHM",
    "Olomoucký": "OLK",
    "Moravskoslezský": "MSK",
    "Zlínský": "ZLK",
}

kraje = ["PHA",
         "STC",
         "JHC",
         "PLK",
         "KVK",
         "ULK",
         "LBK",
         "HKK",
         "PAK",
         "VYS",
         "JHM",
         "OLK",
         "MSK",
         "ZLK", ]

# Salaries
region_codes_map = {
    '3018': 'PHA',
    '3026': 'STC',
    '3034': 'JHC',
    '3042': 'PLK',
    '3051': 'KVK',
    '3115': 'JHM',
    '3107': 'VYS',
    '3085': 'HKK',
    '3077': 'LBK',
    '3140': 'MSK',
    '3123': 'OLK',
    '3093': 'PAK',
    '3069': 'ULK',
    '3131': 'ZLK'
}
region_code = ['3018', '3034', '3115', '3051', '3107', '3085', '3077', '3140', '3123', '3093', '3042', '3026', '3069',
               '3131']

quarters = {
    "Q1": [1, 2, 3],
    "Q2": [4, 5, 6],
    "Q3": [7, 8, 9],
    "Q4": [10, 11, 12],
}

## Criminality Dataset

In [214]:
years_1 = ["2009", "2010", "2011", "2012", "2013", "2014"]
# idx_1 = [[97, 80], [97,80], [97,80]]
idx_1 = {
    "2009" : [92,75],
    "2010" : [97, 80],
    "2011" : [97, 80],
    "2012" : [97, 80],
    "2013" : [97, 80],
    "2014" : [97,80]
}
years_2 = ["2016", "2017", "2018", "2019", "2020"]
idx_2 = ["Krádeže prosté","Krádeže vloupáním" ]  # correct row numbers
years_3 = ["2021", "2022", "2023"]
idx_3 = ['krádeže prosté', 'krádeže vloupáním']

In [227]:
def criminality_rate_new(months_czech, years, idx):
    crim_df = pd.DataFrame(columns=('month', 'year', 'kraj', 'general_thefts', 'break_in_thefts'))
    count = 0
    search_value_1 = idx[0]
    search_value_2 = idx[1]
    column_to_search = 'Unnamed: 1'
    for year in years:
        # remember the last month for every kraj - cummulative numbers
        last_month_gen_tft = [0] * len(kraje)
        last_month_brk_in = [0] * len(kraje)
        for m in range(0, len(months_czech)):
            path = f"https://www.policie.cz/soubor/{year}-{months_nums[m]}-{months_czech[m]}-sest-01a-xlsx.aspx"
            try:
                df = pd.read_excel(path, sheet_name=None)  #loop through all sheets
                curr_kraj = 0
                if (int(year) >= 2019 and int(year) <= 2020) or (int(year) == 2018 and int(months_nums[m]) >= 3):
                    search_value_1 = "krádeže prosté"
                    search_value_2 = "krádeže vloupáním"
                for x in df:  # go through the sheets
                    if x in NUTS3_name2abbrev:
                        # print(df[x].columns)
                        gen_tft = df[x][df[x][column_to_search] == search_value_1]
                        brk_in = df[x][df[x][column_to_search] == search_value_2]
                        dict = {
                            'month': int(months_nums[m]),
                            'year': int(year),
                            'kraj': NUTS3_name2abbrev.get(x),
                            'general_thefts': gen_tft["Unnamed: 2"].values[0] - last_month_gen_tft[curr_kraj],
                            'break_in_thefts': brk_in["Unnamed: 2"].values[0] - last_month_brk_in[curr_kraj]
                        }
                        # print("Kraj: ", NUTS3_name2abbrev.get(x), " momnth: ", months_nums[m])
                        # print("Doc: ", gen_tft["Unnamed: 2"] , " last month: ", last_month_gen_tft[curr_kraj])
                        crim_df.loc[count] = dict
                        last_month_gen_tft[curr_kraj] = gen_tft["Unnamed: 2"].values[0]
                        last_month_brk_in[curr_kraj] = brk_in["Unnamed: 2"].values[0]
                        # print("Saving: ", gen_tft["Unnamed: 2"] )
                        count += 1
                        curr_kraj += 1
                # count += len(df) # number of sheets
            except Exception as e:
                print("error:", e, " ", " ", months_czech[m], " ", year)
    return crim_df

In [228]:
crim_df_3 = criminality_rate_new(months_czech, years_3, idx_3)
crim_df_3

error: HTTP Error 404: Not Found     listopad   2023
error: HTTP Error 404: Not Found     prosinec   2023


Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2021,PHA,374,826
1,1,2021,STC,197,301
2,1,2021,JHC,55,59
3,1,2021,PLK,91,134
4,1,2021,ULK,180,220
...,...,...,...,...,...
471,10,2023,ZLK,93,35
472,10,2023,VYS,77,49
473,10,2023,PAK,72,38
474,10,2023,LBK,84,89


In [229]:
crim_df_2 = criminality_rate_new(months_czech, years_2, idx_2)
crim_df_2

error: <urlopen error [Errno 110] Connection timed out>     listopad   2019


Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2016,PHA,2395,671
1,1,2016,STC,483,319
2,1,2016,JHC,194,132
3,1,2016,PLK,236,107
4,1,2016,ULK,499,219
...,...,...,...,...,...
821,12,2020,ZLK,34,29
822,12,2020,VYS,40,33
823,12,2020,PAK,44,31
824,12,2020,LBK,65,44


In [104]:
def criminality_rate_oldest(months_czech, years, idx):
    crim_df = pd.DataFrame(columns=('month', 'year', 'kraj', 'general_thefts', 'break_in_thefts'))
    count = 0
    for year in years:
         # remember the last month for every kraj - cummulative numbers
        last_month_gen_tft = [0] * len(kraje)
        last_month_brk_in = [0] * len(kraje)
        for m in range(0, len(months_czech)):
            days = month2days.get(months_nums[m])
            if int(year) == 2012 and int(months_nums[m]) == 2:
                days = "29"
            paths = [f"https://www.policie.cz/soubor/{months_nums[m]}-celkova-kriminalita-za-obdobi-od-01-01-{year}-do-{days}-{months_nums[m]}-{year}.aspx", f"https://www.policie.cz/soubor/statistiky-nove-cleneni-{year}-{months_nums[m]}-celkova-kriminalita-za-obdobi-od-01-01-{year}-do-{days}-{months_nums[m]}-{year}", f"https://www.policie.cz/soubor/2013-2014-{months_nums[m]}-celkova-kriminalita-za-obdobi-od-01-01-{year}-do-{days}-{months_nums[m]}-{year}.aspx"]
            for k in range(len(paths)):
                try:
                    df = pd.read_excel(paths[k], sheet_name=None)  #loop through all sheets
                    first_sheet = True
                    curr_kraj = 0
                    for x in df:  # go through the sheets
                        # skip the CR sheet
                        if first_sheet:
                            first_sheet = False
                            continue
                        dict = {
                            'month': int(months_nums[m]),
                            'year': year,
                            'kraj': kraje[curr_kraj],
                            'general_thefts': (df[x]["Unnamed: 3"]).iloc[idx.get(year)[0]] - last_month_gen_tft[curr_kraj],
                            'break_in_thefts': (df[x]["Unnamed: 3"]).iloc[idx.get(year)[1]] - last_month_brk_in[curr_kraj]
                        }
                        # print("Kraj: ", kraje[curr_kraj])
                        # print("doc: ",(df[x]["Unnamed: 3"]).iloc[idx[k][0]], " last month: ",  last_month_brk_in[curr_kraj])
                        crim_df.loc[count] = dict
                        last_month_gen_tft[curr_kraj] = (df[x]["Unnamed: 3"]).iloc[idx.get(year)[0]]
                        last_month_brk_in[curr_kraj] = (df[x]["Unnamed: 3"]).iloc[idx.get(year)[1]]
                        count += 1
                        curr_kraj += 1
                except Exception as e:
                    print("error:", e, " ", " ", months_czech[m], " ", year)
    return crim_df

In [105]:
crim_df_1 = criminality_rate_oldest(months_czech, years_1, idx_1)
crim_df_1

error: HTTP Error 404: Not Found     leden   2009
error: HTTP Error 404: Not Found     leden   2009
error: HTTP Error 404: Not Found     unor   2009
error: HTTP Error 404: Not Found     unor   2009
error: HTTP Error 404: Not Found     brezen   2009
error: HTTP Error 404: Not Found     brezen   2009
error: HTTP Error 404: Not Found     duben   2009
error: HTTP Error 404: Not Found     duben   2009
error: HTTP Error 404: Not Found     kveten   2009
error: HTTP Error 404: Not Found     kveten   2009
error: HTTP Error 404: Not Found     cerven   2009
error: HTTP Error 404: Not Found     cerven   2009
error: HTTP Error 404: Not Found     cervenec   2009
error: HTTP Error 404: Not Found     cervenec   2009
error: HTTP Error 404: Not Found     srpen   2009
error: HTTP Error 404: Not Found     srpen   2009
error: HTTP Error 404: Not Found     zari   2009
error: HTTP Error 404: Not Found     zari   2009
error: HTTP Error 404: Not Found     rijen   2009
error: HTTP Error 404: Not Found     rijen

Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2009,PHA,4237,775
1,1,2009,STC,1140,716
2,1,2009,JHC,275,181
3,1,2009,PLK,469,231
4,1,2009,KVK,198,96
...,...,...,...,...,...
975,12,2014,VYS,126,124
976,12,2014,JHM,113,112
977,12,2014,OLK,162,125
978,12,2014,MSK,265,150


### Add timeouts

In [230]:
yr1 = ["2019"]
crim_timeout_1 = criminality_rate_new(months_czech, yr1, idx_2)
crim_timeout_1

Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2019,PHA,1732,369
1,1,2019,STC,382,220
2,1,2019,JHC,151,73
3,1,2019,PLK,237,138
4,1,2019,ULK,379,149
...,...,...,...,...,...
163,12,2019,ZLK,69,48
164,12,2019,VYS,49,44
165,12,2019,PAK,62,33
166,12,2019,LBK,100,70


In [152]:
yr2 = ["2010", "2014"]
crim_timeout_2 = criminality_rate_oldest(months_czech, yr2, idx_1)
crim_timeout_2

error: HTTP Error 404: Not Found     leden   2010
error: HTTP Error 404: Not Found     leden   2010
error: HTTP Error 404: Not Found     unor   2010
error: HTTP Error 404: Not Found     unor   2010
error: HTTP Error 404: Not Found     brezen   2010
error: HTTP Error 404: Not Found     brezen   2010
error: HTTP Error 404: Not Found     duben   2010
error: HTTP Error 404: Not Found     duben   2010
error: HTTP Error 404: Not Found     kveten   2010
error: HTTP Error 404: Not Found     kveten   2010
error: HTTP Error 404: Not Found     cerven   2010
error: HTTP Error 404: Not Found     cerven   2010
error: HTTP Error 404: Not Found     cervenec   2010
error: HTTP Error 404: Not Found     cervenec   2010
error: HTTP Error 404: Not Found     srpen   2010
error: HTTP Error 404: Not Found     srpen   2010
error: HTTP Error 404: Not Found     zari   2010
error: HTTP Error 404: Not Found     zari   2010
error: HTTP Error 404: Not Found     rijen   2010
error: HTTP Error 404: Not Found     rijen

Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2010,PHA,3134,808
1,1,2010,STC,916,644
2,1,2010,JHC,298,184
3,1,2010,PLK,311,260
4,1,2010,KVK,709,449
...,...,...,...,...,...
331,12,2014,VYS,126,124
332,12,2014,JHM,113,112
333,12,2014,OLK,162,125
334,12,2014,MSK,265,150


In [121]:
yr3 = ["2013"]
crim_timeout_3 = criminality_rate_oldest(months_czech, yr3, idx_1)
crim_timeout_3

error: HTTP Error 404: Not Found     leden   2013
error: HTTP Error 404: Not Found     leden   2013
error: HTTP Error 404: Not Found     unor   2013
error: HTTP Error 404: Not Found     unor   2013
error: HTTP Error 404: Not Found     brezen   2013
error: HTTP Error 404: Not Found     brezen   2013
error: HTTP Error 404: Not Found     duben   2013
error: HTTP Error 404: Not Found     duben   2013
error: HTTP Error 404: Not Found     kveten   2013
error: HTTP Error 404: Not Found     kveten   2013
error: HTTP Error 404: Not Found     cerven   2013
error: HTTP Error 404: Not Found     cerven   2013
error: HTTP Error 404: Not Found     cervenec   2013
error: HTTP Error 404: Not Found     cervenec   2013
error: HTTP Error 404: Not Found     srpen   2013
error: HTTP Error 404: Not Found     srpen   2013
error: HTTP Error 404: Not Found     zari   2013
error: HTTP Error 404: Not Found     zari   2013
error: HTTP Error 404: Not Found     rijen   2013
error: HTTP Error 404: Not Found     rijen

Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2013,PHA,3713,824
1,1,2013,STC,953,784
2,1,2013,JHC,302,261
3,1,2013,PLK,331,279
4,1,2013,KVK,812,594
...,...,...,...,...,...
163,12,2013,VYS,160,133
164,12,2013,JHM,181,90
165,12,2013,OLK,223,132
166,12,2013,MSK,330,226


## Drop duplicates

In [131]:
crim_df_1

Unnamed: 0_level_0,month,kraj,general_thefts,break_in_thefts
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009,1,PHA,4237,775
2009,1,STC,1140,716
2009,1,JHC,275,181
2009,1,PLK,469,231
2009,1,KVK,198,96
...,...,...,...,...
2014,12,VYS,126,124
2014,12,JHM,113,112
2014,12,OLK,162,125
2014,12,MSK,265,150


In [139]:
# crim_df_1.set_index(keys=["year"], inplace=True)
# crim_df_1 = crim_df_1.drop(index=["2010"], axis=0)
# crim_df_1 = crim_df_1.drop(index=["2013"], axis=0)
# crim_df_1 = crim_df_1.drop(index=["2014"], axis=0)
# crim_df_1.drop(index=["2019"], axis=0)
# crim_df_1.reset_index(inplace=True)
crim_df_1

Unnamed: 0,year,month,kraj,general_thefts,break_in_thefts
0,2009,1,PHA,4237,775
1,2009,1,STC,1140,716
2,2009,1,JHC,275,181
3,2009,1,PLK,469,231
4,2009,1,KVK,198,96
...,...,...,...,...,...
499,2012,12,VYS,164,148
500,2012,12,JHM,127,140
501,2012,12,OLK,180,128
502,2012,12,MSK,330,156


In [239]:
# crim_df_2.set_index(keys=["year"], inplace=True)
# crim_df_2 = crim_df_2.drop(index=[2019], axis=0)
# crim_df_2.reset_index(inplace=True)
crim_df_2

Unnamed: 0,year,month,kraj,general_thefts,break_in_thefts
0,2016,1,PHA,2395,671
1,2016,1,STC,483,319
2,2016,1,JHC,194,132
3,2016,1,PLK,236,107
4,2016,1,ULK,499,219
...,...,...,...,...,...
667,2020,12,ZLK,34,29
668,2020,12,VYS,40,33
669,2020,12,PAK,44,31
670,2020,12,LBK,65,44


### Goddamn 2015
### ZIPs available [here](https://www.policie.cz/clanek/statisticke-prehledy-kriminality-za-rok-2015.aspx)

In [32]:
def criminality_rate_2015(idx):
    crim_df = pd.DataFrame(columns=('month', 'year', 'kraj', 'general_thefts', 'break_in_thefts'))
    count = 0
    for m in range(0, len(months_czech)):
        path = f"./Statistiky_od_01.01.2015_do_{month2days.get(months_nums[m])}.{months_nums[m]}.2015/Statistiky přehled-celkem.xls"
        # remember the last month for every kraj - cummulative numbers
        last_month_gen_tft = [0] * len(kraje)
        last_month_brk_in = [0] * len(kraje)
        try:
            df = pd.read_excel(path, sheet_name=None)  #loop through all sheets
            first_sheet = True
            curr_kraj = 0
            for x in df:  # go through the sheets
                # skip the CR sheet
                if first_sheet:
                    first_sheet = False
                    continue
                dict = {
                    'month': int(months_nums[m]),
                    'year': 2015,
                    'kraj': kraje[curr_kraj],
                    'general_thefts': (df[x]["Unnamed: 3"]).iloc[idx[0]] - last_month_gen_tft[curr_kraj],
                    'break_in_thefts': (df[x]["Unnamed: 3"]).iloc[idx[1]] - last_month_brk_in[curr_kraj]
                }
                crim_df.loc[count] = dict
                last_month_gen_tft[curr_kraj] = (df[x]["Unnamed: 3"]).iloc[idx[0]]
                last_month_brk_in[curr_kraj] = (df[x]["Unnamed: 3"]).iloc[idx[1]]
                count += 1
                curr_kraj += 1
        except Exception as e:
            print("error:", e, " ", " ", months_czech[m], " ", 2015)
    return crim_df

In [33]:
idx_2015 = [97, 80]
crim_df_2015 = criminality_rate_2015(idx_2015)
crim_df_2015

Unnamed: 0,month,year,kraj,general_thefts,break_in_thefts
0,1,2015,PHA,2496,662
1,1,2015,STC,664,480
2,1,2015,JHC,270,152
3,1,2015,PLK,276,185
4,1,2015,KVK,590,312
...,...,...,...,...,...
163,12,2015,VYS,1822,1075
164,12,2015,JHM,1435,925
165,12,2015,OLK,1839,956
166,12,2015,MSK,3407,1772


In [240]:
# merge the criminality
crim_df = None
crims = [crim_df_3, crim_timeout_1,crim_df_2, crim_df_2015, crim_timeout_3, crim_timeout_2, crim_df_1]
for df_el in crims:
        if crim_df is None:
            crim_df = df_el
        else:
            crim_df = pd.concat([crim_df, df_el], axis=0)
crim_df["year"] = crim_df.year.astype(int)
crim_df_sorted = crim_df.sort_values(by=['year', 'kraj', 'month'])
crim_df_sorted.set_index(['month','year', 'kraj'], inplace=True)
crim_df_sorted.to_csv("./criminality.csv")
crim_df_sorted

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,general_thefts,break_in_thefts
month,year,kraj,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2009,HKK,271,174
2,2009,HKK,245,164
3,2009,HKK,284,207
4,2009,HKK,271,162
5,2009,HKK,268,191
...,...,...,...,...
6,2023,ZLK,68,51
7,2023,ZLK,95,48
8,2023,ZLK,106,30
9,2023,ZLK,67,57


## Salaries

In [72]:
def get_salaries(region_code):
    sal_df = pd.DataFrame(columns=('month', 'year', 'kraj', 'avg_monthly_salary'))
    count = 0
    for i in range(0, len(region_code)):
        paths = [f"https://vdb.czso.cz/vdbvo2/faces/cs/index.jsf?page=vystup-objekt&z=T&f=TABULKA&skupId=855&filtr=G%7EF_M%7EF_Z%7EF_R%7ET_P%7E_S%7E_null_null_&katalog=30852&pvo=MZD01-E&pvo=MZD01-E&evo=v208_!_MZD01-Q-do2010_1&u=v479__VUZEMI__100__{region_code[i]}#w=", f"https://vdb.czso.cz/vdbvo2/faces/cs/index.jsf?page=vystup-objekt&pvo=MZD01-C&z=T&f=TABULKA&skupId=855&filtr=G%7EF_M%7EF_Z%7EF_R%7ET_P%7E_S%7E_null_null_&katalog=30852&pvo=MZD01-C&evo=v208_!_MZD-Q-ABS-od2011_1&u=v159__VUZEMI__100__{region_code[i]}"]
        for path in paths:
            try:
                df = pd.read_html(path, decimal=",", thousands="\u00A0")[6][["Unnamed: 0_level_0", "Průměrná měsíční mzda (na přepočtené počty zaměstnanců)"]].droplevel(level=1, axis=1)
                # Iterate over rows using iterrows
                for index, row in df.iterrows():
                    time_data = str.split(row['Unnamed: 0_level_0'])
                    year = int(time_data[0])
                    # get the first two chars
                    quarter = time_data[1][:2]
                    qrtr_months = quarters.get(quarter)
                    for month in qrtr_months:
                        dict = {
                            'month': int(month),
                            'year': int(year),
                            'kraj': region_codes_map.get(region_code[i]),
                            'avg_monthly_salary': df["Průměrná měsíční mzda (na přepočtené počty zaměstnanců)"].iloc[index]
                        }
                        sal_df.loc[count] = dict
                        count += 1
            except Exception as e:
                print("error:", e, " ", " ", region_code[i])
    return sal_df

In [73]:
sal_df = get_salaries(region_code)
sal_df

Unnamed: 0,month,year,kraj,avg_monthly_salary
0,10,2010,PHA,33028
1,11,2010,PHA,33028
2,12,2010,PHA,33028
3,7,2010,PHA,30689
4,8,2010,PHA,30689
...,...,...,...,...
3985,5,2011,ZLK,21174
3986,6,2011,ZLK,21174
3987,1,2011,ZLK,20169
3988,2,2011,ZLK,20169


## Sort the DataFrames

In [74]:
sal_df_sorted = sal_df.sort_values(by=['year', 'kraj', 'month'])
sal_df_sorted.set_index(["month", "year", "kraj"], inplace=True)
sal_df_sorted.to_csv("./salaries.csv")
display(sal_df_sorted)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_monthly_salary
month,year,kraj,Unnamed: 3_level_1
1,2000,HKK,10963
2,2000,HKK,10963
3,2000,HKK,10963
4,2000,HKK,12169
5,2000,HKK,12169
5,...,...,...
5,2023,ZLK,39080
6,2023,ZLK,39080
7,2023,ZLK,38596
8,2023,ZLK,38596


## Merge the DataFrames

In [None]:
def merge_df(dfs):
    res_df = None
    for df_el in dfs:
        if res_df is None:
            res_df = df_el
        else:
            res_df = pd.concat([res_df, df_el], axis=1)
    return res_df

In [241]:
res_df = crim_df_sorted
display(res_df)
#res_df = pd+.concat([res_df, sal_df_sorted], axis=1)
res_df = res_df.join(sal_df_sorted)
# res_df.reset_index(inplace=True)
# res_df["year"] = res_df["year"].astype(int)
# res_df.set_index(keys=["month", "year", "kraj"], inplace=True)
# dfs = [sal_df_sorted, crim_df_sorted]
# res_df = merge_df(dfs)
#res_df
res_df_sorted = res_df.sort_values(by=['year', 'kraj', 'month'])
res_df_sorted

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,general_thefts,break_in_thefts
month,year,kraj,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2009,HKK,271,174
2,2009,HKK,245,164
3,2009,HKK,284,207
4,2009,HKK,271,162
5,2009,HKK,268,191
...,...,...,...,...
6,2023,ZLK,68,51
7,2023,ZLK,95,48
8,2023,ZLK,106,30
9,2023,ZLK,67,57


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,general_thefts,break_in_thefts,avg_monthly_salary
month,year,kraj,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2009,HKK,271,174,19132.0
2,2009,HKK,245,164,19132.0
3,2009,HKK,284,207,19132.0
4,2009,HKK,271,162,19885.0
5,2009,HKK,268,191,19885.0
...,...,...,...,...,...
6,2023,ZLK,68,51,39080.0
7,2023,ZLK,95,48,38596.0
8,2023,ZLK,106,30,38596.0
9,2023,ZLK,67,57,38596.0


In [242]:
res_df.to_csv("./crim_salaries.csv")