# Study of crimes against heritage by province in Spain from 2010 to 2023

## Hypothesis, project and approach

This is a hypothetical case framed in the Ironhack Data Analysis Bootcamp, as a project for the third week.

The premise is that the three of us (David M., Greta and Luis H. Rodriguez) work in the data analysis team of an insurance company. The company is trying to identify where it should strengthen, improve or develop new insurance against theft against individuals in Spain.

The source of the project is the data collected and offered by the Ministry of the Interior of the Government of Spain between 2010 and 2023.

## Import libraries and data

In [42]:
# It is recommended to install the requirements.txt file

# %pip install -r requirements.txt

In [43]:
# Import libraries and functions.py

from functions import *
import pandas as pd
import streamlit as st
import folium
from streamlit_folium import folium_static
import matplotlib.pyplot as plt


In [44]:
# Import data from 01002(1).xlsx

url_robberies = ("https://github.com/LuisHRF/Home-Insurance-Study-Project-/raw/main/01002%20(1).xlsx")

data_raw_robberies = pd.read_excel(url_robberies)

data_raw_robberies.head(10)


Unnamed: 0,Unnamed: 1,1. CONTRA LAS PERSONAS,1.2.-Lesiones,5.1.-Hurtos,5.2.-Robos con fuerza en las cosas,5.2.1.-Robos con fuerza en las cosas en el interior de vehículos,5.2.2.-Robos con fuerza en viviendas,5.2.3.-Robos con fuerza en establecimientos,5.3.-Robos con violencia o intimidación,5.3.1.-Robos con violencia en vía pública,5.3.2.-Robos con violencia en viviendas,5.3.3.-Robos con violencia en establecimientos
0,2023,,,,,,,,,,,
1,Total Nacional,226761.0,118125.0,665622.0,272638.0,104988.0,84721.0,35072.0,64711.0,44406.0,3844.0,8238.0
2,Araba/Álava,1386.0,745.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0
3,Albacete,1638.0,851.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0
4,Alicante/Alacant,11416.0,5326.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0
5,Almería,4343.0,2517.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0
6,Ávila,722.0,453.0,784.0,646.0,79.0,269.0,75.0,26.0,10.0,7.0,5.0
7,Badajoz,2516.0,1621.0,4844.0,2682.0,622.0,967.0,383.0,231.0,107.0,29.0,70.0
8,Balears (Illes),10159.0,4495.0,23009.0,6806.0,3195.0,1937.0,738.0,1519.0,994.0,82.0,228.0
9,Barcelona,16470.0,10527.0,141789.0,44688.0,22066.0,12042.0,5442.0,22792.0,17358.0,808.0,1787.0


## Data cleaning, formating and processing

In [45]:
# First we eliminate the null rows that contain only the year value, since there is a pattern

data_clean = data_raw_robberies.dropna()

data_clean 

Unnamed: 0,Unnamed: 1,1. CONTRA LAS PERSONAS,1.2.-Lesiones,5.1.-Hurtos,5.2.-Robos con fuerza en las cosas,5.2.1.-Robos con fuerza en las cosas en el interior de vehículos,5.2.2.-Robos con fuerza en viviendas,5.2.3.-Robos con fuerza en establecimientos,5.3.-Robos con violencia o intimidación,5.3.1.-Robos con violencia en vía pública,5.3.2.-Robos con violencia en viviendas,5.3.3.-Robos con violencia en establecimientos
1,Total Nacional,226761.0,118125.0,665622.0,272638.0,104988.0,84721.0,35072.0,64711.0,44406.0,3844.0,8238.0
2,Araba/Álava,1386.0,745.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0
3,Albacete,1638.0,851.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0
4,Alicante/Alacant,11416.0,5326.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0
5,Almería,4343.0,2517.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0
...,...,...,...,...,...,...,...,...,...,...,...,...
779,Zaragoza,5171.0,1988.0,14860.0,7904.0,1895.0,1756.0,1271.0,1531.0,1052.0,96.0,235.0
780,Ceuta,833.0,393.0,983.0,723.0,497.0,93.0,49.0,189.0,141.0,8.0,13.0
781,Melilla,1011.0,501.0,1043.0,774.0,427.0,156.0,79.0,194.0,171.0,1.0,8.0
782,En el extranjero,158.0,17.0,2427.0,674.0,142.0,19.0,51.0,177.0,138.0,5.0,6.0


In [46]:
unique_first_column = data_clean.iloc[:, 0].unique()

unique_first_column 

array(['    Total Nacional', '    Araba/Álava', '    Albacete',
       '    Alicante/Alacant', '    Almería', '    Ávila', '    Badajoz',
       '    Balears (Illes)', '    Barcelona', '    Burgos',
       '    Cáceres', '    Cádiz', '    Castellón/Castelló',
       '    Ciudad Real', '    Córdoba', '    Coruña (A)', '    Cuenca',
       '    Girona', '    Granada', '    Guadalajara', '    Gipuzkoa',
       '    Huelva', '    Huesca', '    Jaén', '    León', '    Lleida',
       '    Rioja (La)', '    Lugo', '    Madrid', '    Málaga',
       '    Murcia', '    Navarra', '    Ourense', '    Asturias',
       '    Palencia', '    Palmas (Las)', '    Pontevedra',
       '    Salamanca', '    Santa Cruz de Tenerife', '    Cantabria',
       '    Segovia', '    Sevilla', '    Soria', '    Tarragona',
       '    Teruel', '    Toledo', '    Valencia/València',
       '    Valladolid', '    Bizkaia', '    Zamora', '    Zaragoza',
       '    Ceuta', '    Melilla', '    En el extranjero',
   

In [47]:
# Since each year has the same number of values, we set blocks for each year and iterate to determine which row to which row each year belongs to

block_sizes = [55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55]
start = 1
year_ranges = {}

for i, size in enumerate(block_sizes):
    end = start + size - 1
    year = 2023 - i
    year_ranges[(start, end)] = year
    start = end + 2  

print(year_ranges)

{(1, 55): 2023, (57, 111): 2022, (113, 167): 2021, (169, 223): 2020, (225, 279): 2019, (281, 335): 2018, (337, 391): 2017, (393, 447): 2016, (449, 503): 2015, (505, 559): 2014, (561, 615): 2013, (617, 671): 2012, (673, 727): 2011, (729, 783): 2010}


In [48]:
year_range = { # We create a dictionary where we can save the corresponding rows for each year
    (1, 55): 2023,
    (57, 111): 2022,
    (113, 167): 2021,
    (169, 223): 2020,
    (225, 279): 2019,
    (281, 335): 2018,
    (337, 391): 2017,
    (393, 447): 2016,
    (449, 503): 2015,
    (505, 559): 2014,
    (561, 615): 2013,
    (617, 671): 2012,
    (673, 727): 2011,
    (729, 783): 2010
}

data_clean_years = defi_years_per_block(data_clean, year_range)

data_clean_years.head(10)

Unnamed: 0,Unnamed: 1,1. CONTRA LAS PERSONAS,1.2.-Lesiones,5.1.-Hurtos,5.2.-Robos con fuerza en las cosas,5.2.1.-Robos con fuerza en las cosas en el interior de vehículos,5.2.2.-Robos con fuerza en viviendas,5.2.3.-Robos con fuerza en establecimientos,5.3.-Robos con violencia o intimidación,5.3.1.-Robos con violencia en vía pública,5.3.2.-Robos con violencia en viviendas,5.3.3.-Robos con violencia en establecimientos,Year
1,Total Nacional,226761.0,118125.0,665622.0,272638.0,104988.0,84721.0,35072.0,64711.0,44406.0,3844.0,8238.0,2023.0
2,Araba/Álava,1386.0,745.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0,2023.0
3,Albacete,1638.0,851.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0,2023.0
4,Alicante/Alacant,11416.0,5326.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0,2023.0
5,Almería,4343.0,2517.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0,2023.0
6,Ávila,722.0,453.0,784.0,646.0,79.0,269.0,75.0,26.0,10.0,7.0,5.0,2023.0
7,Badajoz,2516.0,1621.0,4844.0,2682.0,622.0,967.0,383.0,231.0,107.0,29.0,70.0,2023.0
8,Balears (Illes),10159.0,4495.0,23009.0,6806.0,3195.0,1937.0,738.0,1519.0,994.0,82.0,228.0,2023.0
9,Barcelona,16470.0,10527.0,141789.0,44688.0,22066.0,12042.0,5442.0,22792.0,17358.0,808.0,1787.0,2023.0
10,Burgos,1393.0,755.0,2828.0,1532.0,319.0,588.0,271.0,183.0,113.0,10.0,36.0,2023.0


In [49]:
# We apply different functions to clean the names of the columns, translate them into English

data_clean_years = cleaning_columns_replace(data_clean_years)


In [50]:
data_clean_years.columns = translate_columns(data_clean_years.columns)

data_clean_years.columns


Index(['Province', ' assault', ' injuries', 'small_robberies',
       'robberies_with_force', 'robberies_force_vehicles',
       'robberies_force_homes', 'robberies_force_stores',
       'robberies_violence_intimidation', 'robberies_violence_publicways',
       'robberies_violence_homes', 'robberies_violence_stores', 'year'],
      dtype='object')

In [51]:
values_to_remove = ['    Total Nacional', '    En el extranjero','    Desconocida']

data_clean_years = drop_specific_rows(data_clean_years, 'Province', values_to_remove)

data_clean_years = data_clean_years.drop([' injuries'], axis=1)

data_clean_years

Unnamed: 0,Province,assault,small_robberies,robberies_with_force,robberies_force_vehicles,robberies_force_homes,robberies_force_stores,robberies_violence_intimidation,robberies_violence_publicways,robberies_violence_homes,robberies_violence_stores,year
2,Araba/Álava,1386.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0,2023.0
3,Albacete,1638.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0,2023.0
4,Alicante/Alacant,11416.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0,2023.0
5,Almería,4343.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0,2023.0
6,Ávila,722.0,784.0,646.0,79.0,269.0,75.0,26.0,10.0,7.0,5.0,2023.0
...,...,...,...,...,...,...,...,...,...,...,...,...
777,Bizkaia,6945.0,15197.0,10849.0,29.0,2336.0,1470.0,2045.0,1449.0,157.0,261.0,2010.0
778,Zamora,810.0,1090.0,752.0,93.0,165.0,205.0,62.0,33.0,7.0,18.0,2010.0
779,Zaragoza,5171.0,14860.0,7904.0,1895.0,1756.0,1271.0,1531.0,1052.0,96.0,235.0,2010.0
780,Ceuta,833.0,983.0,723.0,497.0,93.0,49.0,189.0,141.0,8.0,13.0,2010.0


In [52]:
data_clean = data_clean_years


## Analysis 1 = Map with total crimes by year and province

In [53]:
data_coordinates_total = data_clean.copy()

data_coordinates_total = add_coordinates_from_dict(data_coordinates_total)
data_coordinates_total.head(10)

Unnamed: 0,Province,assault,small_robberies,robberies_with_force,robberies_force_vehicles,robberies_force_homes,robberies_force_stores,robberies_violence_intimidation,robberies_violence_publicways,robberies_violence_homes,robberies_violence_stores,year,Latitude,Longitude
2,Araba/Álava,1386.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0,2023.0,42.84671,-2.67245
3,Albacete,1638.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0,2023.0,38.99435,-1.85854
4,Alicante/Alacant,11416.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0,2023.0,38.34517,-0.48149
5,Almería,4343.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0,2023.0,36.83405,-2.46371
6,Ávila,722.0,784.0,646.0,79.0,269.0,75.0,26.0,10.0,7.0,5.0,2023.0,40.65668,-4.68186
7,Badajoz,2516.0,4844.0,2682.0,622.0,967.0,383.0,231.0,107.0,29.0,70.0,2023.0,38.87945,-6.97065
8,Balears (Illes),10159.0,23009.0,6806.0,3195.0,1937.0,738.0,1519.0,994.0,82.0,228.0,2023.0,39.57119,2.64663
9,Barcelona,16470.0,141789.0,44688.0,22066.0,12042.0,5442.0,22792.0,17358.0,808.0,1787.0,2023.0,41.38506,2.1734
10,Burgos,1393.0,2828.0,1532.0,319.0,588.0,271.0,183.0,113.0,10.0,36.0,2023.0,42.34399,-3.69691
11,Cáceres,1271.0,1949.0,1058.0,170.0,299.0,196.0,81.0,40.0,18.0,11.0,2023.0,39.47649,-6.37224


In [54]:
data_coordinates_total.columns

Index(['Province', ' assault', 'small_robberies', 'robberies_with_force',
       'robberies_force_vehicles', 'robberies_force_homes',
       'robberies_force_stores', 'robberies_violence_intimidation',
       'robberies_violence_publicways', 'robberies_violence_homes',
       'robberies_violence_stores', 'year', 'Latitude', 'Longitude'],
      dtype='object')

In [55]:
# Crear un data_frame más ajustado para el mapa
# Primero, crear una columna de crímenes totales por provincia
data_total_crimes = data_coordinates_total.copy()
data_total_crimes['Total_crimes'] = data_coordinates_total[[' assault', 'small_robberies',
       'robberies_with_force', 'robberies_force_vehicles',
       'robberies_force_homes', 'robberies_force_stores',
       'robberies_violence_intimidation', 'robberies_violence_publicways',
       'robberies_violence_homes', 'robberies_violence_stores']].sum(axis = 1)

data_total_crimes

Unnamed: 0,Province,assault,small_robberies,robberies_with_force,robberies_force_vehicles,robberies_force_homes,robberies_force_stores,robberies_violence_intimidation,robberies_violence_publicways,robberies_violence_homes,robberies_violence_stores,year,Latitude,Longitude,Total_crimes
2,Araba/Álava,1386.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0,2023.0,42.84671,-2.67245,7505.0
3,Albacete,1638.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0,2023.0,38.99435,-1.85854,8854.0
4,Alicante/Alacant,11416.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0,2023.0,38.34517,-0.48149,66131.0
5,Almería,4343.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0,2023.0,36.83405,-2.46371,19282.0
6,Ávila,722.0,784.0,646.0,79.0,269.0,75.0,26.0,10.0,7.0,5.0,2023.0,40.65668,-4.68186,2623.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,Bizkaia,6945.0,15197.0,10849.0,29.0,2336.0,1470.0,2045.0,1449.0,157.0,261.0,2010.0,43.26301,-2.93499,40738.0
778,Zamora,810.0,1090.0,752.0,93.0,165.0,205.0,62.0,33.0,7.0,18.0,2010.0,41.50332,-5.74456,3235.0
779,Zaragoza,5171.0,14860.0,7904.0,1895.0,1756.0,1271.0,1531.0,1052.0,96.0,235.0,2010.0,41.64882,-0.88909,35771.0
780,Ceuta,833.0,983.0,723.0,497.0,93.0,49.0,189.0,141.0,8.0,13.0,2010.0,35.88939,-5.31979,3529.0


In [56]:
data_frame_total_map = data_total_crimes[['Province', 'year', 'Latitude', 'Longitude', 'Total_crimes']]

data_frame_total_map.head(5)

Unnamed: 0,Province,year,Latitude,Longitude,Total_crimes
2,Araba/Álava,2023.0,42.84671,-2.67245,7505.0
3,Albacete,2023.0,38.99435,-1.85854,8854.0
4,Alicante/Alacant,2023.0,38.34517,-0.48149,66131.0
5,Almería,2023.0,36.83405,-2.46371,19282.0
6,Ávila,2023.0,40.65668,-4.68186,2623.0


In [57]:


data_frame_total_map['year'] = data_frame_total_map['year'].fillna(0)

data_frame_total_map['year'] = data_frame_total_map['year'].astype(int)


data_frame_total_map.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_frame_total_map['year'] = data_frame_total_map['year'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_frame_total_map['year'] = data_frame_total_map['year'].astype(int)


Unnamed: 0,Province,year,Latitude,Longitude,Total_crimes
2,Araba/Álava,2023,42.84671,-2.67245,7505.0
3,Albacete,2023,38.99435,-1.85854,8854.0
4,Alicante/Alacant,2023,38.34517,-0.48149,66131.0
5,Almería,2023,36.83405,-2.46371,19282.0
6,Ávila,2023,40.65668,-4.68186,2623.0


In [58]:
data_pivot_map = data_frame_total_map.pivot_table(index=['Province', 'Latitude', 'Longitude'], columns='year', values='Total_crimes', aggfunc='sum').reset_index()

data_pivot_map.head(5)

year,Province,Latitude,Longitude,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Albacete,38.99435,-1.85854,11936.0,11920.0,12239.0,11938.0,10920.0,9695.0,8623.0,8810.0,8781.0,8660.0,5842.0,6118.0,8429.0,8854.0
1,Alicante/Alacant,38.34517,-0.48149,103129.0,99317.0,96502.0,90555.0,84295.0,77307.0,75124.0,71958.0,66880.0,64787.0,46219.0,48226.0,58434.0,66131.0
2,Almería,36.83405,-2.46371,31231.0,31907.0,32091.0,29332.0,27385.0,25523.0,24405.0,22349.0,19456.0,18447.0,13537.0,15383.0,17857.0,19282.0
3,Araba/Álava,42.84671,-2.67245,12593.0,11817.0,10746.0,10273.0,9847.0,8783.0,7771.0,7652.0,7424.0,7427.0,4718.0,5615.0,7348.0,7505.0
4,Asturias,43.36191,-5.84939,22777.0,22737.0,23849.0,22953.0,21101.0,19168.0,17417.0,15968.0,15345.0,15231.0,11175.0,12022.0,15163.0,17377.0


In [59]:
data_pivot_map.to_csv('data_pivot_map.csv', index = False)

## Analysis 2 = Crime growth between 2010 and 2023

In [60]:
data_total_crimes

Unnamed: 0,Province,assault,small_robberies,robberies_with_force,robberies_force_vehicles,robberies_force_homes,robberies_force_stores,robberies_violence_intimidation,robberies_violence_publicways,robberies_violence_homes,robberies_violence_stores,year,Latitude,Longitude,Total_crimes
2,Araba/Álava,1386.0,4184.0,1099.0,8.0,434.0,178.0,114.0,82.0,6.0,14.0,2023.0,42.84671,-2.67245,7505.0
3,Albacete,1638.0,3771.0,1784.0,421.0,636.0,204.0,211.0,120.0,23.0,46.0,2023.0,38.99435,-1.85854,8854.0
4,Alicante/Alacant,11416.0,27014.0,12716.0,3723.0,6275.0,1241.0,1997.0,1161.0,245.0,343.0,2023.0,38.34517,-0.48149,66131.0
5,Almería,4343.0,6437.0,4193.0,1106.0,1602.0,399.0,657.0,397.0,67.0,81.0,2023.0,36.83405,-2.46371,19282.0
6,Ávila,722.0,784.0,646.0,79.0,269.0,75.0,26.0,10.0,7.0,5.0,2023.0,40.65668,-4.68186,2623.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,Bizkaia,6945.0,15197.0,10849.0,29.0,2336.0,1470.0,2045.0,1449.0,157.0,261.0,2010.0,43.26301,-2.93499,40738.0
778,Zamora,810.0,1090.0,752.0,93.0,165.0,205.0,62.0,33.0,7.0,18.0,2010.0,41.50332,-5.74456,3235.0
779,Zaragoza,5171.0,14860.0,7904.0,1895.0,1756.0,1271.0,1531.0,1052.0,96.0,235.0,2010.0,41.64882,-0.88909,35771.0
780,Ceuta,833.0,983.0,723.0,497.0,93.0,49.0,189.0,141.0,8.0,13.0,2010.0,35.88939,-5.31979,3529.0


In [61]:
data_frame_growth_crime = data_total_crimes[['Province', 'year', 'Total_crimes']]

data_frame_growth_crime['year'] = data_frame_growth_crime['year'].fillna(0)

data_frame_growth_crime['year'] = data_frame_growth_crime['year'].astype(int)

data_frame_growth_crime

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_frame_growth_crime['year'] = data_frame_growth_crime['year'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_frame_growth_crime['year'] = data_frame_growth_crime['year'].astype(int)


Unnamed: 0,Province,year,Total_crimes
2,Araba/Álava,2023,7505.0
3,Albacete,2023,8854.0
4,Alicante/Alacant,2023,66131.0
5,Almería,2023,19282.0
6,Ávila,2023,2623.0
...,...,...,...
777,Bizkaia,2010,40738.0
778,Zamora,2010,3235.0
779,Zaragoza,2010,35771.0
780,Ceuta,2010,3529.0


In [62]:
data_frame_growth_crime.to_csv('data_clean.csv', index = False)

## Analysis 3 = Robberies with violence

In [63]:
data_total_crimes.columns

Index(['Province', ' assault', 'small_robberies', 'robberies_with_force',
       'robberies_force_vehicles', 'robberies_force_homes',
       'robberies_force_stores', 'robberies_violence_intimidation',
       'robberies_violence_publicways', 'robberies_violence_homes',
       'robberies_violence_stores', 'year', 'Latitude', 'Longitude',
       'Total_crimes'],
      dtype='object')

In [64]:
data_robberies_violents = data_total_crimes[['Province', 'year', ' assault', 'robberies_violence_publicways', 'robberies_violence_intimidation']]

data_robberies_violents


Unnamed: 0,Province,year,assault,robberies_violence_publicways,robberies_violence_intimidation
2,Araba/Álava,2023.0,1386.0,82.0,114.0
3,Albacete,2023.0,1638.0,120.0,211.0
4,Alicante/Alacant,2023.0,11416.0,1161.0,1997.0
5,Almería,2023.0,4343.0,397.0,657.0
6,Ávila,2023.0,722.0,10.0,26.0
...,...,...,...,...,...
777,Bizkaia,2010.0,6945.0,1449.0,2045.0
778,Zamora,2010.0,810.0,33.0,62.0
779,Zaragoza,2010.0,5171.0,1052.0,1531.0
780,Ceuta,2010.0,833.0,141.0,189.0


In [65]:
data_robberies_violents.columns = data_robberies_violents.columns.str.strip()

data_robberies_violents = data_robberies_violents.rename(columns={'assault' : 'Violent assault', 'robberies_violence_publicways' : 'Robberies with violence in public places', 'robberies_violence_intimidation' : 'Robberies with violence and intimidation'})

data_robberies_violents


Unnamed: 0,Province,year,Violent assault,Robberies with violence in public places,Robberies with violence and intimidation
2,Araba/Álava,2023.0,1386.0,82.0,114.0
3,Albacete,2023.0,1638.0,120.0,211.0
4,Alicante/Alacant,2023.0,11416.0,1161.0,1997.0
5,Almería,2023.0,4343.0,397.0,657.0
6,Ávila,2023.0,722.0,10.0,26.0
...,...,...,...,...,...
777,Bizkaia,2010.0,6945.0,1449.0,2045.0
778,Zamora,2010.0,810.0,33.0,62.0
779,Zaragoza,2010.0,5171.0,1052.0,1531.0
780,Ceuta,2010.0,833.0,141.0,189.0


In [66]:
data_robberies_violents.to_csv('data_rob_violence.csv', index = False)

## Analisys 4: Some statistics

# Crimes per capita by province

In [83]:
population_url = ('https://github.com/LuisHRF/Home-Insurance-Study-Project-/raw/main/population.xlsx')

data_population = pd.read_excel(population_url)


In [84]:
data_population.columns = data_population.columns.str.lower().str.replace(" ", "")
data_population.columns = data_population.columns.str.strip()


data_population['province'].unique()

array(['Albacete', 'Alicante/Alacant', 'Almería', 'Araba/Álava',
       'Asturias', 'Badajoz', 'Balears (Illes)', 'Barcelona', 'Bizkaia',
       'Burgos', 'Cantabria', 'Castellón/Castelló', 'Ceuta',
       'Ciudad Real', 'Coruña (A)', 'Cuenca', 'Cáceres', 'Cádiz',
       'Córdoba', 'Gipuzkoa', 'Girona', 'Granada', 'Guadalajara',
       'Huelva', 'Huesca', 'Jaén', 'León', 'Lleida', 'Lugo', 'Madrid',
       'Melilla', 'Murcia', 'Málaga', 'Navarra', 'Ourense', 'Palencia',
       'Palmas (Las)', 'Pontevedra', 'Rioja (La)', 'Salamanca',
       'Santa Cruz de Tenerife', 'Segovia', 'Sevilla', 'Soria',
       'Tarragona', 'Teruel', 'Toledo', 'Valencia/València', 'Valladolid',
       'Zamora', 'Zaragoza', 'Ávila'], dtype=object)

In [86]:
data_just_crimes = data_frame_total_map[['Province', 'year', 'Total_crimes']]

data_just_crimes.columns = data_just_crimes.columns.str.lower()

data_total_crimes = data_just_crimes.groupby('province').sum(numeric_only=True).drop(columns=['year']).reset_index()

data_total_crimes['province'] = data_total_crimes['province'].str.strip()


In [88]:
data_per_capita = pd.merge(data_total_crimes, data_population, on = 'province')

data_per_capita.head(5)

Unnamed: 0,province,total_crimes,population
0,Albacete,132765.0,386464
1,Alicante/Alacant,1048864.0,1881762
2,Almería,328185.0,731792
3,Araba/Álava,119519.0,333626
4,Asturias,252283.0,1011792


In [90]:
data_per_capita['crimes_per_capita'] = data_per_capita['total_crimes'] / data_per_capita['population']

In [91]:
data_per_capita.to_csv('data_per_capita.csv', index = False)

# Kind of crimes by province

In [97]:
data_crimes_type = data_clean.groupby('Province').sum().drop(columns=['year']).reset_index()
data_crimes_type.dtypes

Province                            object
 assault                           float64
small_robberies                    float64
robberies_with_force               float64
robberies_force_vehicles           float64
robberies_force_homes              float64
robberies_force_stores             float64
robberies_violence_intimidation    float64
robberies_violence_publicways      float64
robberies_violence_homes           float64
robberies_violence_stores          float64
dtype: object

In [None]:
data_crimes_type.to_csv('data_crimes_type.csv', index = False)