<h1>Crimes registados (N.º) pelas autoridades policiais</h1>

<a href="https://dados.gov.pt/pt/datasets/crimes-registados-n-o-pelas-autoridades-policiais/#_">https://dados.gov.pt/pt/datasets/crimes-registados-n-o-pelas-autoridades-policiais/#_</a>

<h2>Libraries</h2>

In [None]:
import urllib.request, json
import ssl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

<h2>Main</h2>

In [None]:
# Certificate verification is switched off to avoid the SSL error raised by
# this download. The relaxed context is passed to this single request only,
# so any other HTTPS call made from this kernel keeps normal verification.
# NOTE(review): this accepts any certificate; fixing the local CA bundle
# would be the safer long-term solution.
insecure_context = ssl.create_default_context()
insecure_context.check_hostname = False  # must be disabled before CERT_NONE
insecure_context.verify_mode = ssl.CERT_NONE

url = "https://dados.gov.pt/pt/datasets/r/bb01e010-0ed5-4a19-8da2-613bee384241"

# The with-block closes the HTTP response once the payload has been read
with urllib.request.urlopen(url, context=insecure_context) as response:
    data = json.loads(response.read().decode())

In [None]:
# Debug helper: uncomment to inspect the raw JSON payload
#print(json.dumps(data, indent=4))

In [None]:
# Data available until 2022
YEAR = '2022'

# Each top-level item keeps its records under item["Dados"][<year>].
# All items are collected; previously every iteration overwrote df, so only
# the last item survived (and an empty payload left a stale df in place).
# NOTE(review): assumes all items describe the same indicator — confirm.
yearly_frames = [pd.json_normalize(item["Dados"][YEAR]) for item in data]
if not yearly_frames:
    raise ValueError("The downloaded dataset contains no items")

df = pd.concat(yearly_frames, ignore_index=True)

df.info()

In [None]:
# Text patterns that mark aggregate rows (crime totals and geographic totals)
CRIME_TOTAL_PATTERN = 'Total'
GEO_AGGREGATE_PATTERN = 'Região|Portugal|Continente|Norte|Centro|Área'

# na=False treats a missing label as "no match", keeping the masks boolean
# so they can be negated safely
is_crime_total = df['dim_3_t'].str.contains(CRIME_TOTAL_PATTERN, na=False)
is_geo_aggregate = df['geodsg'].str.contains(GEO_AGGREGATE_PATTERN, na=False)

# Build the cleaned frame without mutating df, so re-running this cell always
# starts from the same input
df_filtered = (
    # Remove crime totals and geo totals
    df.loc[~(is_crime_total | is_geo_aggregate)]
    # Replace missing counts with zero (only in 'valor'; text columns are left
    # untouched) and convert the counts to int
    .assign(valor=lambda frame: frame['valor'].fillna(0).astype(int))
    # Remove unnecessary columns
    .drop(columns=['geocod', 'dim_3', 'sinal_conv', 'sinal_conv_desc'])
    # Rename columns
    .rename(columns={'geodsg': 'city', 'dim_3_t': 'crime', 'valor': 'value'})
)

In [None]:
# Number of rows recorded for each crime category
crime_counts = df_filtered['crime'].value_counts()
crime_counts

In [None]:
# Sort the cities alphabetically
sorted_unique_cities = np.sort(df_filtered['city'].unique())
sorted_unique_cities

In [None]:
# Number of distinct cities left after filtering (expected: 325)
sorted_unique_cities.size


In [None]:
# Pivot into a crime x city matrix of summed counts. Combinations that do not
# occur in the data are filled with 0 and the result is cast to integers:
# otherwise a single missing pair turns the matrix into floats with NaN and
# the integer annotation format (fmt='d') below raises an error.
matrix = (
    df_filtered
    .pivot_table(index='crime', columns='city', values='value',
                 aggfunc='sum', fill_value=0)
    .astype('int64')
    # Sort the columns (cities) alphabetically
    .sort_index(axis=1)
)

# Create a heatmap; the figure is very wide because every city gets a column
fig, ax = plt.subplots(figsize=(160, 6))
sns.heatmap(matrix, cmap='YlGnBu', annot=True, annot_kws={"size": 5}, fmt='d',
            linewidths=0.5, ax=ax)

ax.set_title('Crime Quantity Heatmap')
ax.set_xlabel('City')
ax.set_ylabel('Crime')

# Rotate the city labels and shrink the tick fonts to keep them legible
plt.xticks(rotation=90, fontsize=8)
plt.yticks(fontsize=8)

plt.show()

In [None]:
# Total number of crimes per city, across all crime categories
city_totals = df_filtered.groupby('city')['value'].sum()

# Largest totals first
ranked_cities = city_totals.sort_values(ascending=False)

# Bar chart of the ranking, one bar per city
fig, ax = plt.subplots(figsize=(50, 6))
sns.barplot(x=ranked_cities.index, y=ranked_cities.values, palette='YlGnBu', ax=ax)

ax.set_title('Ranking of Cities by Total Crime Quantity')
ax.set_xlabel('City')
ax.set_ylabel('Total Quantity')

# Small, vertical tick labels so that every city name fits
plt.xticks(rotation=90, fontsize=5)
plt.yticks(fontsize=5)

plt.show()
