In [1]:
import requests
import pandas as pd
import pytz

def _event_type(title):
    """Return the event type from a 'date, type, place' headline.

    The API's ``name`` field is split on commas and the second part is
    returned, stripped of surrounding whitespace. If the headline contains
    no comma, the whole (stripped) headline is returned instead of raising
    IndexError.
    """
    parts = title.split(',')
    return parts[1].strip() if len(parts) > 1 else title.strip()


# Step 1: Fetch the data from the API
url = "https://polisen.se/api/events"
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Step 2: Ensure the request was successful
if response.status_code == 200:
    # Step 3: Load the response into a JSON format
    data = response.json()

    # Step 4: Convert JSON data to a Pandas DataFrame
    df = pd.DataFrame(data)

    # Step 5: Parse the timestamps and express them in Swedish local time.
    # utc=True yields a single tz-aware dtype even when rows carry different
    # UTC offsets (e.g. around a DST change); without it pandas falls back to
    # an object column and the .dt accessor below fails.
    sweden_tz = pytz.timezone('Europe/Stockholm')
    df['datetime'] = pd.to_datetime(df['datetime'], utc=True).dt.tz_convert(sweden_tz)

    # Local calendar date and wall-clock time as separate columns
    df['date'] = df['datetime'].dt.date
    df['time'] = df['datetime'].dt.time

    # Step 6: Split 'location' into 'location_name' and 'location_gps'
    df['location_name'] = df['location'].apply(lambda x: x['name'])
    df['location_gps'] = df['location'].apply(lambda x: x['gps'])

    # Step 7: Keep only the middle part (event type) of the 'name' field,
    # which otherwise contains date, type of event and place
    df['name'] = df['name'].apply(_event_type)

    # Step 8: Ensure id is int64 ('datetime' is already tz-aware from step 5)
    df['id'] = df['id'].astype('int64')

    # Step 9: Split 'location_gps' ("lat,lon") into numeric 'latitude' and 'longitude'
    df[['latitude', 'longitude']] = df['location_gps'].str.split(',', expand=True)
    df['latitude'] = df['latitude'].astype(float)
    df['longitude'] = df['longitude'].astype(float)

    # Step 10: Drop the columns that are no longer needed
    df = df.drop(columns=['location', 'location_gps', 'url'])

else:
    # NOTE(review): on failure `df` is not (re)assigned, so later cells either
    # raise NameError or silently reuse a `df` left over from an earlier run.
    print(f"Failed to fetch data. Status code: {response.status_code}")




In [2]:
# Preview the first rows of the events DataFrame.
df.head()

Unnamed: 0,id,datetime,name,summary,type,date,time,location_name,latitude,longitude
0,551137,2024-10-01 19:47:04+02:00,Mord/dråp,Skadad man som inkommit till akuten.,"Mord/dråp, försök",2024-10-01,19:47:04,Karlskrona,56.161224,15.5869
1,551143,2024-10-01 19:45:23+02:00,Häleri,Man i 45-årsåldern misstänkt för häleri.,Häleri,2024-10-01,19:45:23,Örebro,59.275263,15.213411
2,551141,2024-10-01 19:20:38+02:00,Trafikolycka,Viltolycka med älg på länsväg 321 Myrviken-Hov...,"Trafikolycka, vilt",2024-10-01,19:20:38,Berg,62.772481,14.169867
3,551142,2024-10-01 19:29:00+02:00,Trafikolycka,Två personbilar i kollision.,Trafikolycka,2024-10-01,19:29:00,Malmö,55.604981,13.003822
4,551133,2024-10-01 17:30:51+02:00,Brand,Lägenhetsbrand i Eskilstuna.,Brand,2024-10-01,17:30:51,Eskilstuna,59.371249,16.509805


In [3]:
# Summarise the columns: non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype                           
---  ------         --------------  -----                           
 0   id             500 non-null    int64                           
 1   datetime       500 non-null    datetime64[ns, Europe/Stockholm]
 2   name           500 non-null    object                          
 3   summary        500 non-null    object                          
 4   type           500 non-null    object                          
 5   date           500 non-null    object                          
 6   time           500 non-null    object                          
 7   location_name  500 non-null    object                          
 8   latitude       500 non-null    float64                         
 9   longitude      500 non-null    float64                         
dtypes: datetime64[ns, Europe/Stockholm](1), float64(2), int64(1), 

In [4]:
# Target dtype for every column that is cast with a plain astype()
column_dtypes = {
    'id': 'int64',
    'name': 'string',
    'location_name': 'string',
    'summary': 'string',
    'type': 'object',
    'latitude': float,
    'longitude': float,
}
for column, dtype in column_dtypes.items():
    df[column] = df[column].astype(dtype)

# Re-parse the date and time-of-day columns as pandas datetimes.
# NOTE(review): the time-only format carries no date, so the parsed values
# presumably land on a placeholder date (1900-01-01) - confirm this is wanted.
df['date'] = pd.to_datetime(df['date'])
df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S')

# Print the resulting dtypes to confirm the conversions
print(df.dtypes)


id                                          int64
datetime         datetime64[ns, Europe/Stockholm]
name                               string[python]
summary                            string[python]
type                                       object
date                               datetime64[ns]
time                               datetime64[ns]
location_name                      string[python]
latitude                                  float64
longitude                                 float64
dtype: object


In [98]:
# Count how many events there are per event name.
print(df['name'].value_counts())

Trafikolycka                     126
Sammanfattning natt               76
Rattfylleri                       37
Misshandel                        33
Brand                             32
Trafikkontroll                    30
Övrigt                            22
Trafikbrott                       12
Stöld                             11
Arbetsplatsolycka                 10
Rån                                9
Sammanfattning kväll och natt      8
Narkotikabrott                     7
Fylleri/LOB                        7
Stöld/inbrott                      6
Knivlagen                          5
Inbrott                            5
Mord/dråp                          4
Olaga hot                          4
Olaga intrång                      4
Olovlig körning                    4
Motorfordon                        4
Bråk                               4
Bedrägeri                          4
Sedlighetsbrott                    3
Polisinsats/kommendering           3
Kontroll person/fordon             3
D

In [99]:
# Remove rows whose name is one of the labels below (the night/evening
# summaries and the 'Övrigt' bucket), then recount the remaining names.
excluded_names = (
    'Sammanfattning natt',
    'Sammanfattning kväll och natt',
    'Övrigt',
)
for excluded in excluded_names:
    df = df[df['name'] != excluded]

df['name'].value_counts()

Trafikolycka                126
Rattfylleri                  37
Misshandel                   33
Brand                        32
Trafikkontroll               30
Trafikbrott                  12
Stöld                        11
Arbetsplatsolycka            10
Rån                           9
Fylleri/LOB                   7
Narkotikabrott                7
Stöld/inbrott                 6
Inbrott                       5
Knivlagen                     5
Olaga intrång                 4
Olovlig körning               4
Mord/dråp                     4
Olaga hot                     4
Bedrägeri                     4
Bråk                          4
Motorfordon                   4
Djur                          3
Våld/hot mot tjänsteman       3
Polisinsats/kommendering      3
Kontroll person/fordon        3
Sedlighetsbrott               3
Skottlossning                 2
Skadegörelse                  2
Vapenlagen                    2
Försvunnen person             2
Fjällräddning                 2
Efterlys

In [9]:
from google.cloud import bigquery
from google.oauth2 import service_account

import os

from google.cloud.exceptions import NotFound

# Path to the service-account JSON key. Set the GOOGLE_APPLICATION_CREDENTIALS
# environment variable to point at your own key; if it is unset, the original
# local path below is used.
key_path = os.environ.get(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "C:\\Users\\lalka\\OneDrive\\Skrivbord\\Studier\\Projekt\\working repo\\ds_project\\crime-in-sweden-project-47eef163c346.json",
)
credentials = service_account.Credentials.from_service_account_file(key_path)

# Set up BigQuery Client with credentials
client = bigquery.Client(credentials=credentials, project=credentials.project_id)

# Define BigQuery dataset and table name
project_id = "crime-in-sweden-project"
dataset_id = "Crime_in_Sweden"
# Fully-qualified dataset id, so the existence check, the creation and the
# table all refer to the same project regardless of the client's default.
dataset_ref = f"{project_id}.{dataset_id}"
table_id = f"{dataset_ref}.events"

# Create the dataset only if it does not exist. Catching NotFound (rather than
# every Exception) lets authentication or network errors surface instead of
# triggering a misleading create attempt.
try:
    client.get_dataset(dataset_ref)  # Check if dataset exists
except NotFound:
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)  # This creates the dataset
    print(f"Dataset {dataset_id} created.")

# Upload DataFrame to BigQuery.
# NOTE(review): WRITE_APPEND adds the rows of `df` to the table on every run,
# so re-running this cell with the same data stores the same rows again.
job_config = bigquery.LoadJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_APPEND)

# Load data to BigQuery
job = client.load_table_from_dataframe(df, table_id, job_config=job_config)

# Wait for the job to complete (raises if the load job failed)
job.result()

print(f"Data loaded successfully to {table_id}")



NameError: name 'df' is not defined

# Code for HTML page

In [104]:
import folium

import html

# Create a map centered over Sweden
sweden_map = folium.Map(location=[61.0, 15.0], zoom_start=5)  # Coordinates roughly center Sweden

# Add one red dot per event. Only the columns needed for the marker are
# iterated, as plain tuples.
marker_columns = ['latitude', 'longitude', 'name', 'datetime', 'summary']
for row in df[marker_columns].itertuples(index=False):
    # The popup is rendered as HTML and the texts come from an external API,
    # so escape them; only the <br> separators are real markup.
    popup_html = "<br>".join(
        html.escape(str(value)) for value in (row.name, row.datetime, row.summary)
    )
    folium.CircleMarker(
        location=(row.latitude, row.longitude),
        radius=5,  # Size of the dot
        color='red',  # Color of the dot
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        popup=popup_html,  # Information on click
    ).add_to(sweden_map)

# Save the map to an HTML file
sweden_map.save('sweden_crime_map.html')

In [15]:
!pip install folium


Collecting folium
  Using cached folium-0.17.0-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting branca>=0.6.0 (from folium)
  Using cached branca-0.8.0-py3-none-any.whl.metadata (1.5 kB)
Collecting xyzservices (from folium)
  Using cached xyzservices-2024.9.0-py3-none-any.whl.metadata (4.1 kB)
Using cached folium-0.17.0-py2.py3-none-any.whl (108 kB)
Using cached branca-0.8.0-py3-none-any.whl (25 kB)
Using cached xyzservices-2024.9.0-py3-none-any.whl (85 kB)
Installing collected packages: xyzservices, branca, folium
Successfully installed branca-0.8.0 folium-0.17.0 xyzservices-2024.9.0
