In [1]:
import os

import pandas as pd

# read the crimes workbook from the raw-data folder (relative path) into a DataFrame
df = pd.read_excel(io='../data/raw/crimes.xlsx')

# preview the first five rows as the cell's displayed result
df.head(n=5)

Unnamed: 0,Crime ID,Crime Date,Country,Longitude,Crime Type ID,Resolved,People Involved,Crime DateTime
0,1,2021-09-01,United Kingdom,,1,0,2,01.09.2021 02:00
1,2,2023-08-02,France,,9,0,2,02.08.2023 17:30
2,3,2022-04-10,Germany,,12,0,2,10.04.2022 22:35
3,4,2023-09-21,Italy,,11,1,5,21.09.2023 01:45
4,5,2022-04-02,Spain,,12,1,2,02.04.2022 20:40


In [2]:
# Structural overview of the loaded frame: dimensions first, then one
# headed section each for column names, dtypes and per-column null counts.
print("Shape (rows, columns):", df.shape)

for heading, detail in (
    ("\nColumn names:", df.columns.tolist()),
    ("\nData types:", df.dtypes),
    ("\nMissing values per column:", df.isna().sum()),
):
    # each heading string starts with a newline, which separates the sections
    print(heading)
    print(detail)

Shape (rows, columns): (2500, 8)

Column names:
['Crime ID', 'Crime Date', 'Country', 'Longitude', 'Crime Type ID', 'Resolved', 'People Involved', 'Crime DateTime']

Data types:
Crime ID                    int64
Crime Date         datetime64[ns]
Country                    object
Longitude                 float64
Crime Type ID               int64
Resolved                    int64
People Involved             int64
Crime DateTime             object
dtype: object

Missing values per column:
Crime ID              0
Crime Date            0
Country               0
Longitude          2500
Crime Type ID         0
Resolved              0
People Involved       0
Crime DateTime        0
dtype: int64


In [None]:
# 2. basic rename & keep only what we need
# NOTE(review): 'Country' is relabelled 'City' although the previewed values look like
# country names (e.g. "France", "Spain") — confirm the label; kept so the output schema is unchanged.
# NOTE(review): 'Longitude' is retained even though the missing-value report shows it is
# null for every row — confirm whether downstream consumers still expect the column.
df = df.rename(columns={'Crime Date': 'Date', 'Country': 'City'})[['Date', 'City', 'Crime Type ID', 'Longitude', 'Resolved']]

# 3-4. drop rows missing Date or City, force Date to datetime, then drop rows whose
# Date could not be parsed (errors='coerce' turns them into NaT).
# Built as a single chain with .assign so a new frame is produced at each step instead of
# assigning into the result of dropna(), which can raise SettingWithCopyWarning.
clean = (
    df.dropna(subset=['Date', 'City'])
      .assign(Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'))
      .dropna(subset=['Date'])
)

# 5. save
# to_csv does not create missing parent folders, so make sure the target directory
# exists first; otherwise this cell fails on a fresh checkout / Restart & Run All.
os.makedirs('../data/processed', exist_ok=True)
clean.to_csv('../data/processed/crimes_clean.csv', index=False)

print("Clean file saved! Shape:", clean.shape)

In [None]:
import os
# Ensure the processed-data folder exists (no error if it is already there).
os.makedirs(name='../data/processed', exist_ok=True)

# Write the cleaned frame without its index column.
clean.to_csv(path_or_buf='../data/processed/crimes_clean.csv', index=False)

# Confirm the write and report the frame's (rows, columns).
print("Clean file saved! Shape:", clean.shape)