# Data Overview from opendata.dk

The available parking spaces and their occupancy are counted in Copenhagen, and the data sets are available on [Opendata.dk](https://www.opendata.dk). There are two data sets available:
- **Parking spaces**: The data set shows legal parking spaces during the day (7am-6pm) at street level (on public and private shared roads), parking spaces in publicly owned parking facilities as well as parking spaces without a parking system. Parking options for electric cars, shared cars, taxis and disabled drivers appear. Also includes parking spaces reserved for embassies and consulates.
- **Parking counts**: Parking counts on roads/road sections are conducted twice in March and October at 12:00, 17:00 and 22:00 in selected areas. Parking occupancy rates are calculated based on the parking counts.

The data sets will be analyzed and preprocessed in the following cells.

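**NB:** The `lovlig_p_*` fields indicate the number of parking spaces. They do not indicate the number of legal parking spaces.
The occupancy fields (`belaegning_*_pct`, renamed to `occupancy_*_pct` below) are calculated as `parked_cars / legal_p * 100`, rounded to a whole number. Values above 100 are therefore possible (e.g. 45 parked cars on 41 spaces gives 110).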

In [1]:
## Imports
import matplotlib.pyplot as plt
import pandas as pd
import os

## Parking counts

In [8]:
# Every CSV this notebook reads or writes lives in "projectData", one level
# above the current working directory. data_path is reused by later cells.
parent_dir = os.path.abspath(os.pardir)
data_path = os.path.join(parent_dir, "projectData")

# Read the parking counts file; it is cleaned in the following cells and
# written back under a different name, so this is the uncleaned input
# despite the variable name.
cleaned_data_path = os.path.join(data_path, "parking_counts.csv")
df = pd.read_csv(cleaned_data_path)

In [9]:
# Drop redundant columns and assign the result back to df.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError. This also
# matches how the parking spaces cell further down drops its columns.
redundant_columns = ["FID", "pkey", "taelle_id", "bemaerkning", "kategori", "ogc_fid"]
df = df.drop(columns=redundant_columns, errors="ignore")

# Replace NaN with sentinel values: an empty string for the text column
# "straekning" and -1 for "fra_m" / "til_m".
fill_values = {"straekning": "", "fra_m": -1, "til_m": -1}
for column, filler in fill_values.items():
    df[column] = df[column].fillna(filler)

df.head()

Unnamed: 0,vejnavn,straekning,lovlig_p_kl_12,parkerede_biler_kl_12,belaegning_kl_12_pct,lovlig_p_kl_17,parkerede_biler_kl_17,belaegning_kl_17_pct,lovlig_p_kl_22,parkerede_biler_kl_22,belaegning_kl_22_pct,aar_mnd,vej_id,fra_m,til_m,omraade,id,wkb_geometry
0,Dyrkøb,,41,40.0,98.0,41,32.0,78.0,41,24.0,59.0,201609.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
1,Dyrkøb,,41,45.0,110.0,41,36.0,88.0,41,35.0,85.0,201303.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
2,Dyrkøb,,43,46.0,107.0,43,43.0,100.0,43,29.0,67.0,201203.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
3,Dyrkøb,,41,44.0,107.0,41,44.0,107.0,41,37.0,90.0,201403.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
4,Dyrkøb,,41,40.0,98.0,41,20.0,49.0,41,18.0,44.0,201603.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...


In [10]:
### Correct date

# Rows without a value in "aar_mnd" are discarded; according to the original
# analysis only a single entry has a blank date.
dated = df.dropna(subset=["aar_mnd"])

# aar_mnd is a float in YYYYMM form (e.g. 201609.0). Going through int -> str
# yields "201609", which is then parsed as year + month.
survey_month = pd.to_datetime(dated["aar_mnd"].astype(int).astype(str), format="%Y%m")

# Store year and month as two separate columns (appended at the end) and drop
# the combined aar_mnd column they were derived from.
df = dated.assign(aar=survey_month.dt.year, mnd=survey_month.dt.month).drop(columns=["aar_mnd"])


In [None]:
### Translate the column names and save to a new CSV file

# Danish -> English column names, grouped by topic. Columns that are not
# listed here keep their current name.
column_translations = {
    # where the count was taken
    "vejnavn": "street_name",
    "straekning": "stretch",
    "vej_id": "street_id",
    "fra_m": "from_m",
    "til_m": "to_m",
    "omraade": "area",
    # count at 12:00
    "lovlig_p_kl_12": "legal_p_at_12",
    "parkerede_biler_kl_12": "parked_cars_at_12",
    "belaegning_kl_12_pct": "occupancy_at_12_pct",
    # count at 17:00
    "lovlig_p_kl_17": "legal_p_at_17",
    "parkerede_biler_kl_17": "parked_cars_at_17",
    "belaegning_kl_17_pct": "occupancy_at_17_pct",
    # count at 22:00
    "lovlig_p_kl_22": "legal_p_at_22",
    "parkerede_biler_kl_22": "parked_cars_at_22",
    "belaegning_kl_22_pct": "occupancy_at_22_pct",
    # when the count was taken
    "aar": "year",
    "mnd": "month",
}
df = df.rename(columns=column_translations)

# Write the cleaned counts next to the input file, without the index column.
cleaned_counts_path = os.path.join(data_path, "cleaned_parking_counts.csv")
df.to_csv(cleaned_counts_path, index=False)


## Parking spaces

In [27]:
# Read the parking spaces file and strip surrounding whitespace from the
# column names so the lookups below match.
cleaned_data_path = os.path.join(data_path, "parking_spaces.csv")
df = pd.read_csv(cleaned_data_path)
df.columns = df.columns.str.strip()

# Columns that are not carried over into the cleaned file.
unused_columns = [
    "FID",
    "vejside",
    "bemaerkning",
    "taelle_id",
    "startdato_midlertidigt_nedlagt",
    "slutdato_midlertidigt_nedlagt",
    "restriktionstype",
    "restriktionstekst",
    "taelle_note",
    "delebilsklub",
    "aendring_p_ordning",
    "uuid",
    "ogc_fid",
]

# Danish -> English column names. "id" and "wkb_geometry" are not listed
# because they keep their names.
spaces_translations = {
    "vejkode": "street_code",
    "vejnavn": "street_name",
    "antal_pladser": "num_spaces",
    "restriktion": "restriction",
    "vejstatus": "street_status",
    "bydel": "district",
    "p_ordning": "parking_order",
    "p_type": "parking_type",
    "p_status": "parking_status",
    "rettelsedato": "correction_date",
    "oprettelsesdato": "creation_date",
}

# errors='ignore' skips any listed column that is absent from the file.
# NaN values are left untouched for this data set.
df = df.drop(columns=unused_columns, errors='ignore').rename(columns=spaces_translations)

# Write the cleaned spaces next to the input file, without the index column.
cleaned_spaces_path = os.path.join(data_path, "cleaned_parking_spaces.csv")
df.to_csv(cleaned_spaces_path, index=False)
