In [20]:
## Import

import matplotlib.pyplot as plt
import pandas as pd
import os

data_path = os.path.abspath(os.path.join(os.pardir, "projectData"))
cleaned_data_path = os.path.join(data_path, "taelling_p_pladser.csv")
df = pd.read_csv(cleaned_data_path)

In [21]:
## Remove unnecessary columns
df.drop(columns=["FID", "pkey", "taelle_id", "bemaerkning", "kategori", "ogc_fid"], inplace=True) #Removes columns with redundant data

#For "straekning" replace NaN with ""
df["straekning"].fillna("", inplace=True) #Replace NaN with empty string

#"fra_m" and "til_m" also have some blanks (replace with -1?)
df["fra_m"].fillna(-1, inplace=True) #Replace NaN with -1
df["til_m"].fillna(-1, inplace=True) #Replace NaN with -1

df.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["straekning"].fillna("", inplace=True) #Replace NaN with empty string
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["fra_m"].fillna(-1, inplace=True) #Replace NaN with -1
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate 

Unnamed: 0,vejnavn,straekning,lovlig_p_kl_12,parkerede_biler_kl_12,belaegning_kl_12_pct,lovlig_p_kl_17,parkerede_biler_kl_17,belaegning_kl_17_pct,lovlig_p_kl_22,parkerede_biler_kl_22,belaegning_kl_22_pct,aar_mnd,vej_id,fra_m,til_m,omraade,id,wkb_geometry
0,Dyrkøb,,41,40.0,98.0,41,32.0,78.0,41,24.0,59.0,201609.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
1,Dyrkøb,,41,45.0,110.0,41,36.0,88.0,41,35.0,85.0,201303.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
2,Dyrkøb,,43,46.0,107.0,43,43.0,100.0,43,29.0,67.0,201203.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
3,Dyrkøb,,41,44.0,107.0,41,44.0,107.0,41,37.0,90.0,201403.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...
4,Dyrkøb,,41,40.0,98.0,41,20.0,49.0,41,18.0,44.0,201603.0,1011396000000,0.0,115.0,1.0,1,MULTILINESTRING ((12.573769564824445 55.679320...


In [22]:
###Correct date

#Only a single entry has a blank date, so we remove it
df.dropna(subset=["aar_mnd"], inplace=True) #Remove rows with NaN in "aar_mnd" column

##aar_mnd is a float with format YYYYMM, so we need to convert it to a datetime object
df['aar_mnd_dt'] = pd.to_datetime(df['aar_mnd'].astype(int).astype(str), format='%Y%m')
# Seperate year and month into two columns
df['aar'] = df['aar_mnd_dt'].dt.year
df['mnd'] = df['aar_mnd_dt'].dt.month
# Drop the original column
df.drop(columns=["aar_mnd_dt"], inplace=True) #Remove the original column
df.drop(columns=["aar_mnd"], inplace=True) #Remove the original column


In [23]:
### Save to new CSV file

#Rename/translate the columns to English to be more readable

df.rename(columns={"vejnavn": "street_name", 
                   "straekning": "stretch",
                   "lovlig_p_kl_12": "legal_p_at_12",
                   "parkerede_biler_kl_12": "parked_cars_at_12",
                   "belaegning_kl_12_pct": "occupancy_at_12_pct",
                   "lovlig_p_kl_17": "legal_p_at_17",
                   "parkerede_biler_kl_17": "parked_cars_at_17",
                   "belaegning_kl_17_pct": "occupancy_at_17_pct",
                   "lovlig_p_kl_22": "legal_p_at_22",
                   "parkerede_biler_kl_22": "parked_cars_at_22",
                   "belaegning_kl_22_pct": "occupancy_at_22_pct",
                   "aar": "year",
                   "mnd": "month",
                   "vej_id": "street_id",
                   "fra_m": "from_m",
                   "til_m": "to_m",
                   "omraade": "area",
                   }, inplace=True)

#Save the cleaned data to a new CSV file
df.to_csv(os.path.join(data_path, "cleaned_parking.csv"), index=False)
