# Time Series Exploratory Notebook

In [None]:
# load dependencies
import matplotlib.pyplot as plt
import statsmodels
import pandas as pd

## Historic Data

### Import data

In [None]:
# Read the historic dataset from the raw-data folder (semicolon-delimited CSV).
path = "../data/raw/"
file = "100014.csv"
df = pd.read_csv(f"{path}{file}", sep=";")

### Dataset info

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

**Description:** There are 11 columns with information in the dataset. Most of them are of dtype object. Two columns, `Anzahl frei` and `Total Plätze`, are of type int64, which is a good sign since we only expect numeric values there.

### Columns with na values

In [None]:
# For each column, report whether it contains at least one missing value.
df.isna().any()

**Description:** There are no missing values in any column of the historic dataset. All entries seem to be complete.

### Nr of garages in the dataset

In [None]:
# List the distinct (id, id2, title) combinations, re-indexed from 0.
df[["id", "id2", "title"]].drop_duplicates().reset_index(drop=True)

**Description:** There are a total of 16 distinct garages in the dataset. 

### Nr of entries per garage

In [None]:
# Count the non-null `published` values for each `title`.
df[["title", "published"]].groupby("title").count()

**Description:** Only one garage, "Parkhaus Kunstmuseum", contains fewer entries than the rest.

### Nr unique of `Total Parkplätze`

In [None]:
# Count the distinct `total` values observed for each `title`.
df[["title", "total"]].groupby("title").nunique()

**Description:** Most garages have a constant number of parking spots for the duration of the dataset. However, there are also two garages with two different totals: "Parkhaus Bad. Bahnhof" and "Parkhaus Claramatte". In a next step, we want to investigate the reason for this.

In [None]:
def show_total_spots_timeline(df, name, last_n_entries=None):
    """
    Create a line plot of the total number of parking spots of a single garage.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with at least the columns ``title``, ``published`` and ``total``.
        The frame passed in is not modified.
    name : str
        Value of the ``title`` column that selects the garage to plot.
    last_n_entries : int, optional
        If given (and non-zero), only the chronologically last
        ``last_n_entries`` rows of the garage are plotted. With ``None``
        (the default) or ``0`` the complete timeline is plotted.

    Returns
    -------
    None
        The figure is displayed via ``plt.show()``.
    """
    # Filter first and copy only the slice we need; copying the whole frame
    # up front is wasted work on a large dataset.
    timeline = df.loc[df["title"] == name, ["published", "total"]].copy()

    # Parse the timestamps *before* sorting. Sorting the raw strings is
    # lexicographic and can differ from chronological order (for example
    # with mixed UTC offsets), which would make the "last n" selection
    # below pick the wrong rows.
    timeline["published"] = pd.to_datetime(timeline["published"])
    timeline = timeline.sort_values(by="published").reset_index(drop=True)

    if last_n_entries:
        # The rows are already in chronological order, so the tail is the
        # most recent part of the series.
        timeline = timeline.tail(last_n_entries)

    plt.figure(figsize=(16, 5))
    plt.title(f"Total nr of parking spots for: {name}")
    plt.plot(timeline["published"], timeline["total"])
    plt.xlabel("Timeline")
    plt.ylabel("Total spots")
    plt.show()

### Timeline for "Parkhaus Bad. Bahnhof"

In [None]:
# Plot the full history first, then only the last 7 * 24 rows
# (presumably one week of hourly records -- confirm the sampling interval).
for n_rows in (None, 7 * 24):
    show_total_spots_timeline(df, name="Parkhaus Bad. Bahnhof", last_n_entries=n_rows)

### Timeline for "Parkhaus Claramatte"

In [None]:
# Plot the full history first, then only the last 7 * 24 rows
# (presumably one week of hourly records -- confirm the sampling interval).
for n_rows in (None, 7 * 24):
    show_total_spots_timeline(df, name="Parkhaus Claramatte", last_n_entries=n_rows)