In [61]:
# Display the dataframe
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', None)  # or 199

In [69]:
import pandas as pd
import datetime

In [33]:
df = pd.read_csv("data/sample.csv", index_col="Unnamed: 0")
crowd_sample = df.iloc[:, 0:4]

crowd_sample["Datetime"] = pd.to_datetime(crowd_sample['Time'])
crowd_sample.set_index(["Datetime"], inplace=True)
crowd_sample = crowd_sample.drop(["Time"], axis=1)

In [None]:
# In case there is a missing value --> set it to NA
# df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
# https://numpy.org/doc/stable/reference/arrays.datetime.html

In [34]:
crowd_sample.head()

Unnamed: 0_level_0,CMSA-GAWW-22 Kloveniersburgwal,CMSA-GAWW-23 Bloedstraat,CMSA-GAWW-14 Oudezijds Voorburgwal t.h.v. 91
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-04-10 00:00:00,57,73,77
2021-04-10 00:15:00,57,42,66
2021-04-10 00:30:00,43,42,73
2021-04-10 00:45:00,33,27,71
2021-04-10 01:00:00,38,31,72


In [35]:
crowd_sample.index

DatetimeIndex(['2021-04-10 00:00:00', '2021-04-10 00:15:00',
               '2021-04-10 00:30:00', '2021-04-10 00:45:00',
               '2021-04-10 01:00:00', '2021-04-10 01:15:00',
               '2021-04-10 01:30:00', '2021-04-10 01:45:00',
               '2021-04-10 02:00:00', '2021-04-10 02:15:00',
               ...
               '2021-10-10 21:30:00', '2021-10-10 21:45:00',
               '2021-10-10 22:00:00', '2021-10-10 22:15:00',
               '2021-10-10 22:30:00', '2021-10-10 22:45:00',
               '2021-10-10 23:00:00', '2021-10-10 23:15:00',
               '2021-10-10 23:30:00', '2021-10-10 23:45:00'],
              dtype='datetime64[ns]', name='Datetime', length=672, freq=None)

In [36]:
# Any null values
missing_val_count_by_column = (crowd_sample.isnull().sum())
print(missing_val_count_by_column[missing_val_count_by_column > 0])

Series([], dtype: int64)


In [38]:
crowd_sample.size

2016

In [279]:
sensor1 = pd.read_csv("data/CMSA-GAWW-22-Kloveniersburgwal.csv")
sensor1["objectnummer"] = "CMSA-GAWW-22"
sensor1["location"] = "Kloveniersburgwal"
sensor1["Datetime"] = pd.to_datetime(sensor1['Datetime'])
sensor1 = sensor1.rename(columns={"CMSA-GAWW-22 Kloveniersburgwal": "count", "Datetime": "datetime"})
sensor1 = sensor1.reindex(columns=["objectnummer", "location", "datetime", "count"])

In [280]:
sensor1.head()

Unnamed: 0,objectnummer,location,datetime,count
0,CMSA-GAWW-22,Kloveniersburgwal,2021-04-10 00:00:00,57
1,CMSA-GAWW-22,Kloveniersburgwal,2021-04-10 00:15:00,57
2,CMSA-GAWW-22,Kloveniersburgwal,2021-04-10 00:30:00,43
3,CMSA-GAWW-22,Kloveniersburgwal,2021-04-10 00:45:00,33
4,CMSA-GAWW-22,Kloveniersburgwal,2021-04-10 01:00:00,38


In [281]:
sensor1.iloc[0, 0]

'CMSA-GAWW-22'

In [282]:
# Crowd prediction based on simple ARMA model, using sample data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
import warnings
import random

# Specify to ignore warning messages
warnings.filterwarnings("ignore")

# Training set
train = sensor1[-100:]
arima_fit = ARIMA(train["count"].values, order=(1,0,1)).fit()
fc, se, conf  = arima_fit.forecast(100, alpha=0.05)  # 95% conf

dt = [train.iloc[-1:]["datetime"].values[0] + np.timedelta64(15*i,'m') for i in range(100)]
forecasts = pd.DataFrame({"datetime": dt})
forecasts["forecast"] = fc
forecasts["lower_bound"] = conf[:, 0]
forecasts["upper_bound"] = conf[:, 1]

forecasts["objectnummer"] = "CMSA-GAWW-22"
forecasts["location"] = "Kloveniersburgwal"
forecasts["threshold"] = 99.5 #wrong number
forecasts = forecasts.reindex(columns=["objectnummer", "location", "datetime", "forecast", "lower_bound", "upper_bound", "threshold"])

forecasts.head()

Unnamed: 0,objectnummer,location,datetime,forecast,lower_bound,upper_bound,threshold
0,CMSA-GAWW-22,Kloveniersburgwal,2021-10-10 23:45:00,64.880307,-21.557033,151.317647,99.5
1,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 00:00:00,68.960869,-41.413342,179.335079,99.5
2,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 00:15:00,72.907906,-55.895254,201.711067,99.5
3,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 00:30:00,76.72579,-67.19884,220.650419,99.5
4,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 00:45:00,80.418744,-76.338605,237.176094,99.5


In [283]:
forecasts.to_csv("result/CMSA-GAWW-22_forecast.csv")

In [289]:
sensor1 = pd.read_csv("result/CMSA-GAWW-22_forecast.csv", index_col="Unnamed: 0")
threshold = 182.5
overcrowd = sensor1[sensor1.forecast > threshold][["objectnummer", "location", "datetime", "forecast", "threshold"]]
overcrowd

Unnamed: 0,objectnummer,location,datetime,forecast,threshold
87,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 21:30:00,182.684605,99.5
88,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 21:45:00,182.910386,99.5
89,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 22:00:00,183.12878,99.5
90,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 22:15:00,183.340027,99.5
91,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 22:30:00,183.544361,99.5
92,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 22:45:00,183.74201,99.5
93,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 23:00:00,183.933191,99.5
94,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 23:15:00,184.118116,99.5
95,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 23:30:00,184.29699,99.5
96,CMSA-GAWW-22,Kloveniersburgwal,2021-10-11 23:45:00,184.470011,99.5


In [308]:
sensor1["hour"] = sensor1.datetime.astype('datetime64[h]')
sensor1 = sensor1.drop(['date'], axis=1)
# dates.astype('datetime64[Y]').astype(int)

In [312]:
avg_crowd_per_hour_sensor1 = sensor1.groupby("hour")["forecast"].mean()

In [322]:
avg_crowd_per_hour_sensor1 = pd.DataFrame(avg_crowd_per_hour)
avg_crowd_per_hour_sensor1 = avg_crowd_per_hour_sensor1.rename(columns={"forecast":"Kloveniersburgwal"})
avg_crowd_per_hour_sensor1                              

Unnamed: 0_level_0,Kloveniersburgwal
hour,Unnamed: 1_level_1
2021-10-10 23:00:00,64.880307
2021-10-11 00:00:00,74.753327
2021-10-11 01:00:00,89.061564
2021-10-11 02:00:00,101.586962
2021-10-11 03:00:00,112.551667
2021-10-11 04:00:00,122.150145
2021-10-11 05:00:00,130.552632
2021-10-11 06:00:00,137.908149
2021-10-11 07:00:00,144.347153
2021-10-11 08:00:00,149.983842


In [332]:
sensor1

# table = pd.pivot_table(df, values='D', index=['A', 'B'],
#                     columns=['C'], aggfunc=np.sum)
# table
# C        large  small
# A   B
# bar one    4.0    5.0
#     two    7.0    6.0
# foo one    4.0    1.0
#     two    NaN    6.0

avg_crowd_per_hour = pd.pivot_table(sensor1, values="forecast", index="hour", columns="location", aggfunc=np.mean)
for i in avg_crowd_per_hour.columns:
    print(i)

Kloveniersburgwal
