### Environment
`conda create  --name eleven-env python=3.8.16`  
`conda activate eleven-env`  
`/Users/Antoine/opt/anaconda3/envs/eleven-env/bin/pip install -r requirements.txt`

### Import libraries

In [1]:
import pandas as pd
from pathlib import Path
import os
import datetime
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go

In [2]:
notebook_path = Path(os.getcwd())
root_path = notebook_path.parent.absolute()
data_path = os.path.join(root_path, 'data')
os.chdir(root_path)

In [3]:
attendance = pd.read_csv(os.path.join(data_path, 'attendance.csv'))
entity_schedule = pd.read_csv(os.path.join(data_path, 'entity_schedule.csv'))
link_attraction_park = pd.read_csv(os.path.join(data_path, 'link_attraction_park.csv'))
#parade_night_show = pd.read_csv(os.path.join(data_path, 'parade_night_show.xlsx'))
waiting_times = pd.read_csv(os.path.join(data_path, 'waiting_times.csv'))
weather_data = pd.read_csv(os.path.join(data_path, 'weather_data.csv'))

### Attendance evolution

In [4]:
print(attendance.shape)
attendance["USAGE_DATE"] = pd.to_datetime(attendance.USAGE_DATE)
attendance.head(2)

(2367, 3)


Unnamed: 0,USAGE_DATE,FACILITY_NAME,attendance
0,2018-06-01,PortAventura World,46804
1,2018-06-01,Tivoli Gardens,20420


In [5]:
start_date = pd.to_datetime('2018/06/01')
end_date = pd.to_datetime('2019/06/01')
start_date, end_date

(Timestamp('2018-06-01 00:00:00'), Timestamp('2019-06-01 00:00:00'))

In [6]:
to_plot = attendance.copy()
to_plot = to_plot[(to_plot.USAGE_DATE>start_date)&(to_plot.USAGE_DATE<end_date)][to_plot.FACILITY_NAME=="Tivoli Gardens"]
to_plot.head(2)

  to_plot = to_plot[(to_plot.USAGE_DATE>start_date)&(to_plot.USAGE_DATE<end_date)][to_plot.FACILITY_NAME=="Tivoli Gardens"]


Unnamed: 0,USAGE_DATE,FACILITY_NAME,attendance
3,2018-06-02,Tivoli Gardens,29110
5,2018-06-03,Tivoli Gardens,23727


In [7]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=to_plot["USAGE_DATE"], y=to_plot["attendance"], mode="lines", name='Attendance', line=dict(color='#002244')))
fig.update_layout(yaxis_title='CPU usage', width=800, height=400)
fig.show()

In [25]:
def attendance_figures(df, attraction_list, start_date, end_date, date_label, attendance_label, attraction_label):
    '''
    Return the attendance mean, max and min. 

    Inputs:
        df : pd.DataFrame() = table of the attendance per date and per attraction. 
        attraction_list : list(str) = list of the names of the attractions.
        start_date : str = starting date of the period with format yyyy/mm/dd.
        end_date : str = ending date of the period with format yyyy/mm/dd.
        date_label : str = label of the date column.
        attendance_label : str = mabel of the attendance column.
        attraction_label : str = label of the attraction column.

    Outputs:
        mean : int = average daily attendnace of the attraction over the period.
        min : min daily attendance at the attraction during the period.
        max : max daily attendance at the attraction during the perdiod. 
    '''
    to_plot = df.copy()

    # Change date types
    to_plot[f"{date_label}"] = pd.to_datetime(to_plot[f"{date_label}"])
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Filter the table
    to_plot = to_plot[(to_plot[f"{date_label}"]>start_date)&(to_plot[f"{date_label}"]<end_date)][to_plot[f"{attraction_label}"].isin(attraction_list)]

    # Calculate figures
    val_min = to_plot[f"{attendance_label}"].min()
    val_avg = to_plot[f"{attendance_label}"].mean()
    val_max = to_plot[f"{attendance_label}"].max()

    # Generate plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=to_plot["USAGE_DATE"], y=to_plot["attendance"], mode="lines", name='Attendance', line=dict(color='#002244')))
    fig.update_layout(yaxis_title='CPU usage', width=800, height=400)
    
    return fig, [val_min, val_avg, val_max]

In [26]:
df = attendance
attraction = ["Tivoli Gardens"]
start_date = '2018/06/01'
end_date = '2019/06/01'
date_label = "USAGE_DATE"
attendance_label = "attendance"
attraction_label = "FACILITY_NAME"

In [27]:
fig, L = attendance_figures(df, attraction, start_date, end_date, date_label, attendance_label, attraction_label)
print('min', 'mean', 'max')
print(L)

min mean max
[5320, 22382.684065934067, 44052]



Boolean Series key will be reindexed to match DataFrame index.



In [28]:
fig.show()

In [4]:
df = pd.read_csv(os.path.join(data_path, 'all_data_clean.csv'))
df.head()

  df = pd.read_csv(os.path.join(data_path, 'all_data_clean.csv'))


Unnamed: 0,work_date,deb_time_line,fin_time_line,entity_description_short,wait_time_max,nb_units,guest_carried,capacity,adjust_capacity,open_time,...,wind_deg,wind_gust,rain_1h,rain_3h,snow_1h,clouds_all,weather_id,weather_main,weather_icon,summer_time
0,2018-10-01 00:00:00,2018-10-01 16:30:00,2018-10-01 16:45:00,Roller Coaster,40,1.7333,25.0,75.0,65.0,15,...,334,0.0,0.0,0.0,0.0,91,804,Clouds,04d,True
1,2018-10-01 00:00:00,2018-10-01 16:45:00,2018-10-01 17:00:00,Roller Coaster,15,2.0,25.0,75.0,75.0,15,...,334,0.0,0.0,0.0,0.0,91,804,Clouds,04d,True
2,2018-10-01 00:00:00,2018-10-01 16:15:00,2018-10-01 16:30:00,Roller Coaster,10,2.0,29.0,75.0,75.0,15,...,334,0.0,0.0,0.0,0.0,91,804,Clouds,04d,True
3,2018-10-01 00:00:00,2018-10-01 16:00:00,2018-10-01 16:15:00,Roller Coaster,10,2.0,30.0,75.0,75.0,15,...,334,0.0,0.0,0.0,0.0,91,804,Clouds,04d,True
4,2018-10-01 00:00:00,2018-10-01 16:30:00,2018-10-01 16:45:00,Bumper Cars,10,18.0,144.0,254.749,254.75,15,...,334,0.0,0.0,0.0,0.0,91,804,Clouds,04d,True
