In [48]:
import matplotlib.pyplot as plt
import pandas as pd
import os

In [49]:
# Relative path of the directory that holds the input data files.
folder_path = "./Data"

### Question 1

You are to calculate the average waiting time for renting or returning a bicycle at each
station. If tourists should not wait more than one hour in total (both for renting bikes
and returning them), is the current system satisfactory?

In [50]:
# Names of the entries in folder_path whose name contains "queue".
# os.listdir returns entries in arbitrary order, so sort them: the file order
# determines the row order of everything that is built from these files.
csv_files = sorted(f for f in os.listdir(folder_path) if "queue" in f)

# Sanity check: number of matching files.
len(csv_files)

8

In [56]:
# One frame per queue file, each reduced to (replication_no, id, wait_time).
all_waits = []

for file in csv_files:
    file_path = os.path.join(folder_path, file)
    raw = pd.read_csv(file_path)

    # pandas labels header-less columns "Unnamed: N"; those are discarded.
    named_columns = ~raw.columns.str.contains('^Unnamed')

    # Build the result as one non-mutating chain. Assigning a new column onto
    # the .loc slice directly can raise SettingWithCopyWarning; .assign works
    # on a copy, so the values are the same without the warning.
    df = (
        raw.loc[:, named_columns]
        # Wait incurred in this row: current wait counter minus the previous one.
        .assign(wait_time=lambda frame: frame["curr_WaitTime"] - frame["prev_WaitTime"])
        # Keep only the columns used downstream.
        .loc[:, ["replication_no", "id", "wait_time"]]
    )

    all_waits.append(df)

all_waits

[      replication_no   id  wait_time
 0                  1   10        0.0
 1                  1   15        0.0
 2                  1   20        0.0
 3                  1   23        0.0
 4                  1   13        0.0
 ...              ...  ...        ...
 3330               6  817        0.0
 3331               6  807        0.0
 3332               6  805        0.0
 3333               6  808        0.0
 3334               6  742        0.0
 
 [3335 rows x 3 columns],
       replication_no   id  wait_time
 0                  1   11        0.0
 1                  1   16        0.0
 2                  1   14        0.0
 3                  1   17        0.0
 4                  1   18        0.0
 ...              ...  ...        ...
 3346               6  799        0.0
 3347               6  806        0.0
 3348               6  827        0.0
 3349               6  823        0.0
 3350               6  813        0.0
 
 [3351 rows x 3 columns],
       replication_no   id  wait

In [52]:
# Stack the per-file frames row-wise into one table with a fresh 0..n-1 index.
combined_df = pd.concat(objs=all_waits, axis="index", ignore_index=True)

combined_df

Unnamed: 0,replication_no,id,wait_time
0,1,10,0.000000
1,1,15,0.000000
2,1,20,0.000000
3,1,23,0.000000
4,1,13,0.000000
...,...,...,...
25953,6,772,141.012307
25954,6,762,137.867399
25955,6,780,130.931648
25956,6,765,133.597643


In [58]:
def get_wait_times(replication_no, tourist_id, df):
    """
    Look up every wait time recorded for one tourist within one replication.

    Parameters:
        replication_no (int): Replication to select (matched against the
            "replication_no" column).
        tourist_id (int): Tourist to select (matched against the "id" column).
        df (pd.DataFrame): Frame with "replication_no", "id" and "wait_time"
            columns, e.g. the combined wait-time table.

    Returns:
        np.ndarray: The "wait_time" values of the matching rows, in row order;
        empty when no row matches.
    """
    same_replication = df["replication_no"] == replication_no
    same_tourist = df["id"] == tourist_id
    return df.loc[same_replication & same_tourist, "wait_time"].values

# Example lookup: tourist 10 in replication 1.
get_wait_times(replication_no=1, tourist_id=10, df=combined_df)

array([ 0.      ,  0.      ,  0.      ,  1.644738, 11.707794, 27.737416])

In [53]:
# One row per (replication, tourist): summed wait and number of recorded rows.
per_tourist_aggregations = {
    "total_wait": ("wait_time", "sum"),
    "num_interactions": ("wait_time", "count"),
}
waits_by_tourist = combined_df.groupby(["replication_no", "id"])
tourist_avg_waits = waits_by_tourist.agg(**per_tourist_aggregations).reset_index()
tourist_avg_waits

Unnamed: 0,replication_no,id,total_wait,num_interactions
0,1,10,41.089948,6
1,1,11,0.000000,4
2,1,12,0.000000,8
3,1,13,0.000000,4
4,1,14,44.761548,4
...,...,...,...,...
4603,6,823,0.000000,1
4604,6,824,0.000000,1
4605,6,825,0.000000,1
4606,6,826,0.000000,1


In [54]:
# Mean wait per recorded interaction for each (replication, tourist) row.
wait_totals = tourist_avg_waits["total_wait"]
interaction_counts = tourist_avg_waits["num_interactions"]
tourist_avg_waits["avg_wait_time"] = wait_totals / interaction_counts

tourist_avg_waits["avg_wait_time"]

0        6.848325
1        0.000000
2        0.000000
3        0.000000
4       11.190387
          ...    
4603     0.000000
4604     0.000000
4605     0.000000
4606     0.000000
4607     0.000000
Name: avg_wait_time, Length: 4608, dtype: float64

In [55]:
# Average the per-tourist means within each replication (one row per replication).
replication_avg = (
    tourist_avg_waits.groupby("replication_no")
    .agg(average_wait_time_per_tourist=("avg_wait_time", "mean"))
    .reset_index()
)
replication_avg

Unnamed: 0,replication_no,average_wait_time_per_tourist
0,1,9.020781
1,2,7.136661
2,3,18.89225
3,4,15.988182
4,5,12.135152
5,6,10.083035
