In [81]:
import matplotlib.pyplot as plt
import pandas as pd
import os

In [82]:
# Directory (relative to the current working directory) that is scanned for the
# "queue" CSV files loaded below.
folder_path = "./Data"

### Question 1

You are to calculate the average waiting time for renting or returning a bicycle at each
station. If tourists should not wait more than one hour in total (both for renting bikes
and returning them), is the current system satisfactory?

In [83]:
# os.listdir() returns directory entries in arbitrary order, so sort the matching
# names: this makes the file order (and the row order of every frame built from it)
# reproducible across machines and kernel restarts.
csv_files = sorted(f for f in os.listdir(folder_path) if "queue" in f)


len(csv_files)

8

In [84]:
# Build one trimmed frame per queue file: (replication_no, id, wait_time).
all_waits = []

for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)

    df = (
        # Discard columns whose header starts with "Unnamed".
        df.loc[:, ~df.columns.str.contains('^Unnamed')]
        # wait_time is the difference between the curr_WaitTime and prev_WaitTime columns.
        .assign(wait_time=lambda frame: frame["curr_WaitTime"] - frame["prev_WaitTime"])
        # Only these three columns are used by the rest of the analysis.
        .loc[:, ["replication_no", "id", "wait_time"]]
    )

    all_waits.append(df)

all_waits

[       replication_no    id  wait_time
 0                   1    10        0.0
 1                   1    15        0.0
 2                   1    20        0.0
 3                   1    23        0.0
 4                   1    13        0.0
 ...               ...   ...        ...
 16115               6  3734        0.0
 16116               6  3643        0.0
 16117               6  3656        0.0
 16118               6  3725        0.0
 16119               6  3744        0.0
 
 [16120 rows x 3 columns],
        replication_no    id  wait_time
 0                   1    11        0.0
 1                   1    16        0.0
 2                   1    14        0.0
 3                   1    17        0.0
 4                   1    18        0.0
 ...               ...   ...        ...
 15996               6  3655        0.0
 15997               6  3722        0.0
 15998               6  3732        0.0
 15999               6  3731        0.0
 16000               6  3745        0.0
 
 [16001 r

In [85]:
# Stack the per-file frames into one table with a fresh 0..n-1 index, then keep only
# the rows with a strictly positive wait (surviving rows keep their index labels).
# NOTE(review): dropping zero-wait rows means every average computed from this frame
# covers only interactions that actually involved waiting — confirm this is intended.
combined_df = pd.concat(all_waits, ignore_index=True).loc[
    lambda frame: frame["wait_time"] > 0
]

combined_df

Unnamed: 0,replication_no,id,wait_time
5338,2,3403,2.866366
5339,2,3483,0.593555
5340,2,3408,2.498028
5344,2,3490,2.001491
7581,3,2946,3.593585
...,...,...,...
127037,6,3674,148.638968
127038,6,3628,138.304881
127039,6,3660,136.408257
127040,6,3652,138.321089


In [86]:
def get_wait_times(replication_no, tourist_id, df):
    """
    Return all wait times recorded for one tourist within one replication.

    Parameters:
        replication_no (int): Value matched against the "replication_no" column.
        tourist_id (int): Value matched against the "id" column.
        df (pd.DataFrame): Frame with "replication_no", "id" and "wait_time" columns.

    Returns:
        np.ndarray: The "wait_time" values of the matching rows, in row order.
        Empty when no row matches.

    Raises:
        KeyError: If any of the three required columns is missing from ``df``.
    """
    is_match = (df["replication_no"] == replication_no) & (df["id"] == tourist_id)
    # .to_numpy() always yields a NumPy array; .values would hand back a pandas
    # ExtensionArray for extension dtypes (e.g. nullable "Float64"), contradicting
    # the documented return type.
    return df.loc[is_match, "wait_time"].to_numpy()

# Spot check: all positive waits of tourist 10 in replication 1.
get_wait_times(replication_no=1, tourist_id=10, df=combined_df)

array([ 1.644738, 11.707794, 27.737416])

In [87]:
# One row per (replication, tourist): summed wait and number of rows contributing to it.
tourist_avg_waits = (
    combined_df.groupby(["replication_no", "id"])["wait_time"]
    .agg(total_wait="sum", num_interactions="count")
    .reset_index()
)
tourist_avg_waits

Unnamed: 0,replication_no,id,total_wait,num_interactions
0,1,10,41.089948,3
1,1,14,44.761548,2
2,1,15,33.910627,2
3,1,16,48.032236,2
4,1,17,40.781344,2
...,...,...,...,...
17653,6,3667,131.485924,1
17654,6,3669,141.768584,1
17655,6,3674,148.638968,1
17656,6,3676,142.827090,1


In [88]:
# Mean wait per counted interaction for each (replication, tourist) row.
tourist_avg_waits["avg_wait_time"] = tourist_avg_waits["total_wait"].div(
    tourist_avg_waits["num_interactions"]
)

tourist_avg_waits["avg_wait_time"]

0         13.696649
1         22.380774
2         16.955313
3         24.016118
4         20.390672
            ...    
17653    131.485924
17654    141.768584
17655    148.638968
17656    142.827090
17657    137.316829
Name: avg_wait_time, Length: 17658, dtype: float64

In [89]:
# Average the per-tourist mean waits within each replication (a mean of means).
replication_avg = (
    tourist_avg_waits.groupby("replication_no")["avg_wait_time"]
    .agg("mean")
    .rename("average_wait_time_per_tourist")
    .reset_index()
)
replication_avg

Unnamed: 0,replication_no,average_wait_time_per_tourist
0,1,94.495149
1,2,340.227298
2,3,237.282177
3,4,259.115249
4,5,135.3836
5,6,82.135496
