In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns

from pathlib import Path

# Root folder of the project checkout -- edit this to match your machine.
project_root = Path("C:/Code/Junc")

# Both the Excel workbook and the CSV path live under <project_root>/data.
input_file = project_root.joinpath("data", "uber_hackathon_v2_mock_data.xlsx")
output_file = project_root.joinpath("data", "hourly_avg_combined_score.csv")




## Heuristic 1: Trip Score

In [11]:
def trip_score(earning, tips, km, alpha=0.5):
    """
    Score a trip as its revenue minus a per-kilometre distance penalty.

    The score is ``(earning + tips) - alpha * km``. Only arithmetic
    operators are used, so scalars and element-wise containers such as
    pandas Series are both accepted.

    Parameters:
        earning: Net earning of the trip.
        tips: Tips received for the trip.
        km: Distance travelled, in kilometres.
        alpha: Penalty subtracted per kilometre (default 0.5).

    Returns:
        The trip score, in the same shape as the inputs.
    """
    distance_penalty = alpha * km
    return (earning + tips) - distance_penalty


## Compute the average total trip score per earner for each hour of the week


In [12]:

# --- Function to compute hour of the week ---
def compute_hour_of_week(timestamp):
    """
    Convert a timestamp into its hour-of-week index.

    Parameters:
        timestamp: Object exposing ``dayofweek`` (Monday=0) and ``hour``
            attributes, e.g. a ``pandas.Timestamp``.

    Returns:
        ``dayofweek * 24 + hour``, i.e. 0 for Monday 00:xx up to 167 for
        Sunday 23:xx.
    """
    hours_in_previous_days = 24 * timestamp.dayofweek
    return hours_in_previous_days + timestamp.hour

# --- Main function for both rides and eats ---
def compute_hourly_avg_score_both(input_file, output_file, alpha=0.5):
    """
    Compute the average trip score per hour of the week across rides and
    eats earners, save it as CSV and return it.

    Steps:
        1. Read the "rides_trips" and "eats_orders" sheets of the workbook.
        2. Score every trip with ``trip_score`` and tag it with its ISO
           year, ISO week and hour of the week (0..167, Monday 00:00 = 0).
        3. Sum scores per (earner, ISO year, ISO week, hour).
        4. Average those sums over weeks for each (earner, hour).
        5. Average over earners for each hour.

    Parameters:
        input_file: Path of the Excel workbook. Both sheets need the
            columns ``start_time``, ``net_earnings`` and ``distance_km``;
            "rides_trips" also needs ``tips`` and ``driver_id``,
            "eats_orders" needs ``tip_eur`` and ``courier_id``.
        output_file: Destination CSV path. Missing parent directories
            are created.
        alpha: Distance penalty per km forwarded to ``trip_score``
            (default 0.5).

    Returns:
        pandas.DataFrame with columns ``job_hour`` and
        ``avg_score_per_earner``, one row per hour of the week that has
        at least one trip.
    """
    # sheet name -> (tip column, earner-id column) used by that sheet
    sheet_columns = {
        "rides_trips": ("tips", "driver_id"),
        "eats_orders": ("tip_eur", "courier_id"),
    }

    # A list of sheet names makes pandas parse the workbook once and
    # return a dict of DataFrames keyed by sheet name.
    sheets = pd.read_excel(input_file, sheet_name=list(sheet_columns))

    combined_trips = []
    for sheet_name, (tip_col, id_col) in sheet_columns.items():
        trips = sheets[sheet_name]
        start_time = pd.to_datetime(trips['start_time'])
        iso = start_time.dt.isocalendar()

        combined_trips.append(pd.DataFrame({
            'earner_id': trips[id_col],
            # ISO week numbers repeat every year, so the ISO year is kept
            # alongside to stop weeks from different years being merged.
            'iso_year': iso['year'],
            'week': iso['week'],
            # Vectorised equivalent of compute_hour_of_week().
            'job_hour': start_time.dt.dayofweek * 24 + start_time.dt.hour,
            'score': trip_score(
                trips['net_earnings'], trips[tip_col], trips['distance_km'], alpha
            ),
        }))

    # Combine rides and eats
    all_trips = pd.concat(combined_trips, ignore_index=True)

    # Sum per earner, calendar week, hour
    earner_week_hour = (
        all_trips
        .groupby(['earner_id', 'iso_year', 'week', 'job_hour'])
        .agg(total_score=('score', 'sum'))
        .reset_index()
    )

    # Average across weeks per earner per hour
    earner_hour_avg = (
        earner_week_hour
        .groupby(['earner_id', 'job_hour'])
        .agg(avg_score_per_hour=('total_score', 'mean'))
        .reset_index()
    )

    # Average across earners per hour
    hourly_avg_score = (
        earner_hour_avg
        .groupby('job_hour')
        .agg(avg_score_per_earner=('avg_score_per_hour', 'mean'))
        .reset_index()
    )

    # Save to CSV; create the target folder first so a missing "data"
    # directory does not abort the run after all the work is done.
    output_file = Path(output_file)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    hourly_avg_score.to_csv(output_file, index=False)
    print(f"Saved combined hourly average scores (rides + eats) to: {output_file}")

    return hourly_avg_score


# Paths come from the configuration cell (`input_file` / `output_file`).
hourly_avg_all = compute_hourly_avg_score_both(input_file, output_file, alpha=0.5)


FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Code\\Junc\\Junction-2025\\data'

## Plot the average score of each hour

In [None]:

# Apply the seaborn style before the axes are created so it takes effect.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(16, 6))

# Line plot of the average score for every hour of the week
sns.lineplot(
    data=hourly_avg_all,
    x='job_hour',
    y='avg_score_per_earner',
    marker='o',
    color='royalblue',
    ax=ax,
)

# Labels and title
ax.set_xlabel("Hour of the Week (0=Monday 00:00)")
ax.set_ylabel("Average Score per Earner")
ax.set_title("Average Hourly Score per Earner Across the Week")
ax.set_xticks(range(0, 168, 12))  # show every 12th hour for readability

# Highlight weekdays vs weekends. Saturday 00:00 is hour 120 (5 * 24), so
# both bands meet there and the last hour of Friday is shaded too.
ax.axvspan(0, 120, color='lightgray', alpha=0.1)   # Mon-Fri
ax.axvspan(120, 167, color='orange', alpha=0.05)   # Sat-Sun

# Fix the x-range last so nothing above can widen it.
ax.set_xlim(0, 167)

fig.tight_layout()
plt.show()
