In [1]:
import pandas as pd
import os
import json

In [2]:
# Mount Google Drive into the Colab filesystem. The input and output paths
# configured in the next cell all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Output directory: receives train.csv / val.csv / test.csv and eval_config.json.
SPLIT_DIR = "/content/drive/MyDrive/Edulift/T7/data/splits"
# Input: the feature table that is loaded into `df` by the following cell.
FEATURES_FILE = "/content/drive/MyDrive/Edulift/eVED_features.csv"

# exist_ok=True keeps this cell safe to re-run once the directory exists.
os.makedirs(name=SPLIT_DIR, exist_ok=True)

In [4]:
# Read the feature CSV from Drive into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=FEATURES_FILE)

In [5]:
# Number of rows in the freshly loaded table (same value as len(df)).
df.shape[0]

655

In [6]:
# Parse the start timestamps; the chronological split later in the notebook
# sorts on this column.
# errors='coerce' converts unparseable values to NaT instead of raising, and
# sort_values places NaT rows last by default, so any such row would silently
# end up in the test split. Fail loudly here rather than let that happen.
df['start_time'] = pd.to_datetime(df['start_time'], errors='coerce')
if df['start_time'].isna().any():
    raise ValueError(
        f"{int(df['start_time'].isna().sum())} row(s) have a missing or unparseable "
        "start_time; fix or drop them before building the chronological split"
    )

In [7]:
# Order rows chronologically so the positional split that follows is a
# time-based split (earliest rows -> train, latest rows -> test).
# kind='mergesort' is a stable sort: rows sharing the same start_time keep
# their original file order, which makes split membership reproducible.
# The default kind ('quicksort') gives no ordering guarantee for ties.
df = df.sort_values('start_time', kind='mergesort').reset_index(drop=True)

In [8]:
# Print column dtypes and non-null counts for the parsed, sorted table
# (quick check that start_time is datetime and where values are missing).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 655 entries, 0 to 654
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Trip               655 non-null    float64       
 1   start_time         655 non-null    datetime64[ns]
 2   end_time           655 non-null    object        
 3   duration_min       655 non-null    float64       
 4   distance_km        653 non-null    float64       
 5   avg_speed_kmh      653 non-null    float64       
 6   matched_lat_start  655 non-null    float64       
 7   matched_lon_start  655 non-null    float64       
 8   od_cluster_start   655 non-null    int64         
 9   matched_lat_end    655 non-null    float64       
 10  matched_lon_end    655 non-null    float64       
 11  od_cluster_end     655 non-null    int64         
dtypes: datetime64[ns](1), float64(8), int64(2), object(1)
memory usage: 61.5+ KB


In [9]:
# Total number of rows; the split boundaries are computed as fractions of this.
n = df.shape[0]

In [10]:
# Positional cut points for the split: rows [0, train_end) are the first 70%,
# rows [train_end, val_end) the next 15%, and the remainder goes to test.
# int() truncates, so the boundaries round down.
train_end, val_end = (int(fraction * n) for fraction in (0.7, 0.85))

In [11]:
# Show both cut points, one per line.
print(train_end, val_end, sep="\n")

458
556


In [12]:
# Cut the chronologically sorted table into three contiguous, non-overlapping
# positional slices: train | val | test. A None bound means "open-ended",
# i.e. from the first row or through the last row.
train, val, test = (
    df.iloc[start:stop]
    for start, stop in (
        (None, train_end),
        (train_end, val_end),
        (val_end, None),
    )
)

In [13]:
# Write each split to its own CSV inside SPLIT_DIR. index=False drops the
# positional index so the files contain only the data columns.
for frame, filename in ((train, "train.csv"), (val, "val.csv"), (test, "test.csv")):
    frame.to_csv(os.path.join(SPLIT_DIR, filename), index=False)

In [14]:
# Progress marker: reached once the three split CSVs have been written.
print("Completed Data splittion!")

Completed Data splittion!


In [15]:
# Evaluation metrics for the ride-sharing experiments. Each entry maps a metric
# name to a human-readable definition plus either a formula or a note.
# Built entry by entry so every metric can carry its own comment; insertion
# order matches the order in which the metrics are serialized to JSON.
eval_criteria = {}

# Share of trips that were matched into shared rides.
eval_criteria["matching_success_rate"] = {
    "definition": "Proportion of trips successfully matched into shared rides",
    "formula": "matched_trips / total_trips",
}

# Extra travel incurred by sharing, relative to the solo trip.
# NOTE(review): the definition mentions "%" and "distance/time", while the
# formula is a distance-only fraction — confirm which is intended.
eval_criteria["detour_ratio"] = {
    "definition": "Avg extra distance/time (%) compared to solo trips",
    "formula": "(shared_distance - solo_distance) / solo_distance",
}

# Wait time has no formula; it carries a note instead.
eval_criteria["avg_wait_time_min"] = {
    "definition": "Estimated avg passenger wait time before pickup",
    "note": "Can be simulated if schedule data exists",
}

# Occupancy expressed as passenger-km per vehicle-km.
eval_criteria["fleet_efficiency"] = {
    "definition": "Avg occupancy (passengers per vehicle per trip)",
    "formula": "total_passenger_km / total_vehicle_km",
}

# Emissions avoided versus the all-solo baseline.
eval_criteria["carbon_savings"] = {
    "definition": "CO₂ reduction compared to all trips done solo",
    "formula": "baseline_co2 - shared_co2",
}

In [16]:
# Persist the metric definitions as pretty-printed JSON in the same directory
# as the split CSVs. Serialize to a string first, then write it in one call.
with open(os.path.join(SPLIT_DIR, "eval_config.json"), mode="w") as f:
    f.write(json.dumps(eval_criteria, indent=2))

In [17]:
# Progress marker: reached once eval_config.json has been written to SPLIT_DIR.
print("Evaluation criteria saved as eval_config.json!")

Evaluation criteria saved as eval_config.json!
