In [1]:
import os
import pandas as pd

def split_time_series_per_location(df, train_frac=0.7, val_frac=0.15, test_frac=0.15, output_folder="train_test_splits"):
    """Split a DataFrame chronologically into train/validation/test sets, per location.

    Rows are grouped by ``location_id``. Within each group they are ordered by
    ``date`` and cut into three consecutive slices: the earliest rows go to
    train, the next to validation and the latest to test. The per-location
    slices are concatenated, and each split is written to ``output_folder`` as
    ``train.csv``, ``validate.csv`` and ``test.csv`` (without the index).

    Notes:
        * Train and validation sizes are ``int(n * frac)`` per location, so
          fractional rows are truncated and every remaining row lands in the
          test split. Small groups may therefore contribute no train or
          validation rows at all.
        * Rows whose ``location_id`` is missing are excluded by pandas'
          default ``groupby`` behaviour and end up in none of the splits.

    Args:
        df: DataFrame containing at least the columns ``location_id`` and ``date``.
        train_frac: Fraction of each location's rows assigned to train.
        val_frac: Fraction of each location's rows assigned to validation.
        test_frac: Fraction of each location's rows assigned to test.
        output_folder: Directory the CSV files are written to; created if absent.

    Returns:
        Tuple ``(train_df, val_df, test_df)``, each with a fresh 0..n-1 index.

    Raises:
        ValueError: If a fraction is negative or the fractions do not sum to 1.
        KeyError: If ``df`` lacks the ``location_id`` or ``date`` column.
    """
    # Negative fractions can still sum to 1 but would yield meaningless slice bounds.
    if min(train_frac, val_frac, test_frac) < 0:
        raise ValueError("Train, val and test fractions must be non-negative.")
    # Check fractions sum to 1
    if abs(train_frac + val_frac + test_frac - 1.0) > 1e-6:
        raise ValueError("Train, val and test fractions must sum to 1.")

    # Fail early, before any folder or file is created, if the frame cannot be split.
    missing_cols = [col for col in ('location_id', 'date') if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Input DataFrame is missing required column(s): {missing_cols}")

    os.makedirs(output_folder, exist_ok=True)

    train_list = []
    val_list = []
    test_list = []

    # Split per location to preserve time order in each location group
    for _, group in df.groupby('location_id'):
        # A stable sort keeps rows with identical dates in their input order,
        # so the rows falling on a split boundary are deterministic.
        group = group.sort_values('date', kind='mergesort')
        n = len(group)
        train_end = int(n * train_frac)
        val_end = train_end + int(n * val_frac)

        train_list.append(group.iloc[:train_end])
        val_list.append(group.iloc[train_end:val_end])
        test_list.append(group.iloc[val_end:])

    def _combine(parts):
        # pd.concat raises on an empty list (no groups at all), so fall back
        # to an empty frame that still carries the input's columns.
        combined = pd.concat(parts) if parts else df.iloc[:0]
        return combined.reset_index(drop=True)

    train_df = _combine(train_list)
    val_df = _combine(val_list)
    test_df = _combine(test_list)

    # Save files
    train_path = os.path.join(output_folder, "train.csv")
    val_path = os.path.join(output_folder, "validate.csv")
    test_path = os.path.join(output_folder, "test.csv")

    train_df.to_csv(train_path, index=False)
    val_df.to_csv(val_path, index=False)
    test_df.to_csv(test_path, index=False)

    print(f"Train set shape: {train_df.shape} -> saved to {train_path}")
    print(f"Validation set shape: {val_df.shape} -> saved to {val_path}")
    print(f"Test set shape: {test_df.shape} -> saved to {test_path}")
    print(f"Data splits saved inside folder: {output_folder}")

    return train_df, val_df, test_df

if __name__ == "__main__":
    # Read the feature table with 'date' parsed as datetimes, ordered by
    # location and then date, before handing it to the splitter.
    df = (
        pd.read_csv("feature_engineered_data.csv", parse_dates=['date'])
        .sort_values(by=['location_id', 'date'])
    )
    train_df, val_df, test_df = split_time_series_per_location(df)


Train set shape: (99603, 37) -> saved to train_test_splits\train.csv
Validation set shape: (21330, 37) -> saved to train_test_splits\validate.csv
Test set shape: (21384, 37) -> saved to train_test_splits\test.csv
Data splits saved inside folder: train_test_splits
