In [1]:
import pandas as pd
import os

In [2]:
# Folder holding the per-location input CSVs.
input_folder = 'location_wise_data'
# Folder that receives the train/val/test CSVs; created on first run.
output_folder = 'train_val_test_splits'
os.makedirs(output_folder, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sort them so every run
# processes (and logs) the files in the same sequence.
location_files = sorted(os.listdir(input_folder))

In [3]:
def split_time_series(df, train_frac=0.7, val_frac=0.15, test_frac=0.15):
    """Split a DataFrame into contiguous train / validation / test slices.

    Rows are taken positionally and never shuffled, so ``df`` should already
    be in the order the split is meant to respect (e.g. sorted by date).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split.
    train_frac : float
        Fraction of rows (rounded down) assigned to the training slice.
    val_frac : float
        Fraction of rows (rounded down) assigned to the validation slice.
    test_frac : float
        Expected fraction for the test slice. The test slice always receives
        every row left after train and validation, so that no row is dropped
        by rounding; this value is only checked for consistency.

    Returns
    -------
    tuple of (train, val, test) DataFrames, in that order.

    Raises
    ------
    ValueError
        If any fraction lies outside [0, 1] or ``train_frac + val_frac``
        exceeds 1.
    """
    import warnings

    for name, value in (('train_frac', train_frac),
                        ('val_frac', val_frac),
                        ('test_frac', test_frac)):
        if not 0 <= value <= 1:
            raise ValueError(f'{name} must be between 0 and 1, got {value!r}')

    tol = 1e-9  # absorbs float rounding in sums such as 0.7 + 0.15 + 0.15
    if train_frac + val_frac > 1 + tol:
        raise ValueError(
            f'train_frac + val_frac must not exceed 1, got {train_frac + val_frac!r}'
        )
    if abs(train_frac + val_frac + test_frac - 1) > tol:
        # Previously this mismatch was silently ignored; keep the same slicing
        # but tell the caller the test slice will not match test_frac.
        warnings.warn(
            'train_frac + val_frac + test_frac does not sum to 1; '
            'the test split receives all rows remaining after train and val',
            stacklevel=2,
        )

    n = len(df)
    train_end = int(n * train_frac)
    val_end = train_end + int(n * val_frac)

    train = df.iloc[:train_end]
    val = df.iloc[train_end:val_end]
    # The remainder goes to test so rows lost to int() truncation are kept.
    test = df.iloc[val_end:]

    return train, val, test


In [4]:
for file in location_files:
    # Only CSV files are location data; skip anything else in the folder.
    if not file.endswith('.csv'):
        continue

    # Derive the location name by removing only the trailing '.csv' and a
    # leading 'location_'. Chained str.replace() would also delete those
    # substrings from the middle of a filename.
    location_name = file[:-len('.csv')]
    if location_name.startswith('location_'):
        location_name = location_name[len('location_'):]

    df = pd.read_csv(os.path.join(input_folder, file), parse_dates=['date'])

    # The split is positional, so make sure rows are in chronological order.
    df = df.sort_values('date').reset_index(drop=True)

    # Split
    train, val, test = split_time_series(df)

    # Save splits
    train.to_csv(os.path.join(output_folder, f'{location_name}_train.csv'), index=False)
    val.to_csv(os.path.join(output_folder, f'{location_name}_val.csv'), index=False)
    test.to_csv(os.path.join(output_folder, f'{location_name}_test.csv'), index=False)

    print(f'Saved splits for {file}: train({len(train)}), val({len(val)}), test({len(test)})')


Saved splits for location_0.csv: train(3691), val(790), test(792)
Saved splits for location_1.csv: train(3691), val(790), test(792)
Saved splits for location_10.csv: train(3691), val(790), test(792)
Saved splits for location_11.csv: train(3691), val(790), test(792)
Saved splits for location_12.csv: train(3691), val(790), test(792)
Saved splits for location_13.csv: train(3691), val(790), test(792)
Saved splits for location_14.csv: train(3691), val(790), test(792)
Saved splits for location_15.csv: train(3691), val(790), test(792)
Saved splits for location_16.csv: train(3691), val(790), test(792)
Saved splits for location_17.csv: train(3691), val(790), test(792)
Saved splits for location_18.csv: train(3691), val(790), test(792)
Saved splits for location_19.csv: train(3691), val(790), test(792)
Saved splits for location_2.csv: train(3691), val(790), test(792)
Saved splits for location_20.csv: train(3691), val(790), test(792)
Saved splits for location_21.csv: train(3691), val(790), test(792