In [1]:
def split_time_series(df, train_frac=0.7, val_frac=0.15, test_frac=0.15):
    """Split a row-ordered frame into contiguous train / validation / test slices.

    The split is purely positional: the first rows become the training slice,
    the next rows the validation slice, and everything left over the test
    slice. No sorting or shuffling happens here, so the caller is responsible
    for putting the rows in chronological order first.

    Parameters
    ----------
    df : pandas.DataFrame (or any object supporting ``len()`` and ``.iloc`` slicing)
        Rows to split, already in the desired order.
    train_frac : float, default 0.7
        Fraction of rows for the leading (training) slice. The row count is
        ``int(len(df) * train_frac)``, i.e. truncated toward zero.
    val_frac : float, default 0.15
        Fraction of rows for the validation slice that follows the training
        slice. The row count is ``int(len(df) * val_frac)``.
    test_frac : float, default 0.15
        Accepted for interface compatibility but not used to size anything:
        the test slice always receives every row after the validation slice,
        so rows lost to truncation above end up in the test slice rather than
        being dropped.

    Returns
    -------
    tuple
        ``(train, val, test)`` -- three non-overlapping ``.iloc`` slices of
        ``df`` that together cover every row exactly once.

    Raises
    ------
    ValueError
        If ``train_frac`` or ``val_frac`` is negative, or if their sum
        exceeds 1.
    """
    # A negative fraction would turn the slice bounds below into "from the end"
    # indices, silently producing overlapping or missing rows.
    if train_frac < 0 or val_frac < 0:
        raise ValueError(
            f'train_frac and val_frac must be non-negative, '
            f'got train_frac={train_frac}, val_frac={val_frac}'
        )
    # Small tolerance so sums such as 0.7 + 0.3 are not rejected because of
    # floating-point rounding.
    if train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            f'train_frac + val_frac must not exceed 1, '
            f'got {train_frac} + {val_frac}'
        )

    n = len(df)
    train_end = int(n * train_frac)
    val_end = train_end + int(n * val_frac)

    train = df.iloc[:train_end]
    val = df.iloc[train_end:val_end]
    # The remainder goes to test so the three slices always add up to n rows.
    test = df.iloc[val_end:]

    return train, val, test


In [2]:
import pandas as pd
import os

# One CSV per city is read from input_folder; three CSVs per city
# (<city>_train / <city>_val / <city>_test) are written to output_folder.
input_folder = 'city_wise_data'
output_folder = 'city_wise_data_splits'
os.makedirs(output_folder, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sorting makes every run
# process (and log) the files in the same sequence.
cities_files = sorted(os.listdir(input_folder))

file_prefix = 'cleaned_weather_'
file_suffix = '.csv'

for file in cities_files:
    # Skip anything in the folder that is not a CSV file.
    if not file.endswith(file_suffix):
        continue

    # Derive the city name by removing the prefix/suffix only at the ends of
    # the file name, so a name that happens to contain either piece of text in
    # the middle is left intact.
    city_name = file[:-len(file_suffix)]
    if city_name.startswith(file_prefix):
        city_name = city_name[len(file_prefix):]

    df = pd.read_csv(os.path.join(input_folder, file), parse_dates=['date'])

    # split_time_series() slices by row position, so the rows must be in
    # chronological order first. A stable sort keeps rows that share a date in
    # their original file order, which keeps the split boundaries reproducible.
    df = df.sort_values('date', kind='mergesort').reset_index(drop=True)

    # Split
    train, val, test = split_time_series(df)

    # Save splits
    train.to_csv(os.path.join(output_folder, f'{city_name}_train.csv'), index=False)
    val.to_csv(os.path.join(output_folder, f'{city_name}_val.csv'), index=False)
    test.to_csv(os.path.join(output_folder, f'{city_name}_test.csv'), index=False)

    print(f'Saved splits for {city_name}: train({len(train)}), val({len(val)}), test({len(test)})')


Saved splits for Ampara: train(3691), val(790), test(792)
Saved splits for Anuradhapura: train(3691), val(790), test(792)
Saved splits for Badulla: train(3691), val(790), test(792)
Saved splits for Bandarawela: train(3691), val(790), test(792)
Saved splits for Batticaloa: train(3691), val(790), test(792)
Saved splits for Colombo: train(3691), val(790), test(792)
Saved splits for Galle: train(3691), val(790), test(792)
Saved splits for Gampaha: train(3691), val(790), test(792)
Saved splits for Hambantota: train(3691), val(790), test(792)
Saved splits for Jaffna: train(3691), val(790), test(792)
Saved splits for Kalutara: train(3691), val(790), test(792)
Saved splits for Kandy: train(3691), val(790), test(792)
Saved splits for Kegalle: train(3691), val(790), test(792)
Saved splits for Kilinochchi1: train(3691), val(790), test(792)
Saved splits for Kurunegala: train(3691), val(790), test(792)
Saved splits for Mannar: train(3691), val(790), test(792)
Saved splits for Matale: train(3691), v