In [3]:
# Mount Google Drive inside the Colab runtime at /content/drive. The project
# root and configs directory used later in this notebook both live under
# /content/drive/MyDrive, so this has to run before the config is saved.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
"""
exp_configs.ipynb

This script defines and saves the configuration for running forecasting experiments.
It specifies:
    - Global settings (project and experiment names, project root, forecasting seed,
      melting option, Weights & Biases flag)
    - Dataset information (season and split method)
    - Embedding (vision) model configuration, including ViT and CNN training settings
    - Feature selection and PCA settings
    - Forecasting model parameters (KNN, Gradient Boosting, Random Forest, Ridge)

The configuration is saved as a YAML file under the configs directory so that it
can be easily loaded by other scripts for reproducibility and consistent experiment runs.
"""

import os
import yaml
import logging

# Configure the root logger: show INFO and above, rendered as "[LEVEL] message".
logging.basicConfig(
    format='[%(levelname)s] %(message)s',
    level=logging.INFO,
)

def _build_config():
    """Assemble the nested experiment configuration, one section at a time.

    Returns:
        dict: Top-level sections 'global', 'data', 'features', 'forecasting'
        and 'vision_model'. Every value is a plain Python scalar, list, dict
        or None.
    """
    # --- Run-wide identifiers and switches -------------------------------
    global_settings = {
        'project_name': 'sales-forecasting',
        # Naming style: Clip32_Vanilla_Default_123
        'experiment_name': 'Clip32_FT_MLP_Default_123_First',
        'forecasting_seed': 123,  # Options: 123, 456, 789
        'melt_data': True,
        'wandb_bool': True,
        'project_root': '/content/drive/MyDrive/perceptual-vits-fashion-forecasting',
    }

    # --- Dataset selection -----------------------------------------------
    dataset = {
        'season': 'AW19',  # Options: SS19, AW19
        # Options: season, release_date, standard, week, default
        'split_method': 'default',
    }

    # --- Feature toggles and PCA -----------------------------------------
    feature_flags = {
        'using_year_int': False,
        'using_year_dummies': False,
        'using_season_dummies': False,
        'using_price_float': False,
        'using_category_dummies': False,
        'using_color_dummies': False,
        'using_fabric_dummies': False,
        'using_store_int': False,
        'using_store_dummies': True,
        'using_week_dummies': True,
        'pca': True,
        'n_components': 256,
        'visualize_pca': False,
    }

    # --- Forecasting models ----------------------------------------------
    # Each model keeps its own copy of the common switches and its own list
    # objects, so models can be tuned independently.
    knn = {
        'enabled': True,
        'dummy_normalization': True,
        'normalize_embeddings_manually': True,
        'error_metric': 'mse',
        'max_neighbors': 200,
        'weights': ['uniform', 'distance'],
    }
    gradient_boosting = {
        'enabled': True,
        'dummy_normalization': True,
        'normalize_embeddings_manually': True,
        'error_metric': 'mse',
        'n_trials': 40,
        'n_estimators': [300, 500, 700, 900],
        'learning_rate': [0.05, 0.07, 0.09, 0.11],
        'max_depth': [5, 7, 9, 11],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 3, 5],
    }
    random_forest = {
        'enabled': True,
        'dummy_normalization': True,
        'normalize_embeddings_manually': True,
        'error_metric': 'mse',
        'n_trials': 40,
        'n_estimators': [300, 500, 700, 900],
        'max_depth': [7, 9, 11, 13],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 3, 5],
    }
    ridge = {
        'enabled': True,
        'dummy_normalization': True,
        'normalize_embeddings_manually': True,
        'error_metric': 'mse',
        'alpha_values': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
    }

    # --- Vision model: ViT-specific settings ------------------------------
    vit_settings = {
        'log_dir': '/vision_models/vits_training',
        'load_dir': '/vision_models/vits_backbone_models',
        'save_mode': 'entire_model',  # Options: adapter_only, entire_model, all
        # Naming style: Clip32_Vanilla_First_Model
        'vision_model_training_name': 'Clip32_FT_MLP_First_Model',
        # Naming style: "Training run for the Vanilla Clip32 model on no data."
        # or "Training run for the Lora Clip32 model on NIGHTS data."
        'wandb_notes': 'Training run for the MLP Clip32 model on FT data.',

        'feat_type': 'embedding',
        'stride': '32',  # Options: 16 or 32 (stored as a string)
        'use_lora': False,
        'normalize_embeds': True,
        'load_size': 224,

        # Options: /content/datasets/nights/, /content/datasets/fashion_triplets/
        'dataset_root': '/content/datasets/fashion_triplets/',
        # Options: /content/datasets/fashion_triplets/
        'second_dataset_root': None,

        'lr': 0.0001,  # Big data: 0.0003, Small data: 0.0001
        'weight_decay': 0.0001,  # Big data: 0.0, Small data: 0.0001
        # Big data Lora: 32, Small data Lora: 16, Big data MLP: 512, Small data MLP: 128
        'batch_size': 128,
        # Big data Lora: 8, Small data Lora: 25, Big data MLP: 20, Small data MLP: 30
        # NOTE(review): 40 matches none of the listed presets; confirm it is intended.
        'epochs': 40,
        'margin': 0.05,  # Options: 0.05
        'patience': 5,  # Big data: 3, Small data: 5
        'min_delta': 0.0,  # Options: 0.0

        'lora_r': 16,  # Big data: 16, Small data: 8
        'lora_alpha': 32,  # Big data: 32, Small data: 8
        # Big data: 0.02, Small data: 0.01
        # NOTE(review): 0.2 matches neither listed preset; confirm it is intended.
        'lora_dropout': 0.2,
    }

    # --- Vision model: CNN-specific settings ------------------------------
    cnn_settings = {
        'log_dir': '/vision_models/cnn_training',
        'load_dir': '/vision_models/cnn_backbone_models',
        # Naming style: Clip50_Vanilla_First_Model
        'vision_model_training_name': 'Resnet50_Vanilla_First_Model',
        # Naming style: "Training run for the Vanilla ResNet50 model on no data."
        # or "Training run for the ResNet50 model on NIGHTS data."
        'wandb_notes': 'Training run for the Vanilla ResNet50 model on no data.',

        'mlp': True,
        'normalize_embeds': True,
        'load_size': 224,

        # Options: /content/datasets/nights/, /content/datasets/fashion_triplets/
        'dataset_root': None,
        # Options: /content/datasets/fashion_triplets/
        'second_dataset_root': None,

        'lr': 0.0003,  # Big data: 0.0003, Small data: 0.0001
        'weight_decay': 0.0001,  # Big data: 0.0001, Small data: 0.0001
        'batch_size': 64,  # Big data: 64, Small data: 32
        'epochs': 20,  # Big data: 20, Small data: 40
        'margin': 0.05,  # Options: 0.05
        'patience': 3,  # Big data: 3, Small data: 5
        'min_delta': 0.0,  # Options: 0
    }

    # --- Vision model: settings shared by both families --------------------
    vision_model = {
        'wandb_project': 'vision-model-training',
        'vision_model_seed': 123,  # Options: 123, 456, 789
        'model_family': 'vit',  # Options: vit, cnn
        'model_type': 'clip_vitb32',  # Naming style: clip_vitb32 or resnet50
        'train': True,
        'load_vit': False,
        # Naming style: Clip32_Vanilla_First_Model
        'tag': 'Clip32_FT_MLP_First_Model',
        'training_method': 'mlp',  # Options: mlp, lora, no_training
        # Options: none, nights, fashion_triplets, nights_fashion_triplets,
        # synthetic_fashion
        'dataset_name': 'fashion_triplets',
        'hidden_size': 512,
        'best': True,
        'vit': vit_settings,
        'cnn': cnn_settings,
    }

    return {
        'global': global_settings,
        'data': dataset,
        'features': feature_flags,
        'forecasting': {
            'knn': knn,
            'gradient_boosting': gradient_boosting,
            'random_forest': random_forest,
            'ridge': ridge,
        },
        'vision_model': vision_model,
    }


config = _build_config()

# Persist the configuration as <project_root>/configs/<experiment_name>.yaml.
# The directory is derived from the project root declared in the config, so
# changing 'project_root' moves the saved file with it instead of silently
# writing to a second, hard-coded location.
config_dir = os.path.join(config['global']['project_root'], 'configs')
os.makedirs(config_dir, exist_ok=True)
# One file per experiment name; an existing file with the same name is overwritten.
config_path = os.path.join(config_dir, f"{config['global']['experiment_name']}.yaml")

# safe_dump emits only standard YAML tags, so the file always round-trips
# through yaml.safe_load; a non-plain value raises here instead of producing
# a file other scripts cannot read. The encoding is pinned so the result does
# not depend on the runtime's locale.
with open(config_path, 'w', encoding='utf-8') as f:
    yaml.safe_dump(config, f, default_flow_style=False)

logging.info("Config saved at: %s", config_path)