In [None]:
# --- Experiment parameters (edit these before running the notebook) ---

# Name passed to the significance test as `experiment_name`.
experiment_name_statistical_tests = 'Clip16_NIGHTS_Lora_Default_123'

# Run names: each one must match a `<name>.yaml` file in the project's `configs` folder.
vanilla_run = 'Clip16_Vanilla_Default_123'
run_one = 'Clip16_NIGHTS_Lora_Default_123_First'
run_two = 'Clip16_NIGHTS_Lora_Default_123_Second'
run_three = 'Clip16_NIGHTS_Lora_Default_123_Third'

# Settings forwarded unchanged to the significance test.
# NOTE(review): the values are presumably bootstrap resample counts - confirm against
# utils_prediction.statistical_tests.
test_parameters = dict(
    paired_bootstrap_pooled=5000,
    paired_bootstrap_per_product=5000,
)

In [None]:
# Mount Google Drive at /content/drive (Colab-only module); the project root used
# below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import logging
import os
import sys
import yaml

# Root folder of the project on the mounted Drive.
PROJECT_ROOT = '/content/drive/MyDrive/perceptual-vits-fashion-forecasting'

# Make the project's `src` packages importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
_src_dir = os.path.join(PROJECT_ROOT, 'src')
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

# Logging
# logging.basicConfig() does nothing while the root logger already has handlers, so any
# pre-installed handlers are removed first (iterating over a copy because the list is
# modified during the loop).
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')

Mounted at /content/drive


In [None]:
# Load the config, embedding file and dataset for the vanilla run and the three
# comparison runs. The per-run steps are identical, so they live in small helpers
# instead of four copy-pasted sections.
from data.load_visuelle import load_visuelle
from data.process_visuelle import process_visuelle

# Keys of config['features'] that are forwarded to load_visuelle under the same name.
_FEATURE_KEYS = (
    'using_year_int',
    'using_year_dummies',
    'using_season_dummies',
    'using_price_float',
    'using_category_dummies',
    'using_color_dummies',
    'using_fabric_dummies',
    'using_store_int',
    'using_store_dummies',
    'using_week_dummies',
    'pca',
    'n_components',
    'visualize_pca',
)


def _load_run_config(run_name, label):
    """Parse ``<PROJECT_ROOT>/configs/<run_name>.yaml`` and return its content.

    Args:
        run_name: Name of the run; the config file is ``<run_name>.yaml``.
        label: Prefix used in the error message (e.g. ``'config_one'``).

    Returns:
        The object produced by ``yaml.safe_load`` for the config file.

    Raises:
        FileNotFoundError: If the config file does not exist.
    """
    config_path = os.path.join(PROJECT_ROOT, 'configs', f"{run_name}.yaml")
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f"{label} file not found: {config_path}")

    with open(config_path, 'r') as file:
        return yaml.safe_load(file)


def _find_embedding_file(config):
    """Build the embedding CSV path described by ``config`` and check that it exists.

    The file name is assembled from the ``vision_model`` section; ViT models add the
    ``feat_type`` and ``stride`` fields, CNN models do not.

    Args:
        config: Parsed run config (see ``_load_run_config``).

    Returns:
        Path of the existing ``<experiment_name>_embeddings.csv`` file.

    Raises:
        ValueError: If ``model_family`` is neither ``'vit'`` nor ``'cnn'``. Previously this
            case fell through and either raised a NameError or silently reused the
            experiment name of the run loaded before it.
        FileNotFoundError: If the embedding file does not exist.
    """
    vision = config['vision_model']
    family = vision['model_family']

    if family == 'vit':
        name_parts = [
            vision['tag'],
            vision['model_type'],
            vision['training_method'],
            vision['vit']['feat_type'],
            vision['vit']['stride'],
            vision['dataset_name'],
        ]
    elif family == 'cnn':
        name_parts = [
            vision['tag'],
            vision['model_type'],
            vision['training_method'],
            vision['dataset_name'],
        ]
    else:
        raise ValueError(
            f"Unsupported model_family {family!r}; expected 'vit' or 'cnn'"
        )
    experiment_name = '_'.join(str(part) for part in name_parts)

    embeddings_dir = os.path.join(
        config['global']['project_root'], 'datasets', 'visuelle2', 'embeddings'
    )
    os.makedirs(embeddings_dir, exist_ok=True)
    emb_filepath = os.path.join(embeddings_dir, f'{experiment_name}_embeddings.csv')

    if not os.path.isfile(emb_filepath):
        raise FileNotFoundError(f"Embedding file not found: {emb_filepath}")
    print(f"Embedding file found: {emb_filepath}")
    return emb_filepath


def _load_run_dataset(config, emb_filepath, process_data_dict):
    """Call ``load_visuelle`` with the feature and kNN settings taken from ``config``.

    Args:
        config: Parsed run config (see ``_load_run_config``).
        emb_filepath: Path of the embedding CSV for this run.
        process_data_dict: Result of ``process_visuelle`` shared by all runs.

    Returns:
        Whatever ``load_visuelle`` returns for this run.
    """
    features = config['features']
    knn = config['forecasting']['knn']
    return load_visuelle(
        process_data_dict=process_data_dict,
        embedding_path=emb_filepath,
        **{key: features[key] for key in _FEATURE_KEYS},
        melt_data=config['global']['melt_data'],
        dummy_normalization=knn['dummy_normalization'],
        normalize_embeddings_manually=knn['normalize_embeddings_manually'],
        project_root=config['global']['project_root'],
    )


# --- Vanilla run ---
config_vanilla = _load_run_config(vanilla_run, 'config_vanilla')

# Process Visuelle2 data once, using the vanilla config's data settings; the result is
# shared by all four runs below.
# NOTE(review): the data/season/split settings of the other run configs are not used -
# confirm they are meant to match the vanilla config.
results_process_visuelle = process_visuelle(
    season=config_vanilla['data']['season'],
    split_method=config_vanilla['data']['split_method'],
    melt_data=config_vanilla['global']['melt_data'],
    project_root=config_vanilla['global']['project_root']
)

emb_filepath_vanilla = _find_embedding_file(config_vanilla)
dict_load_vanilla = _load_run_dataset(
    config_vanilla, emb_filepath_vanilla, results_process_visuelle
)

# --- First run ---
config_one = _load_run_config(run_one, 'config_one')
emb_filepath_run_one = _find_embedding_file(config_one)
dict_load_run_one = _load_run_dataset(
    config_one, emb_filepath_run_one, results_process_visuelle
)

# --- Second run ---
config_two = _load_run_config(run_two, 'config_two')
emb_filepath_run_two = _find_embedding_file(config_two)
dict_load_run_two = _load_run_dataset(
    config_two, emb_filepath_run_two, results_process_visuelle
)

# --- Third run ---
config_three = _load_run_config(run_three, 'config_three')
emb_filepath_run_three = _find_embedding_file(config_three)
dict_load_run_three = _load_run_dataset(
    config_three, emb_filepath_run_three, results_process_visuelle
)

[INFO] NumExpr defaulting to 2 threads.
[INFO] Starting Visuelle2 data processing...
[INFO] All processed files already exist. Skipping processing.
[INFO] Loading and preparing Visuelle2 dataset...


Embedding file found: /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip32_NIGHTS_FT_MLP_First_Model_clip_vitb32_mlp_embedding_32_nights_fashion_triplets_embeddings.csv


[INFO] Load training data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_train.csv
[INFO] Load validation data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_val.csv
[INFO] Load test data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_test.csv
[INFO] Load embeddings from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip32_NIGHTS_FT_MLP_First_Model_clip_vitb32_mlp_embedding_32_nights_fashion_triplets_embeddings.csv
[INFO] Length of training data before merging is 51444 rows.
[INFO] Length of training data after merging is 51432 rows.
[INFO] Length of validation data before merging is 6408 rows.
[INFO] Length of validation data after merging is 6408 rows.
[INFO] Length of testing

Embedding file found: /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip16_NIGHTS_FT_MLP_First_Model_clip_vitb16_mlp_embedding_16_nights_fashion_triplets_embeddings.csv


[INFO] Load training data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_train.csv
[INFO] Load validation data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_val.csv
[INFO] Load test data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_test.csv
[INFO] Load embeddings from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip16_NIGHTS_FT_MLP_First_Model_clip_vitb16_mlp_embedding_16_nights_fashion_triplets_embeddings.csv
[INFO] Length of training data before merging is 51444 rows.
[INFO] Length of training data after merging is 51432 rows.
[INFO] Length of validation data before merging is 6408 rows.
[INFO] Length of validation data after merging is 6408 rows.
[INFO] Length of testing

Embedding file found: /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip16_NIGHTS_FT_MLP_Second_Model_clip_vitb16_mlp_embedding_16_nights_fashion_triplets_embeddings.csv


[INFO] Load training data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_train.csv
[INFO] Load validation data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_val.csv
[INFO] Load test data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_test.csv
[INFO] Load embeddings from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip16_NIGHTS_FT_MLP_Second_Model_clip_vitb16_mlp_embedding_16_nights_fashion_triplets_embeddings.csv
[INFO] Length of training data before merging is 51444 rows.
[INFO] Length of training data after merging is 51432 rows.
[INFO] Length of validation data before merging is 6408 rows.
[INFO] Length of validation data after merging is 6408 rows.
[INFO] Length of testin

Embedding file found: /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip16_NIGHTS_FT_MLP_Third_Model_clip_vitb16_mlp_embedding_16_nights_fashion_triplets_embeddings.csv


[INFO] Load training data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_train.csv
[INFO] Load validation data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_val.csv
[INFO] Load test data from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/processed_data/melt_true_aw19_default_split/visuelle2_test.csv
[INFO] Load embeddings from /content/drive/MyDrive/perceptual-vits-fashion-forecasting/datasets/visuelle2/embeddings/Clip16_NIGHTS_FT_MLP_Third_Model_clip_vitb16_mlp_embedding_16_nights_fashion_triplets_embeddings.csv
[INFO] Length of training data before merging is 51444 rows.
[INFO] Length of training data after merging is 51432 rows.
[INFO] Length of validation data before merging is 6408 rows.
[INFO] Length of validation data after merging is 6408 rows.
[INFO] Length of testing

In [None]:
from utils_prediction.statistical_tests import test_for_significance

# Run the significance tests on the vanilla baseline and the three comparison runs.
# Datasets and run names are grouped first so the call itself stays short.
_datasets = {
    'dict_load_vanilla': dict_load_vanilla,
    'dict_load_run_one': dict_load_run_one,
    'dict_load_run_two': dict_load_run_two,
    'dict_load_run_three': dict_load_run_three,
}
_run_names = {
    'vanilla_run_name': vanilla_run,
    'run_one_name': run_one,
    'run_two_name': run_two,
    'run_three_name': run_three,
}

# NOTE(review): the meaning of `scalar=53.0` is not visible in this notebook - confirm
# against utils_prediction.statistical_tests before changing it.
test_for_significance(
    **_datasets,
    experiment_name=experiment_name_statistical_tests,
    **_run_names,
    test_parameters=test_parameters,
    random_seed=123,
    project_root=PROJECT_ROOT,
    scalar=53.0,
)

[INFO] Testing for significance...
[INFO] Saved global results at /content/drive/MyDrive/perceptual-vits-fashion-forecasting/experiments/statistical_tests/Is CLIP 16 better than CLIP 32 in NIGHTS/global_results_gradient_boosting_regressor_model.csv
[INFO] Saved paired bootstrap pooled results at /content/drive/MyDrive/perceptual-vits-fashion-forecasting/experiments/statistical_tests/Is CLIP 16 better than CLIP 32 in NIGHTS/paired_bootstrap_pooled_gradient_boosting_regressor_model.csv
[INFO] Saved paired bootstrap per-product results at /content/drive/MyDrive/perceptual-vits-fashion-forecasting/experiments/statistical_tests/Is CLIP 16 better than CLIP 32 in NIGHTS/paired_bootstrap_per_product_gradient_boosting_regressor_model.csv
[INFO] Saved Wilcoxon per-product results at /content/drive/MyDrive/perceptual-vits-fashion-forecasting/experiments/statistical_tests/Is CLIP 16 better than CLIP 32 in NIGHTS/wilcoxon_per_product_gradient_boosting_regressor_model.csv
[INFO] Saved weekly vanilla