In [17]:
from sklearn.linear_model import LinearRegression
from pathlib import Path
import pandas as pd
import numpy as np
import os

In [18]:
# The project root is taken as the parent of the current working directory.
root = Path.cwd().parent
# Folder holding the competition inputs and folder receiving the baseline predictions.
competition_data_path = root.joinpath("data/competition/competition_data/")
results_root = root.joinpath("predictions/competition_baselines")

In [19]:
def save_df(test_df, data_path, competition_data_path, results_save_path):
    """Write ``test_df`` as a CSV file below ``results_save_path``.

    The output file keeps the location that ``data_path`` has relative to
    ``competition_data_path``. Missing parent folders are created, all values
    are rounded to 3 decimals, and the DataFrame index is not written.
    """
    relative_location = data_path.relative_to(competition_data_path)
    destination = results_save_path / relative_location
    destination.parent.mkdir(parents=True, exist_ok=True)
    rounded_df = test_df.round(3)
    rounded_df.to_csv(destination, index=False)

# Average predictions

In [20]:
def get_subjects_paths(data_path):
    """Map each subject number to the training annotation CSVs found under ``data_path``.

    The subject number is the second ``_``-separated token of the file stem
    and is kept as a string. Files are collected recursively from every
    ``train/annotations`` folder.
    """
    paths_by_subject = {}
    for annotation_file in data_path.glob("**/train/annotations/*.csv"):
        subject_id = annotation_file.stem.split("_")[1]
        paths_by_subject.setdefault(subject_id, []).append(annotation_file)
    return paths_by_subject

def get_videos_paths(data_path):
    """Map each video number to the training annotation CSVs found under ``data_path``.

    The video number is the fourth ``_``-separated token of the file stem
    and is kept as a string. Files are collected recursively from every
    ``train/annotations`` folder.
    """
    paths_by_video = {}
    for annotation_file in data_path.glob("**/train/annotations/*.csv"):
        video_id = annotation_file.stem.split("_")[3]
        paths_by_video.setdefault(video_id, []).append(annotation_file)
    return paths_by_video

def iter_subjects_videos_paths(scenario_path, search_pattern="**/train/annotations/*.csv"):
    """Yield every path below ``scenario_path`` that matches ``search_pattern``.

    The default pattern selects the training annotation CSV files.
    """
    yield from scenario_path.glob(pattern=search_pattern)

def iter_train_subjects_and_get_paths(scenario_path):
    """Yield ``(subject_num, paths)`` pairs built by ``get_subjects_paths(scenario_path)``."""
    yield from get_subjects_paths(scenario_path).items()

def iter_train_videos_and_get_paths(scenario_path):
    """Yield ``(video_num, paths)`` pairs built by ``get_videos_paths(scenario_path)``."""
    yield from get_videos_paths(scenario_path).items()

## Scenario 1

In [21]:
# Root folder of the scenario 1 data.
scenario1_data_path = competition_data_path.joinpath("scenario_1")

### Scenario-wise model

In [22]:
# prepare paths
model_name = "fold-wise_mean"
results_save_path = results_root / model_name / "results"

# compute baseline: average the per-file means over every training annotation file
train_means = [
    pd.read_csv(train_path)[["arousal", "valence"]].mean()
    for train_path in iter_subjects_videos_paths(scenario1_data_path)
]
overall_arousal_mean = np.mean([file_mean["arousal"] for file_mean in train_means])
overall_valence_mean = np.mean([file_mean["valence"] for file_mean in train_means])

# save predictions: every test file receives the same constant pair
test_paths = iter_subjects_videos_paths(scenario1_data_path, search_pattern="**/test/annotations/*.csv")
for test_path in test_paths:
    predictions_df = pd.read_csv(test_path).assign(
        arousal=overall_arousal_mean,
        valence=overall_valence_mean,
    )
    save_df(predictions_df, test_path, competition_data_path, results_save_path)

### Subject-wise model

In [23]:
# prepare paths
model_name = "subject-wise_mean"
results_save_path = results_root / model_name / "results"

# compute baseline: one constant (arousal, valence) pair per subject, obtained by
# averaging the per-file means of that subject's training annotations
for subject_num, paths_list in iter_train_subjects_and_get_paths(scenario1_data_path):
    subject_arousal_results = list()
    subject_valence_results = list()
    for train_path in paths_list:
        mean = pd.read_csv(train_path)[["arousal", "valence"]].mean()
        subject_arousal_results.append(mean["arousal"])
        subject_valence_results.append(mean["valence"])
    subject_arousal_mean = np.mean(subject_arousal_results)
    subject_valence_mean = np.mean(subject_valence_results)
    # save predictions
    # The "_" right after the subject number is required: without it the pattern
    # for subject 1 would also match sub_10_*, sub_11_*, ... and those subjects'
    # files could be overwritten with subject 1's mean.
    for test_path in iter_subjects_videos_paths(scenario1_data_path, search_pattern=f"**/test/annotations/sub_{subject_num}_*.csv"):
        test_df = pd.read_csv(test_path)
        test_df["arousal"] = subject_arousal_mean
        test_df["valence"] = subject_valence_mean
        save_df(test_df, test_path, competition_data_path, results_save_path)

### Video-wise model

In [24]:
# prepare paths
model_name = "video-wise_mean"
results_save_path = results_root / model_name / "results"

# compute baseline: one constant (arousal, valence) pair per video
for video_num, paths_list in iter_train_videos_and_get_paths(scenario1_data_path):
    video_means = [
        pd.read_csv(train_path)[["arousal", "valence"]].mean()
        for train_path in paths_list
    ]
    video_arousal_mean = np.mean([file_mean["arousal"] for file_mean in video_means])
    video_valence_mean = np.mean([file_mean["valence"] for file_mean in video_means])
    # save predictions for the test files of this video
    test_pattern = f"**/test/annotations/*vid_{video_num}.csv"
    for test_path in iter_subjects_videos_paths(scenario1_data_path, search_pattern=test_pattern):
        predictions_df = pd.read_csv(test_path).assign(
            arousal=video_arousal_mean,
            valence=video_valence_mean,
        )
        save_df(predictions_df, test_path, competition_data_path, results_save_path)

### Subject-video-wise model

In [25]:
# prepare paths
model_name = "subvid-wise_mean"
results_save_path = results_root / model_name / "results"

# compute baseline: each test file is filled with the mean of the training file
# that has the same name
for train_path in iter_subjects_videos_paths(scenario1_data_path):
    mean = pd.read_csv(train_path)[["arousal", "valence"]].mean()
    mean_arousal, mean_valence = mean["arousal"], mean["valence"]
    # save predictions
    # train_path is .../train/annotations/<file>; swap only that "train" folder
    # for "test". A plain str.replace would also rewrite any "train" substring
    # occurring elsewhere in the absolute path.
    test_path = train_path.parents[1].with_name("test") / train_path.parent.name / train_path.name
    test_df = pd.read_csv(test_path)
    test_df["arousal"] = mean_arousal
    test_df["valence"] = mean_valence
    save_df(test_df, test_path, competition_data_path, results_save_path)

## Scenario 2

In [26]:
# Root folder of the scenario 2 data (split into fold_<n> sub-folders by the cells below).
scenario2_data_path = competition_data_path.joinpath("scenario_2")

### Fold-wise model

In [27]:
# prepare paths
model_name = "fold-wise_mean"
results_save_path = results_root / model_name / "results"

for fold_num in range(5):
    fold_data_path = scenario2_data_path / f"fold_{fold_num}"
    # compute baseline: average the per-file means of this fold's training annotations
    train_means = [
        pd.read_csv(train_path)[["arousal", "valence"]].mean()
        for train_path in iter_subjects_videos_paths(fold_data_path)
    ]
    overall_arousal_mean = np.mean([file_mean["arousal"] for file_mean in train_means])
    overall_valence_mean = np.mean([file_mean["valence"] for file_mean in train_means])

    # save predictions: every test file of the fold receives the same constant pair
    test_paths = iter_subjects_videos_paths(fold_data_path, search_pattern="**/test/annotations/*.csv")
    for test_path in test_paths:
        predictions_df = pd.read_csv(test_path).assign(
            arousal=overall_arousal_mean,
            valence=overall_valence_mean,
        )
        save_df(predictions_df, test_path, competition_data_path, results_save_path)

### Video-wise model

In [28]:
# prepare paths
model_name = "video-wise_mean"
results_save_path = results_root / model_name / "results"

for fold_num in range(5):
    fold_data_path = scenario2_data_path / f"fold_{fold_num}"
    # compute baseline: one constant (arousal, valence) pair per video of the fold
    for video_num, paths_list in iter_train_videos_and_get_paths(fold_data_path):
        video_means = [
            pd.read_csv(train_path)[["arousal", "valence"]].mean()
            for train_path in paths_list
        ]
        video_arousal_mean = np.mean([file_mean["arousal"] for file_mean in video_means])
        video_valence_mean = np.mean([file_mean["valence"] for file_mean in video_means])
        # save predictions for the test files of this video
        test_pattern = f"**/test/annotations/*vid_{video_num}.csv"
        for test_path in iter_subjects_videos_paths(fold_data_path, search_pattern=test_pattern):
            predictions_df = pd.read_csv(test_path).assign(
                arousal=video_arousal_mean,
                valence=video_valence_mean,
            )
            save_df(predictions_df, test_path, competition_data_path, results_save_path)

## Scenario 3

In [29]:
# Root folder of the scenario 3 data (split into fold_<n> sub-folders by the cells below).
scenario3_data_path = competition_data_path.joinpath("scenario_3")

### Fold-wise model

In [30]:
# prepare paths
model_name = "fold-wise_mean"
results_save_path = results_root / model_name / "results"

for fold_num in range(4):
    fold_data_path = scenario3_data_path / f"fold_{fold_num}"
    # compute baseline: average the per-file means of this fold's training annotations
    train_means = [
        pd.read_csv(train_path)[["arousal", "valence"]].mean()
        for train_path in iter_subjects_videos_paths(fold_data_path)
    ]
    overall_arousal_mean = np.mean([file_mean["arousal"] for file_mean in train_means])
    overall_valence_mean = np.mean([file_mean["valence"] for file_mean in train_means])

    # save predictions: every test file of the fold receives the same constant pair
    test_paths = iter_subjects_videos_paths(fold_data_path, search_pattern="**/test/annotations/*.csv")
    for test_path in test_paths:
        predictions_df = pd.read_csv(test_path).assign(
            arousal=overall_arousal_mean,
            valence=overall_valence_mean,
        )
        save_df(predictions_df, test_path, competition_data_path, results_save_path)

### Subject-wise model

In [31]:
# prepare paths
model_name = "subject-wise_mean"
results_save_path = results_root / model_name / "results"

# compute baseline: within each fold, one constant (arousal, valence) pair per
# subject, obtained by averaging the per-file means of that subject's training files
for fold_num in range(4):
    fold_data_path = scenario3_data_path / f"fold_{fold_num}"
    for subject_num, paths_list in iter_train_subjects_and_get_paths(fold_data_path):
        subject_arousal_results = list()
        subject_valence_results = list()
        for train_path in paths_list:
            mean = pd.read_csv(train_path)[["arousal", "valence"]].mean()
            subject_arousal_results.append(mean["arousal"])
            subject_valence_results.append(mean["valence"])
        subject_arousal_mean = np.mean(subject_arousal_results)
        subject_valence_mean = np.mean(subject_valence_results)
        # save predictions
        # The "_" right after the subject number is required: without it the
        # pattern for subject 1 would also match sub_10_*, sub_11_*, ... and those
        # subjects' files could be overwritten with subject 1's mean.
        for test_path in iter_subjects_videos_paths(fold_data_path, search_pattern=f"**/test/annotations/sub_{subject_num}_*.csv"):
            test_df = pd.read_csv(test_path)
            test_df["arousal"] = subject_arousal_mean
            test_df["valence"] = subject_valence_mean
            save_df(test_df, test_path, competition_data_path, results_save_path)

## Scenario 4

In [32]:
# Root folder of the scenario 4 data (split into fold_<n> sub-folders by the cells below).
scenario4_data_path = competition_data_path.joinpath("scenario_4")

### Fold-wise model

In [33]:
# prepare paths
model_name = "fold-wise_mean"
results_save_path = results_root / model_name / "results"

for fold_num in range(2):
    fold_data_path = scenario4_data_path / f"fold_{fold_num}"
    # compute baseline: average the per-file means of this fold's training annotations
    train_means = [
        pd.read_csv(train_path)[["arousal", "valence"]].mean()
        for train_path in iter_subjects_videos_paths(fold_data_path)
    ]
    overall_arousal_mean = np.mean([file_mean["arousal"] for file_mean in train_means])
    overall_valence_mean = np.mean([file_mean["valence"] for file_mean in train_means])

    # save predictions: every test file of the fold receives the same constant pair
    test_paths = iter_subjects_videos_paths(fold_data_path, search_pattern="**/test/annotations/*.csv")
    for test_path in test_paths:
        predictions_df = pd.read_csv(test_path).assign(
            arousal=overall_arousal_mean,
            valence=overall_valence_mean,
        )
        save_df(predictions_df, test_path, competition_data_path, results_save_path)

### Subject-wise model

In [34]:
# prepare paths
model_name = "subject-wise_mean"
results_save_path = results_root / model_name / "results"

# compute baseline: within each fold, one constant (arousal, valence) pair per
# subject, obtained by averaging the per-file means of that subject's training files
for fold_num in range(2):
    fold_data_path = scenario4_data_path / f"fold_{fold_num}"
    for subject_num, paths_list in iter_train_subjects_and_get_paths(fold_data_path):
        subject_arousal_results = list()
        subject_valence_results = list()
        for train_path in paths_list:
            mean = pd.read_csv(train_path)[["arousal", "valence"]].mean()
            subject_arousal_results.append(mean["arousal"])
            subject_valence_results.append(mean["valence"])
        subject_arousal_mean = np.mean(subject_arousal_results)
        subject_valence_mean = np.mean(subject_valence_results)
        # save predictions
        # The "_" right after the subject number is required: without it the
        # pattern for subject 1 would also match sub_10_*, sub_11_*, ... and those
        # subjects' files could be overwritten with subject 1's mean.
        for test_path in iter_subjects_videos_paths(fold_data_path, search_pattern=f"**/test/annotations/sub_{subject_num}_*.csv"):
            test_df = pd.read_csv(test_path)
            test_df["arousal"] = subject_arousal_mean
            test_df["valence"] = subject_valence_mean
            save_df(test_df, test_path, competition_data_path, results_save_path)

# Multilinear predictions

In [35]:
def get_subjects_paths(scenario_path, train_or_test):
    """Group paired physiology/annotation CSV paths by subject number.

    NOTE: this shares its name with the one-argument ``get_subjects_paths``
    defined in the "Average predictions" section and replaces it in the kernel
    namespace once this cell has run.

    Args:
        scenario_path: folder searched recursively.
        train_or_test: name of the split folder (the cells below pass
            ``'train'`` or ``'test'``).

    Returns:
        dict mapping the subject number (second ``_``-separated token of the
        file stem, as a string) to ``{"physiology": [...], "annotations": [...]}``,
        where both lists are aligned index by index.

    Raises:
        AssertionError: if the two folders hold a different number of CSV files
            or a physiology file is paired with a differently named annotation file.
    """
    physio_paths = sorted(scenario_path.glob(pattern=f"**/{train_or_test}/physiology/*.csv"))
    annot_paths = sorted(scenario_path.glob(pattern=f"**/{train_or_test}/annotations/*.csv"))
    # zip() stops at the shorter list, so a surplus file on either side would be dropped silently.
    assert len(physio_paths) == len(annot_paths), (
        f"{len(physio_paths)} physiology files vs {len(annot_paths)} annotation files"
    )
    subjects_paths = dict()
    for physio_path, annot_path in zip(physio_paths, annot_paths):
        assert physio_path.name == annot_path.name
        subject_num = physio_path.stem.split("_")[1]
        subject_entry = subjects_paths.setdefault(subject_num, {"physiology": list(), "annotations": list()})
        subject_entry["physiology"].append(physio_path)
        subject_entry["annotations"].append(annot_path)
    return subjects_paths

def get_videos_paths(scenario_path, train_or_test):
    """Group paired physiology/annotation CSV paths by video number.

    NOTE: this shares its name with the one-argument ``get_videos_paths``
    defined in the "Average predictions" section and replaces it in the kernel
    namespace once this cell has run.

    Args:
        scenario_path: folder searched recursively.
        train_or_test: name of the split folder (the cells below pass
            ``'train'`` or ``'test'``).

    Returns:
        dict mapping the video number (fourth ``_``-separated token of the
        file stem, as a string) to ``{"physiology": [...], "annotations": [...]}``,
        where both lists are aligned index by index.

    Raises:
        AssertionError: if the two folders hold a different number of CSV files
            or a physiology file is paired with a differently named annotation file.
    """
    physio_paths = sorted(scenario_path.glob(pattern=f"**/{train_or_test}/physiology/*.csv"))
    annot_paths = sorted(scenario_path.glob(pattern=f"**/{train_or_test}/annotations/*.csv"))
    # zip() stops at the shorter list, so a surplus file on either side would be dropped silently.
    assert len(physio_paths) == len(annot_paths), (
        f"{len(physio_paths)} physiology files vs {len(annot_paths)} annotation files"
    )
    videos_paths = dict()
    for physio_path, annot_path in zip(physio_paths, annot_paths):
        assert physio_path.name == annot_path.name
        vid_num = physio_path.stem.split("_")[3]
        video_entry = videos_paths.setdefault(vid_num, {"physiology": list(), "annotations": list()})
        video_entry["physiology"].append(physio_path)
        video_entry["annotations"].append(annot_path)
    return videos_paths

def iter_subjects_videos_paths(scenario_path, train_or_test):
    """Yield ``(physiology_path, annotation_path)`` pairs for one split.

    NOTE: this shares its name with the pattern-based
    ``iter_subjects_videos_paths`` defined in the "Average predictions" section
    (which yields single paths) and replaces it in the kernel namespace once
    this cell has run.

    Args:
        scenario_path: folder searched recursively.
        train_or_test: name of the split folder (the cells below pass
            ``'train'`` or ``'test'``).

    Raises:
        AssertionError: on first iteration if the two folders hold a different
            number of CSV files, or later if a physiology file is paired with a
            differently named annotation file.
    """
    physio_paths = sorted(scenario_path.glob(pattern=f"**/{train_or_test}/physiology/*.csv"))
    annot_paths = sorted(scenario_path.glob(pattern=f"**/{train_or_test}/annotations/*.csv"))
    # zip() stops at the shorter list, so a surplus file on either side would be dropped silently.
    assert len(physio_paths) == len(annot_paths), (
        f"{len(physio_paths)} physiology files vs {len(annot_paths)} annotation files"
    )
    for physio_path, annot_path in zip(physio_paths, annot_paths):
        assert physio_path.name == annot_path.name
        yield physio_path, annot_path

def iter_train_subjects_and_get_paths(scenario_path, train_or_test):
    """Yield ``(subject_num, physiology_paths, annotation_paths)`` for each subject.

    The grouping comes from ``get_subjects_paths(scenario_path, train_or_test)``.
    """
    grouped_paths = get_subjects_paths(scenario_path, train_or_test)
    for subject_num in grouped_paths:
        subject_entry = grouped_paths[subject_num]
        yield subject_num, subject_entry['physiology'], subject_entry['annotations']

def iter_train_videos_and_get_paths(scenario_path, train_or_test):
    """Yield ``(video_num, physiology_paths, annotation_paths)`` for each video.

    The grouping comes from ``get_videos_paths(scenario_path, train_or_test)``.
    """
    grouped_paths = get_videos_paths(scenario_path, train_or_test)
    for video_num in grouped_paths:
        video_entry = grouped_paths[video_num]
        yield video_num, video_entry['physiology'], video_entry['annotations']

## Scenario 1

In [36]:
# Root folder of the scenario 1 data (same value as in the "Average predictions" section).
scenario1_data_path = competition_data_path.joinpath("scenario_1")

### Fold-wise model

In [37]:
# prepare paths
model_name = "fold-wise_multilinear"
results_save_path = results_root / model_name / "results"

# gather training data: one block of targets and physiology rows per recording
arousal_annots, valence_annots, train_physiology = [], [], []
for train_physio_path, train_annot_path in iter_subjects_videos_paths(scenario1_data_path, 'train'):
    annot_df = pd.read_csv(train_annot_path)
    physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
    # NOTE(review): the annotation 'time' values are used as positional row
    # indices into the physiology table - confirm this matches the data format.
    train_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
    arousal_annots.append(annot_df["arousal"].to_numpy())
    valence_annots.append(annot_df["valence"].to_numpy())
arousal_annots = np.concatenate(arousal_annots)
valence_annots = np.concatenate(valence_annots)
train_physiology = np.concatenate(train_physiology)

# fit one linear model per target on the pooled samples
arousal_model = LinearRegression().fit(train_physiology, arousal_annots)
valence_model = LinearRegression().fit(train_physiology, valence_annots)

# save predictions
for test_physio_path, test_annot_path in iter_subjects_videos_paths(scenario1_data_path, 'test'):
    test_annot_df = pd.read_csv(test_annot_path)
    test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
    test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
    predictions_df = test_annot_df.assign(
        arousal=arousal_model.predict(test_features),
        valence=valence_model.predict(test_features),
    )
    save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

### Subject-wise model

In [38]:
# prepare paths
model_name = "subject-wise_multilinear"
results_save_path = results_root / model_name / "results"

# fit one pair of linear models (arousal, valence) per training subject
subjects_models = dict()
for subject_num, train_physio_paths_list, train_annot_paths_list in iter_train_subjects_and_get_paths(scenario1_data_path, 'train'):
    subject_arousal_annots, subject_valence_annots, subject_physiology = [], [], []
    for train_physio_path, train_annot_path in zip(train_physio_paths_list, train_annot_paths_list):
        annot_df = pd.read_csv(train_annot_path)
        physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
        # annotation 'time' values select physiology rows by position
        subject_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
        subject_arousal_annots.append(annot_df["arousal"].to_numpy())
        subject_valence_annots.append(annot_df["valence"].to_numpy())
    subject_arousal_annots = np.concatenate(subject_arousal_annots)
    subject_valence_annots = np.concatenate(subject_valence_annots)
    subject_physiology = np.concatenate(subject_physiology)

    subject_arousal_model = LinearRegression().fit(subject_physiology, subject_arousal_annots)
    subject_valence_model = LinearRegression().fit(subject_physiology, subject_valence_annots)
    # subject numbers come from dict keys, so each one is stored exactly once
    subjects_models[subject_num] = {"arousal": subject_arousal_model, "valence": subject_valence_model}

# save predictions: each test file is scored with its own subject's models
for subject_num, test_physio_paths_list, test_annot_paths_list in iter_train_subjects_and_get_paths(scenario1_data_path, 'test'):
    subject_models = subjects_models[subject_num]
    for test_physio_path, test_annot_path in zip(test_physio_paths_list, test_annot_paths_list):
        test_annot_df = pd.read_csv(test_annot_path)
        test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
        test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
        predictions_df = test_annot_df.assign(
            arousal=subject_models['arousal'].predict(test_features),
            valence=subject_models['valence'].predict(test_features),
        )
        save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

### Video-wise model

In [39]:
# prepare paths
model_name = "video-wise_multilinear"
results_save_path = results_root / model_name / "results"

# fit one pair of linear models (arousal, valence) per training video
videos_models = dict()
for vid_num, train_physio_paths_list, train_annot_paths_list in iter_train_videos_and_get_paths(scenario1_data_path, 'train'):
    video_arousal_annots, video_valence_annots, video_physiology = [], [], []
    for train_physio_path, train_annot_path in zip(train_physio_paths_list, train_annot_paths_list):
        annot_df = pd.read_csv(train_annot_path)
        physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
        # annotation 'time' values select physiology rows by position
        video_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
        video_arousal_annots.append(annot_df["arousal"].to_numpy())
        video_valence_annots.append(annot_df["valence"].to_numpy())
    video_arousal_annots = np.concatenate(video_arousal_annots)
    video_valence_annots = np.concatenate(video_valence_annots)
    video_physiology = np.concatenate(video_physiology)

    video_arousal_model = LinearRegression().fit(video_physiology, video_arousal_annots)
    video_valence_model = LinearRegression().fit(video_physiology, video_valence_annots)
    # video numbers come from dict keys, so each one is stored exactly once
    videos_models[vid_num] = {"arousal": video_arousal_model, "valence": video_valence_model}

# save predictions: each test file is scored with its own video's models
for vid_num, test_physio_paths_list, test_annot_paths_list in iter_train_videos_and_get_paths(scenario1_data_path, 'test'):
    video_models = videos_models[vid_num]
    for test_physio_path, test_annot_path in zip(test_physio_paths_list, test_annot_paths_list):
        test_annot_df = pd.read_csv(test_annot_path)
        test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
        test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
        predictions_df = test_annot_df.assign(
            arousal=video_models['arousal'].predict(test_features),
            valence=video_models['valence'].predict(test_features),
        )
        save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

### Subvid-wise model

In [40]:
# prepare paths
model_name = "subvid-wise_multilinear"
results_save_path = results_root / model_name / "results"

# compute baseline: one pair of linear models per training recording, keyed by file stem
subvid_models = dict()

for train_physio_path, train_annot_path in iter_subjects_videos_paths(scenario1_data_path, 'train'):
    annot_df = pd.read_csv(train_annot_path)
    physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
    # annotation 'time' values select physiology rows by position
    subvid_physiology = physio_df.iloc[annot_df['time']].to_numpy()
    subvid_arousal_model = LinearRegression().fit(subvid_physiology, annot_df["arousal"].to_numpy())
    subvid_valence_model = LinearRegression().fit(subvid_physiology, annot_df["valence"].to_numpy())
    # setdefault keeps the first model pair if a stem were ever seen twice
    subvid_models.setdefault(
        train_physio_path.stem,
        {"arousal": subvid_arousal_model, "valence": subvid_valence_model},
    )

# save predictions: a test file is scored by the models trained on the same-named recording
for test_physio_path, test_annot_path in iter_subjects_videos_paths(scenario1_data_path, 'test'):
    test_annot_df = pd.read_csv(test_annot_path)
    test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
    test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
    recording_models = subvid_models[test_physio_path.stem]
    predictions_df = test_annot_df.assign(
        arousal=recording_models['arousal'].predict(test_features),
        valence=recording_models['valence'].predict(test_features),
    )
    save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

## Scenario 2

In [41]:
# Root folder of the scenario 2 data (same value as in the "Average predictions" section).
scenario2_data_path = competition_data_path.joinpath("scenario_2")

### Fold-wise model

In [42]:
# prepare paths
model_name = "fold-wise_multilinear"
results_save_path = results_root / model_name / "results"

for fold_num in range(5):
    fold_data_path = scenario2_data_path / f"fold_{fold_num}"
    # gather this fold's training data: one block of targets and physiology rows per recording
    arousal_annots, valence_annots, train_physiology = [], [], []
    for train_physio_path, train_annot_path in iter_subjects_videos_paths(fold_data_path, 'train'):
        annot_df = pd.read_csv(train_annot_path)
        physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
        # annotation 'time' values select physiology rows by position
        train_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
        arousal_annots.append(annot_df["arousal"].to_numpy())
        valence_annots.append(annot_df["valence"].to_numpy())
    arousal_annots = np.concatenate(arousal_annots)
    valence_annots = np.concatenate(valence_annots)
    train_physiology = np.concatenate(train_physiology)

    # fit one linear model per target on the pooled samples of the fold
    arousal_model = LinearRegression().fit(train_physiology, arousal_annots)
    valence_model = LinearRegression().fit(train_physiology, valence_annots)

    # save predictions
    for test_physio_path, test_annot_path in iter_subjects_videos_paths(fold_data_path, 'test'):
        test_annot_df = pd.read_csv(test_annot_path)
        test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
        test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
        predictions_df = test_annot_df.assign(
            arousal=arousal_model.predict(test_features),
            valence=valence_model.predict(test_features),
        )
        save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

### Video-wise model

In [43]:
# prepare paths
model_name = "video-wise_multilinear"
results_save_path = results_root / model_name / "results"

for fold_num in range(5):
    fold_data_path = scenario2_data_path / f"fold_{fold_num}"
    # models are rebuilt from scratch for every fold
    videos_models = dict()

    # fit one pair of linear models (arousal, valence) per training video
    for vid_num, train_physio_paths_list, train_annot_paths_list in iter_train_videos_and_get_paths(fold_data_path, 'train'):
        video_arousal_annots, video_valence_annots, video_physiology = [], [], []
        for train_physio_path, train_annot_path in zip(train_physio_paths_list, train_annot_paths_list):
            annot_df = pd.read_csv(train_annot_path)
            physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
            # annotation 'time' values select physiology rows by position
            video_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
            video_arousal_annots.append(annot_df["arousal"].to_numpy())
            video_valence_annots.append(annot_df["valence"].to_numpy())
        video_arousal_annots = np.concatenate(video_arousal_annots)
        video_valence_annots = np.concatenate(video_valence_annots)
        video_physiology = np.concatenate(video_physiology)

        video_arousal_model = LinearRegression().fit(video_physiology, video_arousal_annots)
        video_valence_model = LinearRegression().fit(video_physiology, video_valence_annots)
        # video numbers come from dict keys, so each one is stored exactly once
        videos_models[vid_num] = {"arousal": video_arousal_model, "valence": video_valence_model}

    # save predictions: each test file is scored with its own video's models
    for vid_num, test_physio_paths_list, test_annot_paths_list in iter_train_videos_and_get_paths(fold_data_path, 'test'):
        video_models = videos_models[vid_num]
        for test_physio_path, test_annot_path in zip(test_physio_paths_list, test_annot_paths_list):
            test_annot_df = pd.read_csv(test_annot_path)
            test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
            test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
            predictions_df = test_annot_df.assign(
                arousal=video_models['arousal'].predict(test_features),
                valence=video_models['valence'].predict(test_features),
            )
            save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

## Scenario 3

In [44]:
# Root folder of the scenario 3 data (same value as in the "Average predictions" section).
scenario3_data_path = competition_data_path.joinpath("scenario_3")

### Fold-wise model

In [45]:
# prepare paths
model_name = "fold-wise_multilinear"
results_save_path = results_root / model_name / "results"

for fold_num in range(4):
    fold_data_path = scenario3_data_path / f"fold_{fold_num}"
    # gather this fold's training data: one block of targets and physiology rows per recording
    arousal_annots, valence_annots, train_physiology = [], [], []
    for train_physio_path, train_annot_path in iter_subjects_videos_paths(fold_data_path, 'train'):
        annot_df = pd.read_csv(train_annot_path)
        physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
        # annotation 'time' values select physiology rows by position
        train_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
        arousal_annots.append(annot_df["arousal"].to_numpy())
        valence_annots.append(annot_df["valence"].to_numpy())
    arousal_annots = np.concatenate(arousal_annots)
    valence_annots = np.concatenate(valence_annots)
    train_physiology = np.concatenate(train_physiology)

    # fit one linear model per target on the pooled samples of the fold
    arousal_model = LinearRegression().fit(train_physiology, arousal_annots)
    valence_model = LinearRegression().fit(train_physiology, valence_annots)

    # save predictions
    for test_physio_path, test_annot_path in iter_subjects_videos_paths(fold_data_path, 'test'):
        test_annot_df = pd.read_csv(test_annot_path)
        test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
        test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
        predictions_df = test_annot_df.assign(
            arousal=arousal_model.predict(test_features),
            valence=valence_model.predict(test_features),
        )
        save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

### Subject-wise model

In [46]:
# prepare paths
model_name = "subject-wise_multilinear"
results_save_path = results_root / model_name / "results"

for fold_num in range(4):
    fold_data_path = scenario3_data_path / f"fold_{fold_num}"
    # models are rebuilt from scratch for every fold
    subjects_models = dict()

    # fit one pair of linear models (arousal, valence) per training subject
    for subject_num, train_physio_paths_list, train_annot_paths_list in iter_train_subjects_and_get_paths(fold_data_path, 'train'):
        subject_arousal_annots, subject_valence_annots, subject_physiology = [], [], []
        for train_physio_path, train_annot_path in zip(train_physio_paths_list, train_annot_paths_list):
            annot_df = pd.read_csv(train_annot_path)
            physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
            # annotation 'time' values select physiology rows by position
            subject_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
            subject_arousal_annots.append(annot_df["arousal"].to_numpy())
            subject_valence_annots.append(annot_df["valence"].to_numpy())
        subject_arousal_annots = np.concatenate(subject_arousal_annots)
        subject_valence_annots = np.concatenate(subject_valence_annots)
        subject_physiology = np.concatenate(subject_physiology)

        subject_arousal_model = LinearRegression().fit(subject_physiology, subject_arousal_annots)
        subject_valence_model = LinearRegression().fit(subject_physiology, subject_valence_annots)
        # subject numbers come from dict keys, so each one is stored exactly once
        subjects_models[subject_num] = {"arousal": subject_arousal_model, "valence": subject_valence_model}

    # save predictions: each test file is scored with its own subject's models
    for subject_num, test_physio_paths_list, test_annot_paths_list in iter_train_subjects_and_get_paths(fold_data_path, 'test'):
        subject_models = subjects_models[subject_num]
        for test_physio_path, test_annot_path in zip(test_physio_paths_list, test_annot_paths_list):
            test_annot_df = pd.read_csv(test_annot_path)
            test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
            test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
            predictions_df = test_annot_df.assign(
                arousal=subject_models['arousal'].predict(test_features),
                valence=subject_models['valence'].predict(test_features),
            )
            save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

## Scenario 4

In [47]:
# Root folder of the scenario 4 data (same value as in the "Average predictions" section).
scenario4_data_path = competition_data_path.joinpath("scenario_4")

### Fold-wise model

In [48]:
# prepare paths
model_name = "fold-wise_multilinear"
results_save_path = results_root / model_name / "results"

for fold_num in range(2):
    fold_data_path = scenario4_data_path / f"fold_{fold_num}"
    # gather this fold's training data: one block of targets and physiology rows per recording
    arousal_annots, valence_annots, train_physiology = [], [], []
    for train_physio_path, train_annot_path in iter_subjects_videos_paths(fold_data_path, 'train'):
        annot_df = pd.read_csv(train_annot_path)
        physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
        # annotation 'time' values select physiology rows by position
        train_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
        arousal_annots.append(annot_df["arousal"].to_numpy())
        valence_annots.append(annot_df["valence"].to_numpy())
    arousal_annots = np.concatenate(arousal_annots)
    valence_annots = np.concatenate(valence_annots)
    train_physiology = np.concatenate(train_physiology)

    # fit one linear model per target on the pooled samples of the fold
    arousal_model = LinearRegression().fit(train_physiology, arousal_annots)
    valence_model = LinearRegression().fit(train_physiology, valence_annots)

    # save predictions
    for test_physio_path, test_annot_path in iter_subjects_videos_paths(fold_data_path, 'test'):
        test_annot_df = pd.read_csv(test_annot_path)
        test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
        test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
        predictions_df = test_annot_df.assign(
            arousal=arousal_model.predict(test_features),
            valence=valence_model.predict(test_features),
        )
        save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)

### Subject-wise model

In [49]:
# prepare paths
model_name = "subject-wise_multilinear"
results_save_path = results_root / model_name / "results"

for fold_num in range(2):
    fold_data_path = scenario4_data_path / f"fold_{fold_num}"
    # models are rebuilt from scratch for every fold
    subjects_models = dict()

    # fit one pair of linear models (arousal, valence) per training subject
    for subject_num, train_physio_paths_list, train_annot_paths_list in iter_train_subjects_and_get_paths(fold_data_path, 'train'):
        subject_arousal_annots, subject_valence_annots, subject_physiology = [], [], []
        for train_physio_path, train_annot_path in zip(train_physio_paths_list, train_annot_paths_list):
            annot_df = pd.read_csv(train_annot_path)
            physio_df = pd.read_csv(train_physio_path).drop(columns=["time"])
            # annotation 'time' values select physiology rows by position
            subject_physiology.append(physio_df.iloc[annot_df['time']].to_numpy())
            subject_arousal_annots.append(annot_df["arousal"].to_numpy())
            subject_valence_annots.append(annot_df["valence"].to_numpy())
        subject_arousal_annots = np.concatenate(subject_arousal_annots)
        subject_valence_annots = np.concatenate(subject_valence_annots)
        subject_physiology = np.concatenate(subject_physiology)

        subject_arousal_model = LinearRegression().fit(subject_physiology, subject_arousal_annots)
        subject_valence_model = LinearRegression().fit(subject_physiology, subject_valence_annots)
        # subject numbers come from dict keys, so each one is stored exactly once
        subjects_models[subject_num] = {"arousal": subject_arousal_model, "valence": subject_valence_model}

    # save predictions: each test file is scored with its own subject's models
    for subject_num, test_physio_paths_list, test_annot_paths_list in iter_train_subjects_and_get_paths(fold_data_path, 'test'):
        subject_models = subjects_models[subject_num]
        for test_physio_path, test_annot_path in zip(test_physio_paths_list, test_annot_paths_list):
            test_annot_df = pd.read_csv(test_annot_path)
            test_physio_df = pd.read_csv(test_physio_path).drop(columns=["time"])
            test_features = test_physio_df.iloc[test_annot_df['time']].to_numpy()
            predictions_df = test_annot_df.assign(
                arousal=subject_models['arousal'].predict(test_features),
                valence=subject_models['valence'].predict(test_features),
            )
            save_df(predictions_df, test_annot_path, competition_data_path, results_save_path)