In [1]:
import os
import numpy as np
import pandas as pd

os.chdir("..")

In [2]:
from model.shared import all_pair_df, videos2pair_id, pair_id2videos

In [3]:
"""Read all user preference file in database/baseline dir, merge them into a dataframe and save it to csv."""
# get all user preference file names
user_pref_file_names = [
    file for file in os.listdir("database/baseline") if "test" not in file and "pref" in file
]
if len(user_pref_file_names) == 0:
    raise Exception("No user preference file found in database/baseline dir.")
# merge all user preference file into one dataframe
all_user_pref = pd.DataFrame()
for user_pref_file_name in user_pref_file_names:
    if user_pref_file_name == "all_user_pref.csv":
        continue
    user_pref = pd.read_csv(os.path.join("database/baseline", user_pref_file_name))
    user_pref["user_id"] = user_pref_file_name.split("_")[0]
    # add pair_id column
    user_pref["pair_id"] = user_pref.apply(
        lambda row: videos2pair_id(row["video1"], row["video2"]), axis=1
    )
    # make sure the pref score is accurate
    user_pref["pref"] = user_pref.apply(
        lambda row: row["pref"] if (row[["video1", "video2"]].values.tolist()[0] == pair_id2videos(row["pair_id"])) else 1 - row["pref"],
        axis=1,
    )
    all_user_pref = pd.concat(
        [all_user_pref, user_pref[user_pref["pref"] != -1]],
    )
# sort the dataframe by pair_id and user_id
all_user_pref.sort_values(
    by=["pair_id", "user_id"], ascending=True, ignore_index=True, inplace=True
)
all_user_pref["pair_feature"] = all_user_pref["pair_id"].apply(
    lambda pair_id: all_pair_df[all_pair_df["pair_id"] == pair_id][
        "pair_feature"
    ].iloc[0]
)
all_user_pref = all_user_pref[
    ["pair_id", "video1", "video2", "user_id", "pref", "pair_feature"]
]
# save the merged dataframe to csv
all_user_pref.to_csv("database/baseline/all_user_pref.csv", index=False)

In [4]:
all_user_pref

Unnamed: 0,pair_id,video1,video2,user_id,pref,pair_feature
0,0,mg_demo_2477,mg_demo_3362,U07,0.0,"(1, 1)"
1,0,mg_demo_2477,mg_demo_3362,U09,0.0,"(1, 1)"
2,0,mg_demo_2477,mg_demo_3362,U21,0.5,"(1, 1)"
3,0,mg_demo_2477,mg_demo_3362,U33,1.0,"(1, 1)"
4,0,mg_demo_2477,mg_demo_3362,U41,0.5,"(1, 1)"
...,...,...,...,...,...,...
2200,434,ph_demo_20,ph_demo_33,U09,0.0,"(6, 6)"
2201,434,ph_demo_20,ph_demo_33,U13,1.0,"(6, 6)"
2202,434,ph_demo_20,ph_demo_33,U23,0.0,"(6, 6)"
2203,434,ph_demo_20,ph_demo_33,U27,1.0,"(6, 6)"
