In [1]:
import os
import pandas as pd
from glob import glob
from scipy.io import loadmat

In [2]:
root_path = './'
mats = glob(os.path.join(root_path, '*.mat'))
mats = [m for m in mats if 'paintings2019.mat' not in m]

In [3]:
def compute_interval_time(arr):
    if 0 in arr:
        return [0] * 4
    else:
        return [round(arr[i+1] - arr[i], 2) for i in range(4)]
        # return [arr[i+1] - arr[i] for i in range(4)]

def compute_expl_interval_time(arr):
    n = len(arr)
    if 0 in arr:
        return [0] * (n-1)
    else:
        return [round(arr[i+1] - arr[i], 2) for i in range(n-1)]
    
def compute_expl_time_per_pic(expltimes, explpics):
    interval_time = compute_expl_interval_time(expltimes)
    time_per_pic = {}
    for i, pic in enumerate(explpics[:-1]):
        if pic not in time_per_pic:
            time_per_pic[pic] = interval_time[i]
        else:
            time_per_pic[pic] += interval_time[i]
    return time_per_pic

def compute_expl_time(mat):
    expltimes = mat['Expltime'][0]
    explpics = mat['Explpic'][0]
    expl_time_per_pic = compute_expl_time_per_pic(expltimes, explpics)
    times = []
    for i in range(mat['Decmap'].shape[0]):
        pic_times = []
        for pic in mat['Decmap'][i]:
            pic_times.append(expl_time_per_pic.get(pic, 0))
        times.append(pic_times)
    return times

In [4]:
dfs = []
ids = []
moneys = []
for mat in mats:
    data = loadmat(mat)
    id = os.path.basename(mat).split(".")[0].zfill(2)
    d = {}
    for key in ["Decmap", "Decpic", "Dectime", "Payoff"]:
        d[key] = data[key]
    d["Dectime_interval"] = [compute_interval_time(arr) for arr in list(d["Dectime"])]
    d["Decpics_expltime"] = compute_expl_time(data)
    df = pd.DataFrame(
        {
            "ID": "2025" + id,
            "Group": data["Group"][0],
            "round": list(range(1, len(list(d["Decmap"])) + 1)),
            "Decmaps": [list(row) for row in d["Decmap"]],
            "Decpics": [list(row) for row in d["Decpic"]],
            "Expltime": [list(row) for row in d["Decpics_expltime"]],
            "Dectimes_interval": [list(row) for row in d["Dectime_interval"]],
            "Dectimes": [list(row) for row in d["Dectime"]],
            "Payoffs": [list(row) for row in d["Payoff"]],
        }
    )
    dfs.append(df)
    ids.append("2025" + id)
    moneys.append(data["Money"][0][0])
    print(f"Processed ID: 2025{id}, total rounds: {len(df)}")

Processed ID: 202513, total rounds: 36
Processed ID: 202511, total rounds: 28
Processed ID: 202510, total rounds: 22
Processed ID: 202514, total rounds: 21
Processed ID: 202508, total rounds: 34
Processed ID: 202509, total rounds: 31
Processed ID: 202502, total rounds: 23
Processed ID: 202503, total rounds: 16
Processed ID: 202501, total rounds: 23
Processed ID: 202504, total rounds: 32
Processed ID: 202505, total rounds: 31
Processed ID: 202507, total rounds: 34
Processed ID: 202506, total rounds: 37


In [5]:
full_df = pd.concat(dfs, ignore_index=True)
full_df = full_df.sort_values(by=['ID', 'round']).reset_index(drop=True)
full_df


Unnamed: 0,ID,Group,round,Decmaps,Decpics,Expltime,Dectimes_interval,Dectimes,Payoffs
0,202501,HL,1,"[328, 315, 300, 267, 261, 352, 324, 360, 276, ...","[185, 192, 297, 193]","[0, 36.12, 0, 0, 3.0799999999999996, 0, 0, 0.9...","[57.4, 0.58, 10.15, 3.97]","[53398.134, 53455.53, 53456.106, 53466.258, 53...","[6, 6, 4, 6]"
1,202501,HL,2,"[77, 118, 220, 36, 113, 51, 79, 231, 136, 68, ...","[136, 113, 118, 115]","[11.09, 0, 0.8500000000000001, 2.5300000000000...","[16.43, 0.39, 0.24, 6.27]","[53473.228, 53489.656, 53490.046, 53490.287, 5...","[2, 2, 2, 2]"
2,202501,HL,3,"[26, 213, 258, 323, 154, 185, 23, 321, 307, 3,...","[258, 174, 180, 154]","[0, 0, 0, 0, 3.9800000000000004, 11.7999999999...","[29.07, 4.49, 4.73, 2.6]","[53780.331, 53809.398, 53813.886, 53818.614, 5...","[2, 5, 5, 5]"
3,202501,HL,4,"[87, 222, 234, 220, 346, 63, 88, 134, 55, 341,...","[110, 234, 220, 222]","[0, 0, 0, 0.8500000000000001, 0, 0.33, 0, 0, 0...","[20.13, 20.06, 1.06, 16.89]","[53822.978, 53843.111, 53863.171, 53864.227, 5...","[2, 6, 6, 6]"
4,202501,HL,5,"[37, 250, 47, 180, 117, 249, 337, 125, 356, 14...","[37, 47, 161, 356]","[0, 0, 0, 0, 0, 0, 0.21, 0, 0, 0, 0, 116.77, 0...","[15.4, 2.54, 1.41, 2.86]","[54106.991, 54122.394, 54124.932, 54126.339, 5...","[4, 4, 5, 3]"
...,...,...,...,...,...,...,...,...,...
363,202514,Lc,17,"[355, 61, 211, 182, 135, 49, 180, 144, 351, 15...","[211, 182, 193, 212]","[0, 14.97, 0, 4.25, 3.9299999999999997, 0, 133...","[31.05, 1.76, 3.1, 7.05]","[56404.762, 56435.811, 56437.57, 56440.666, 56...","[6, 6, 6, 6]"
364,202514,Lc,18,"[118, 354, 159, 139, 195, 157, 192, 113, 176, ...","[157, 176, 153, 159]","[2.87, 1.45, 17.1, 0, 0, 0, 461.56, 0, 0, 0, 0...","[29.0, 1.1, 1.86, 3.63]","[56706.605, 56735.608, 56736.704, 56738.56, 56...","[5, 5, 5, 5]"
365,202514,Lc,19,"[38, 29, 8, 263, 232, 288, 83, 51, 283, 245, 2...","[232, 225, 224, 245]","[0, 0, 0, 3.37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[19.25, 2.31, 14.29, 13.77]","[56744.007, 56763.261, 56765.571, 56779.859, 5...","[6, 6, 6, 6]"
366,202514,Lc,20,"[112, 179, 249, 232, 122, 360, 64, 165, 170, 2...","[164, 360, 170, 179]","[0, 0, 69.24, 0, 0, 5.01, 0, 0, 0, 0, 0, 1.160...","[27.55, 2.27, 2.13, 6.44]","[56985.494, 57013.041, 57015.315, 57017.441, 5...","[5, 3, 5, 5]"


In [6]:
full_df.to_csv(os.path.join(root_path, '2025_data.csv'), index=False)

In [7]:
import ast
def str_to_list(x):
    if isinstance(x, str):
        return ast.literal_eval(x)
    return x
full_df = pd.read_csv(os.path.join(root_path, '2025_data.csv'), converters={
        'Decmaps': str_to_list,
        'Decpics': str_to_list,
        'Expltime': str_to_list,
        'Dectimes_interval': str_to_list,
        'Dectimes': str_to_list,
        'Payoffs': str_to_list,
    })
full_df

Unnamed: 0,ID,Group,round,Decmaps,Decpics,Expltime,Dectimes_interval,Dectimes,Payoffs
0,202501,HL,1,"[328, 315, 300, 267, 261, 352, 324, 360, 276, ...","[185, 192, 297, 193]","[0, 36.12, 0, 0, 3.0799999999999996, 0, 0, 0.9...","[57.4, 0.58, 10.15, 3.97]","[53398.134, 53455.53, 53456.106, 53466.258, 53...","[6, 6, 4, 6]"
1,202501,HL,2,"[77, 118, 220, 36, 113, 51, 79, 231, 136, 68, ...","[136, 113, 118, 115]","[11.09, 0, 0.8500000000000001, 2.5300000000000...","[16.43, 0.39, 0.24, 6.27]","[53473.228, 53489.656, 53490.046, 53490.287, 5...","[2, 2, 2, 2]"
2,202501,HL,3,"[26, 213, 258, 323, 154, 185, 23, 321, 307, 3,...","[258, 174, 180, 154]","[0, 0, 0, 0, 3.9800000000000004, 11.7999999999...","[29.07, 4.49, 4.73, 2.6]","[53780.331, 53809.398, 53813.886, 53818.614, 5...","[2, 5, 5, 5]"
3,202501,HL,4,"[87, 222, 234, 220, 346, 63, 88, 134, 55, 341,...","[110, 234, 220, 222]","[0, 0, 0, 0.8500000000000001, 0, 0.33, 0, 0, 0...","[20.13, 20.06, 1.06, 16.89]","[53822.978, 53843.111, 53863.171, 53864.227, 5...","[2, 6, 6, 6]"
4,202501,HL,5,"[37, 250, 47, 180, 117, 249, 337, 125, 356, 14...","[37, 47, 161, 356]","[0, 0, 0, 0, 0, 0, 0.21, 0, 0, 0, 0, 116.77, 0...","[15.4, 2.54, 1.41, 2.86]","[54106.991, 54122.394, 54124.932, 54126.339, 5...","[4, 4, 5, 3]"
...,...,...,...,...,...,...,...,...,...
363,202514,Lc,17,"[355, 61, 211, 182, 135, 49, 180, 144, 351, 15...","[211, 182, 193, 212]","[0, 14.97, 0, 4.25, 3.9299999999999997, 0, 133...","[31.05, 1.76, 3.1, 7.05]","[56404.762, 56435.811, 56437.57, 56440.666, 56...","[6, 6, 6, 6]"
364,202514,Lc,18,"[118, 354, 159, 139, 195, 157, 192, 113, 176, ...","[157, 176, 153, 159]","[2.87, 1.45, 17.1, 0, 0, 0, 461.56, 0, 0, 0, 0...","[29.0, 1.1, 1.86, 3.63]","[56706.605, 56735.608, 56736.704, 56738.56, 56...","[5, 5, 5, 5]"
365,202514,Lc,19,"[38, 29, 8, 263, 232, 288, 83, 51, 283, 245, 2...","[232, 225, 224, 245]","[0, 0, 0, 3.37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[19.25, 2.31, 14.29, 13.77]","[56744.007, 56763.261, 56765.571, 56779.859, 5...","[6, 6, 6, 6]"
366,202514,Lc,20,"[112, 179, 249, 232, 122, 360, 64, 165, 170, 2...","[164, 360, 170, 179]","[0, 0, 69.24, 0, 0, 5.01, 0, 0, 0, 0, 0, 1.160...","[27.55, 2.27, 2.13, 6.44]","[56985.494, 57013.041, 57015.315, 57017.441, 5...","[5, 3, 5, 5]"


In [8]:
def compute_selected_times_avgpayoff(df, n=5):
    df = df.sort_values(['ID', 'round']).copy()

    all_counts = []
    all_avg = []

    for _, sub in df.groupby('ID', sort=False):
        sub_counts = []
        sub_avg = []
        decmaps_list = sub['Decmaps'].tolist()
        decpics_list = sub['Decpics'].tolist()
        payoffs_list = sub['Payoffs'].tolist()

        for i in range(len(sub)):
            start = max(0, i - n)
            end = i
            count_dict = {}
            sum_dict = {}

            for k in range(start, end):
                pics_k = decpics_list[k]
                pays_k = payoffs_list[k]
                pics_k = list(pics_k)
                pays_k = list(pays_k)
                for pic, pay in zip(pics_k, pays_k):
                    count_dict[pic] = count_dict.get(pic, 0) + 1
                    sum_dict[pic] = sum_dict.get(pic, 0.0) + float(pay)

            decmaps_i = list(decmaps_list[i])
            counts_i = []
            avg_i = []

            for pic in decmaps_i:
                c = count_dict.get(pic, 0)
                counts_i.append(c)
                if c > 0:
                    avg_i.append(sum_dict[pic] / c)
                else:
                    avg_i.append(0.0)

            sub_counts.append(counts_i)
            sub_avg.append(avg_i)

        all_counts.extend(sub_counts)
        all_avg.extend(sub_avg)

    df['prev_n_selected_times'] = all_counts
    df['prev_n_avg_payoff'] = all_avg

    return df

df_new = compute_selected_times_avgpayoff(full_df, n=10)
df_new.head(6)

Unnamed: 0,ID,Group,round,Decmaps,Decpics,Expltime,Dectimes_interval,Dectimes,Payoffs,prev_n_selected_times,prev_n_avg_payoff
0,202501,HL,1,"[328, 315, 300, 267, 261, 352, 324, 360, 276, ...","[185, 192, 297, 193]","[0, 36.12, 0, 0, 3.0799999999999996, 0, 0, 0.9...","[57.4, 0.58, 10.15, 3.97]","[53398.134, 53455.53, 53456.106, 53466.258, 53...","[6, 6, 4, 6]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,202501,HL,2,"[77, 118, 220, 36, 113, 51, 79, 231, 136, 68, ...","[136, 113, 118, 115]","[11.09, 0, 0.8500000000000001, 2.5300000000000...","[16.43, 0.39, 0.24, 6.27]","[53473.228, 53489.656, 53490.046, 53490.287, 5...","[2, 2, 2, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,202501,HL,3,"[26, 213, 258, 323, 154, 185, 23, 321, 307, 3,...","[258, 174, 180, 154]","[0, 0, 0, 0, 3.9800000000000004, 11.7999999999...","[29.07, 4.49, 4.73, 2.6]","[53780.331, 53809.398, 53813.886, 53818.614, 5...","[2, 5, 5, 5]","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, ..."
3,202501,HL,4,"[87, 222, 234, 220, 346, 63, 88, 134, 55, 341,...","[110, 234, 220, 222]","[0, 0, 0, 0.8500000000000001, 0, 0.33, 0, 0, 0...","[20.13, 20.06, 1.06, 16.89]","[53822.978, 53843.111, 53863.171, 53864.227, 5...","[2, 6, 6, 6]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,202501,HL,5,"[37, 250, 47, 180, 117, 249, 337, 125, 356, 14...","[37, 47, 161, 356]","[0, 0, 0, 0, 0, 0, 0.21, 0, 0, 0, 0, 116.77, 0...","[15.4, 2.54, 1.41, 2.86]","[54106.991, 54122.394, 54124.932, 54126.339, 5...","[4, 4, 5, 3]","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5,202501,HL,6,"[311, 259, 32, 200, 288, 188, 34, 275, 191, 31...","[191, 200, 94, 103]","[0, 0, 0, 0, 0, 0, 0, 155.47, 0.13, 36.12, 0, ...","[36.61, 3.51, 5.41, 4.45]","[54131.223, 54167.831, 54171.336, 54176.743, 5...","[6, 6, 5, 5]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [9]:
df_new.to_csv(os.path.join(root_path, '2025_data_with_features.csv'), index=False)

In [10]:
df_new.to_pickle(os.path.join(root_path, '2025_data_with_features.pkl'))