In [1]:
import pandas as pd
from tools import *
from CONSTANT import *
import os

from tqdm import tqdm

import matplotlib.pyplot as plt

%matplotlib inline

# HKU956

In [2]:
av_rating = pd.read_csv(r'./HKU956/3. AV_ratings_duration.csv')
av_rating['valence'] = av_rating['valence'].apply(lambda x: 1 if x == 'positive' else 0)
av_rating['arousal'] = av_rating['arousal'].apply(lambda x: 1 if x == 'positive' else 0)
av_rating.head()

Unnamed: 0,participant_id,song_no,song_id,valence_rating,valence,arousal_rating,arousal,play_duration
0,hku1919,16,370177,9.3,1,1.0,1,72
1,hku1919,8,1119687,9.8,1,-3.0,0,73
2,hku1919,18,77933,-0.1,0,1.9,1,41
3,hku1919,14,238585,1.0,1,-1.8,0,66
4,hku1919,20,1168711,0.2,1,-3.4,0,33


In [17]:
def signal_processer(user, signal_name, signal_files, win_size=4, step=2):
    all_signals = {}
    tmp_signals = []
    va_info_col = ['participant_id', 'song_no', 'song_id', 'valence_rating',
                   'valence', 'arousal_rating', 'arousal', 'play_duration']
    for file in signal_files:
        try:
            filename = os.path.split(file)[-1]
            filename, file_extension = os.path.splitext(filename)
            if file_extension != '.csv':
                continue
            # songno, songid = filename.split('_')
            
            signal = pd.read_csv(file, header=None).iloc[:,0].values

            # bandpass or lowpass filtering & resampling
            if signal_name == 'EDA':
                signal = butter_lowpass_filter(signal, cutOff=CUTOFF['EDA'], fs=SAMPLERATE['EDA'])
                signal = resample_by_interpolation(signal, input_fs=SAMPLERATE['EDA'], output_fs=100)
            elif signal_name in ['TEMP', 'BVP']:
                signal = butter_bandpass_filter(signal, lowcut=CUTOFF[signal_name][0], highcut=CUTOFF[signal_name][1], fs=SAMPLERATE[signal_name])
                signal = resample_by_interpolation(signal, input_fs=SAMPLERATE[signal_name], output_fs=100)
            elif signal_name == 'HR': # HR, IBI
                signal = iqr_filter(pd.Series(signal)).values
                print(signal)
                signal = resample_by_interpolation(signal, input_fs=SAMPLERATE[signal_name], output_fs=100)
            else: # IBI
                continue

            tmp_signals.extend(signal.tolist())
            all_signals[filename] = signal
            
        except Exception as e:
            print(e, file)
        
    signal_max = np.max(tmp_signals)
    signal_min = np.min(tmp_signals)

    del tmp_signals

    signal_df = []

    # normalization, segmentation, $ concatenation
    for filename, signal in all_signals.items():
        song_no, song_id = filename.split('_')
        signal = (signal - signal_min) / (signal_max - signal_min)
        
        segments = np.array(segment_generator(signal, win_size=win_size*100, step=step*100)[1:])
        seg_cols = ['{}_seg{}'.format(signal_name, i) for i in range(segments.shape[1])]
        segment_df = pd.DataFrame(columns=seg_cols, data=segments)

        segment_df['segment_id'] = segment_df.index.tolist()
        segment_df['song_no'] = int(song_no)
        segment_df['song_id'] = int(song_id)
        segment_df['user'] = user

        # va_info = [av_rating[(av_rating['participant_id']==user) & (av_rating['song_no']==int(songno)) & (av_rating['song_id']==int(songid))].values.tolist()[0] for _ in range(len(segments))]
        # tmp_va_info = pd.DataFrame(columns=va_info_col, data=va_info)
        # segment_df = pd.concat([tmp_va_info, segment_df], axis=1)
        signal_df.append(segment_df)
    
    return pd.concat(signal_df)

In [None]:
# users = os.listdir(HKU_DIR)
# hku_data = {}

# for signal_name in ['HR']:
# # for signal_name in SIGNALS:
#     if signal_name == 'IBI':
#         continue
#     signals = []
#     for user in users:
#         signal_files = get_folder_files(os.path.join(HKU_DIR, user, signal_name))
#         signal_df = signal_processer(user, signal_name, signal_files)
#         signals.append(signal_df)
#     signals = pd.concat(signals)
#     signals.to_csv(os.path.join(PROCESSED_DIR, 'HKU956', '{}.csv'.format(signal_name)), index=False)
#     print(signal_name, signals.shape)
#     hku_data[signal_name] = signals

In [18]:
users = os.listdir(HKU_DIR)
hku_data = {}

for signal_name in ['HR']:
# for signal_name in SIGNALS:
    if signal_name == 'IBI':
        continue
    signals = []
    for user in users:
        signal_files = get_folder_files(os.path.join(HKU_DIR, user, signal_name))
        signal_df = signal_processer(user, signal_name, signal_files)
        signals.append(signal_df)
        break
    signals = pd.concat(signals)
    # signals.to_csv(os.path.join(PROCESSED_DIR, 'HKU956', '{}.csv'.format(signal_name)), index=False)
    print(signal_name, signals.shape)
    hku_data[signal_name] = signals

[71.03 70.97 70.97 70.95 70.95 71.   71.05 71.12 71.18 71.15 71.12 71.08
 71.05 71.03 71.03 71.03 71.03 71.02 71.05 71.08 71.12 71.18 71.2  71.12
 71.08 71.03 71.02 71.   71.03 71.07 71.12 71.18 71.3  71.42 71.57 71.73
 71.9  72.08 72.25 72.38 72.48 72.63 72.8  72.95 73.1  73.23 73.35 73.47
 73.45 73.42 73.4  73.38 73.35 73.33 73.32 73.3  73.28 73.28 73.28 73.3
 73.32 73.37 73.38 73.38 73.38 73.35 73.28 73.23 73.22 73.32 73.42 73.53
 73.65 73.75 73.82 73.87 73.88 73.88 73.87 73.83 73.78 73.72 73.67 73.63
 73.6  73.58 73.57 73.57 73.53 73.5  73.45 73.38 73.33 73.27 73.2  73.15
 73.12 73.12 73.12 73.15 73.22 73.27 73.32 73.4  73.5  73.6  73.7  73.82
 73.9  74.13 74.35 74.53 74.73 74.95 75.23 75.55 75.85 76.13 76.33 76.52
 76.7  76.83 77.02 77.23 77.43 77.43 77.43 77.43 77.43 77.43 77.43 77.43
 77.43 77.43 77.43 77.43 77.43 77.43 77.43 77.43 77.43 77.43 77.43 77.43
 77.43 77.43 77.43]
[77.77 77.82 77.87 77.92 77.93 77.93 78.02 78.07 78.03 78.   77.97 77.93
 77.9  77.87 77.8  77.73 77.67 7

In [14]:
hku_data['TEMP']

Unnamed: 0,TEMP_seg0,TEMP_seg1,TEMP_seg2,TEMP_seg3,TEMP_seg4,TEMP_seg5,TEMP_seg6,TEMP_seg7,TEMP_seg8,TEMP_seg9,...,TEMP_seg394,TEMP_seg395,TEMP_seg396,TEMP_seg397,TEMP_seg398,TEMP_seg399,segment_id,song_no,song_id,user
0,0.302920,0.302927,0.302934,0.302940,0.302947,0.302954,0.302960,0.302967,0.302974,0.302980,...,0.591077,0.592713,0.594348,0.595984,0.597619,0.599255,0,0,262957,hku1903
1,0.349069,0.349839,0.350609,0.351378,0.352148,0.352918,0.353687,0.354457,0.355226,0.355996,...,0.897712,0.898910,0.900107,0.901304,0.902502,0.903699,1,0,262957,hku1903
2,0.600890,0.602564,0.604237,0.605910,0.607583,0.609256,0.610929,0.612602,0.614275,0.615949,...,0.997635,0.997511,0.997386,0.997262,0.997137,0.997013,2,0,262957,hku1903
3,0.904897,0.905944,0.906992,0.908040,0.909087,0.910135,0.911183,0.912231,0.913278,0.914326,...,0.872360,0.871438,0.870515,0.869593,0.868670,0.867747,3,0,262957,hku1903
4,0.996888,0.996615,0.996342,0.996070,0.995797,0.995524,0.995251,0.994978,0.994705,0.994432,...,0.683783,0.682923,0.682064,0.681204,0.680344,0.679484,4,0,262957,hku1903
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14,0.143894,0.143719,0.143543,0.143367,0.143191,0.143015,0.142840,0.142664,0.142488,0.142312,...,0.081868,0.081731,0.081594,0.081457,0.081319,0.081182,14,9,405924,hku1932
15,0.110471,0.110315,0.110158,0.110002,0.109845,0.109689,0.109533,0.109376,0.109220,0.109064,...,0.057144,0.057033,0.056922,0.056811,0.056700,0.056589,15,9,405924,hku1932
16,0.081045,0.080910,0.080776,0.080642,0.080508,0.080373,0.080239,0.080105,0.079970,0.079836,...,0.037798,0.037714,0.037629,0.037545,0.037460,0.037376,16,9,405924,hku1932
17,0.056477,0.056370,0.056262,0.056154,0.056047,0.055939,0.055832,0.055724,0.055616,0.055509,...,0.023366,0.023304,0.023241,0.023179,0.023117,0.023055,17,9,405924,hku1932
