## 실행 순서

1. 1번 셀의 `test_weight_file`을 평가에 사용할 시드의 웨이트 파일 이름으로 변경한다.

In [1]:
import os

# Set before torch is imported by the following cell.
os.environ.update(CUDA_DEVICE_ORDER="PCI_BUS_ID")
# os.environ['CUDA_VISIBLE_DEVICES'] = "0"

# Checkpoint under evaluation; the fourth "_"-separated field of its name is
# the seed (e.g. "0126_1628_seed_0_..." -> 0).
test_weight_file = "0126_1628_seed_0_tlab_model.pt"
SEED = int(test_weight_file.split("_", 4)[3])

In [2]:
import random
import torch.nn as nn
import torch.nn.parallel
import torch.optim
from train import train
from val import validate
from test import Test
from utils.parser import parse_configuration
import numpy as np
from models.orig_cam import TLAB_CAM as Custom_CAModel
from models.tsav import TwoStreamAuralVisualModel
from datasets.dataset_val import ImageList_val
from losses.loss import CCCLoss
from datetime import datetime
from torch import nn
import pandas as pd
from warnings import filterwarnings
filterwarnings("ignore")

In [3]:
# Accuracy histories; not referenced again in the visible part of this notebook.
TrainingAccuracy_V, TrainingAccuracy_A = [], []
ValidationAccuracy_V, ValidationAccuracy_A = [], []

# Seed the Python, NumPy and PyTorch (CPU + every CUDA device) generators with
# the seed parsed from the checkpoint file name.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN behaviour and disable its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


class ValPadSequence:
    """Collate function that turns a list of samples into one padded batch.

    Every sample is indexed positionally as
    ``(visual, audio, frame_ids, video_ids, video_lengths, label_v, label_a)``.
    Audio tensors are written into a ``(16, 1, 64, T)`` slot, where ``T`` is
    the size of their last axis and may differ between samples.
    """

    def __call__(self, sorted_batch):
        """Batch the samples, zero-padding audio on the left of the last axis.

        Args:
            sorted_batch: sequence of 7-element samples (see class docstring).

        Returns:
            Tuple ``(visual_sequences, audio_features, frameids, v_ids,
            v_lengths, labelsV, labelsA)``. The visual tensors and both label
            tensors are stacked along a new leading batch axis;
            ``audio_features`` has shape ``(batch, 16, 1, 64, max_T)``; the
            frame ids, video ids and video lengths are returned as plain
            per-sample lists.
        """
        sequences = [sample[0] for sample in sorted_batch]
        aud_sequences = [sample[1] for sample in sorted_batch]

        max_spec_dim = max(aud.shape[3] for aud in aud_sequences)
        audio_features = torch.zeros(len(aud_sequences), 16, 1, 64, max_spec_dim)
        for batch_idx, spectrogram in enumerate(aud_sequences):
            # Right-align each spectrogram so the padding zeros come first.
            # The start offset is computed from the time axis (shape[3]); the
            # previous check looked at shape[2] and therefore picked the
            # non-padding branch whenever max_spec_dim <= 64.
            start = max_spec_dim - spectrogram.shape[3]
            audio_features[batch_idx, :, :, :, start:] = spectrogram

        frameids = [sample[2] for sample in sorted_batch]
        v_ids = [sample[3] for sample in sorted_batch]
        v_lengths = [sample[4] for sample in sorted_batch]
        labelV = [sample[5] for sample in sorted_batch]
        labelA = [sample[6] for sample in sorted_batch]

        visual_sequences = torch.stack(sequences)
        labelsV = torch.stack(labelV)
        labelsA = torch.stack(labelA)
        return visual_sequences, audio_features, frameids, v_ids, v_lengths, labelsV, labelsA

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained two-stream audio-visual backbone.
model_path = 'ABAW2020TNT/model2/TSAV_Sub4_544k.pth.tar' # path to the model
model = TwoStreamAuralVisualModel(num_channels=4)
# The model still lives on the CPU at this point, so deserialize onto the CPU.
# This also makes the load independent of the GPU index the checkpoint was
# saved from (and of whether a GPU is present at all).
saved_model = torch.load(model_path, map_location="cpu")
model.load_state_dict(saved_model['state_dict'])

# Swap the first video-stem convolution for a 3-input-channel copy that keeps
# the pretrained filters of input channels 0-2.
old_first_layer = model.video_model.r2plus1d.stem[0]
new_first_layer = nn.Conv3d(in_channels=3,
                            out_channels=old_first_layer.out_channels,
                            kernel_size=old_first_layer.kernel_size,
                            stride=old_first_layer.stride,
                            padding=old_first_layer.padding,
                            bias=False)

new_first_layer.weight.data = old_first_layer.weight.data[:, 0:3]
model.video_model.r2plus1d.stem[0] = new_first_layer
model = nn.DataParallel(model)
model = model.to(device)

### Freezing the model
for p in model.parameters():
    p.requires_grad = False
for p in model.children():
    p.train(False)
 
fusion_model = Custom_CAModel()
# fusion_model = nn.DataParallel(fusion_model)
# fusion_model.cuda()
fusion_model = fusion_model.to(device=device)

cam_model_path = f'SavedWeights/{test_weight_file}' # path to the model
# Deserialize the checkpoint once (it was previously read from disk three
# times) and map its tensors onto the device the fusion model lives on.
cam_saved_model = torch.load(cam_model_path, map_location=device)
fusion_model.load_state_dict(cam_saved_model['net'])
cammodel_accV = cam_saved_model['best_Val_accV']
cammodel_accA = cam_saved_model['best_Val_accA']
print("Saved cammodel_accV : ", cammodel_accV)
print("Saved cammodel_accA : ", cammodel_accA)
# Evaluation only: no gradients are needed for the fusion model.
for param in fusion_model.parameters():  # children():
    param.requires_grad = False

# Parse the experiment configuration and pull out the dataset locations.
config = "config_file.json"
configuration = parse_configuration(config)

dataset_rootpath, dataset_wavspath, dataset_labelpath = (
    configuration[key]
    for key in ('dataset_rootpath', 'dataset_wavspath', 'labelpath')
)

def load_partition_set(partition_path, seed):
    """Return the train/validation/test file lists stored for one seed.

    The JSON file at ``partition_path`` is expected to map ``"seed_<seed>"``
    to an object with ``Train_Set``, ``Validation_Set`` and ``Test_Set``
    lists of file stems.

    Args:
        partition_path: path of the JSON partition file.
        seed: seed whose partition should be read.

    Returns:
        ``(train, valid, test)`` -- three lists holding the stored names with
        ``".csv"`` appended.

    Raises:
        KeyError: if the seed or one of the three sets is missing.
    """
    import json

    with open(partition_path, 'r') as handle:
        partitions = json.load(handle)[f'seed_{seed}']

    return tuple(
        [stem + ".csv" for stem in partitions[split]]
        for split in ('Train_Set', 'Validation_Set', 'Test_Set')
    )

# File lists of the partition that belongs to the evaluated seed.
partition_path = "../data/Affwild2/seed_data.json"
train_set, valid_set, test_set = load_partition_set(partition_path, SEED)

# Month/day_hour/minute stamp of this run.
init_time = datetime.now().strftime('%m%d_%H%M')

# Follow the device selected above instead of requiring CUDA unconditionally.
criterion = CCCLoss(digitize_num=1).to(device)

# Test-time dataset/loader settings all come from the "test_params" section;
# the dataset paths reuse the variables already extracted from the config.
test_params = configuration['test_params']
testdataset = ImageList_val(root=dataset_rootpath, fileList=test_set, labelPath=dataset_labelpath,
                            audList=dataset_wavspath, length=test_params['seq_length'],
                            flag='Test', stride=test_params['stride'], dilation=test_params['dilation'],
                            subseq_length=test_params['subseq_length'])

testloader = torch.utils.data.DataLoader(
    testdataset, collate_fn=ValPadSequence(),
    **test_params['loader_params'])


initialize network with xavier
Saved cammodel_accV :  0.6246879650361636
Saved cammodel_accA :  0.609533512707734
Number of Sequences: 83


In [4]:
from tqdm import tqdm
import torch
import torch.nn.parallel
import torch.optim
from scipy.ndimage import uniform_filter1d
import numpy as np
import sys
from EvaluationMetrics.cccmetric import ccc


def _module_device(module):
    """Return the device of *module*'s first parameter (CUDA if available when it has none)."""
    first_param = next(module.parameters(), None)
    if first_param is None:
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return first_param.device


def Test(val_loader, model, criterion, cam):
    """Run the frozen backbone and the fusion model over a loader and score them.

    Note: this definition shadows the ``Test`` imported from ``test`` earlier
    in the notebook.

    Args:
        val_loader: iterable of batches ``(visualdata, audiodata, frame_ids,
            videos, vid_lengths, labelsV, labelsA)``; it must support ``len``.
        model: feature extractor called per batch item as
            ``model(audio, visual)`` and returning ``(audio_feat,
            visual_feat, _)``.
        criterion: unused; kept so existing call sites keep working.
        cam: fusion model called as ``cam(aud_feats, visual_feats)`` and
            returning ``(valence_outputs, arousal_outputs)``.

    Returns:
        ``(accV, accA, vid_pred, vid_label, vid_ccc)`` where ``accV``/``accA``
        are the CCC values over all frames of all videos, ``vid_pred[vid]``
        holds the smoothed ``"vout"``/``"aout"`` lists, ``vid_label[vid]`` the
        ``"vtar"``/``"atar"`` lists and ``vid_ccc[vid]`` the per-video
        ``"vccc"``/``"accc"`` values.

    Raises:
        SystemExit: if the first frame seen for a video has an id above 1.

    NOTE(review): frames whose valence label is -5.0 are skipped, which leaves
    the initial 0 in both the prediction and the label list; those zero pairs
    still take part in the smoothing and in every CCC computed below. Confirm
    that this is intended.
    """
    model.eval()
    cam.eval()
    # Inputs follow the fusion model's device instead of a hard-coded .cuda().
    device = _module_device(cam)

    # Per-video frame buffers, indexed by frame id - 1.
    pred_a, pred_v, label_a, label_v = {}, {}, {}, {}

    for visualdata, audiodata, frame_ids, videos, vid_lengths, labelsV, labelsA in tqdm(
            val_loader, total=len(val_loader), position=0, leave=True):

        audiodata = audiodata.to(device)
        visualdata = visualdata.to(device)

        with torch.no_grad():
            # The unpacking doubles as a check that the visual batch is 6-D.
            b, seq_t, _c, _subseq_t, _h, _w = visualdata.size()
            # Feature widths are hard-coded; presumably they match the
            # backbone's visual/audio outputs -- TODO confirm.
            visual_feats = torch.empty((b, seq_t, 25088), dtype=visualdata.dtype, device=visualdata.device)
            aud_feats = torch.empty((b, seq_t, 512), dtype=visualdata.dtype, device=visualdata.device)
            for i in range(b):
                audio_feat, visualfeat, _ = model(audiodata[i], visualdata[i])
                visual_feats[i] = visualfeat
                aud_feats[i] = audio_feat

            audiovisual_vouts, audiovisual_aouts = cam(aud_feats, visual_feats)

            audiovisual_vouts = audiovisual_vouts.detach().cpu().numpy()
            audiovisual_aouts = audiovisual_aouts.detach().cpu().numpy()

            labelsV = labelsV.cpu().numpy()
            labelsA = labelsA.cpu().numpy()

            for voutputs, aoutputs, labelV, labelA, frameids, video, vid_length in zip(
                    audiovisual_vouts, audiovisual_aouts, labelsV, labelsA, frame_ids, videos, vid_lengths):
                for voutput, aoutput, labV, labA, frameid, vid, length in zip(
                        voutputs, aoutputs, labelV, labelA, frameids, video, vid_length):
                    if vid not in pred_a:
                        # A video's buffers are created by its first frame.
                        if frameid > 1:
                            print(vid)
                            print(length)
                            print("something is wrong")
                            # Non-zero status: this is a failure, not a clean exit.
                            sys.exit(1)

                        pred_a[vid] = [0] * length
                        pred_v[vid] = [0] * length
                        label_a[vid] = [0] * length
                        label_v[vid] = [0] * length
                    elif frameid > length:
                        # Frame ids past the video length are ignored.
                        continue

                    if labV == -5.0:
                        continue
                    pred_a[vid][frameid - 1] = aoutput
                    pred_v[vid][frameid - 1] = voutput
                    label_a[vid][frameid - 1] = labA
                    label_v[vid][frameid - 1] = labV

    vout, vtar, aout, atar = [], [], [], []
    vid_pred, vid_label, vid_ccc = {}, {}, {}

    for key in pred_a:
        # Clip to [-1, 1], then apply a moving average (window 20 for valence,
        # 50 for arousal).
        smoothened_preds_v = uniform_filter1d(np.clip(pred_v[key], -1.0, 1.0), size=20, mode='constant')
        smoothened_preds_a = uniform_filter1d(np.clip(pred_a[key], -1.0, 1.0), size=50, mode='constant')

        key_vout = list(smoothened_preds_v)
        key_aout = list(smoothened_preds_a)
        key_vtar = list(label_v[key])
        key_atar = list(label_a[key])

        vout.extend(key_vout)
        aout.extend(key_aout)
        vtar.extend(key_vtar)
        atar.extend(key_atar)

        vid_pred[key] = {"vout": key_vout, "aout": key_aout}
        vid_label[key] = {"vtar": key_vtar, "atar": key_atar}
        vid_ccc[key] = {
            "vccc": ccc(np.array(key_vout), np.array(key_vtar)),
            "accc": ccc(np.array(key_aout), np.array(key_atar)),
        }

    accV = ccc(np.array(vout), np.array(vtar))
    accA = ccc(np.array(aout), np.array(atar))

    print(accV)
    print(accA)
    return accV, accA, vid_pred, vid_label, vid_ccc


In [5]:
# Report the dataset size, then evaluate the loaded checkpoint on the test loader.
print(f"Number of Test samples:{len(testdataset)}")
Test_vacc, Test_aacc, vid_pred, vid_label, vid_ccc = Test(
    testloader, model, criterion, fusion_model
)

Number of Test samples:22048


100%|██████████| 1378/1378 [1:43:36<00:00,  4.51s/it]


0.6018237380765902
0.5761364691168872


In [6]:
# Per-video scores: {video id: [valence CCC, arousal CCC, mean of the two]}.
new_ccc_result = {
    vid: [scores['vccc'], scores['accc'], np.mean([scores['vccc'], scores['accc']])]
    for vid, scores in vid_ccc.items()
}

new_ccc_result

{'211': [-0.07262394527845151, 0.256982839277211, 0.09217944699937974],
 '203': [0.1528944037887368, 0.5121113265687993, 0.33250286517876804],
 '325': [0.4565397779189026, 0.5656755163064809, 0.5111076471126917],
 '290': [0.5853654452920659, 0.4934554004655243, 0.539410422878795],
 '117-25-1920x1080': [0.38141316567909495,
  0.2642286690121335,
  0.32282091734561424],
 '123': [0.4269492932887261, 0.6631905546126232, 0.5450699239506747],
 '99-30-720x720': [0.7373197166467725,
  0.25666600459895084,
  0.4969928606228617],
 '46-30-484x360_left': [0.5339969276625177,
  0.5312816120102796,
  0.5326392698363986],
 '252': [0.6131513472574194, 0.6857433928501679, 0.6494473700537937],
 '140': [0.5710902108642422, 0.676556851777371, 0.6238235313208066],
 '51-30-1280x720': [-0.029055419141702305,
  0.45253757911314757,
  0.21174107998572264],
 '425': [0.5959955564427697, 0.7246061836270032, 0.6603008700348865],
 '288': [0.7027302934493633, 0.5770611144921779, 0.6398957039707707],
 '28-30-1280x720

In [7]:
# Rank the videos by ascending mean CCC (third entry of each score list).
sort_vid = sorted(
    new_ccc_result.items(),
    key=lambda entry: entry[1][2],
)
sort_vid

[('388', [-0.05410805127688569, 0.03017797132961105, -0.01196503997363732]),
 ('video66',
  [0.00696769078266363, 0.023719526601373987, 0.015343608692018808]),
 ('50-30-1920x1080',
  [0.04647612243568451, -0.01210870882393434, 0.017183706805875085]),
 ('98-30-360x360',
  [0.11805736688627258, 0.04756056282699024, 0.08280896485663142]),
 ('211', [-0.07262394527845151, 0.256982839277211, 0.09217944699937974]),
 ('94-30-1920x1080',
  [0.27470370593419485, -0.0442029668833234, 0.11525036952543573]),
 ('214', [-0.05742048754643841, 0.3258419230218001, 0.13421071773768084]),
 ('110-30-270x480',
  [0.23563685349436042, 0.04646611814395569, 0.14105148581915805]),
 ('video89', [-0.03629786536045985, 0.324485959814897, 0.14409404722721855]),
 ('118-30-640x480',
  [0.09341191158082747, 0.2118150211580163, 0.15261346636942189]),
 ('76-30-640x280',
  [-0.03424851630201494, 0.3909960853222859, 0.1783737845101355]),
 ('392', [0.028180120613132485, 0.33154210774741405, 0.17986111418027326]),
 ('17-24-

In [8]:
import pandas as pd
import os
import math
import openpyxl

# Export the per-video results to one workbook: a "full_records" summary sheet
# plus one sheet per video holding its frame-level predictions and labels.
xl_dir_path = "xl_dir"
os.makedirs(xl_dir_path, exist_ok=True)

columns = ["file_name", "infer_val", "infer_aro", "infer_mean", "anno_val", "anno_aro", "anno_mean"]
file_values_dict = {c: [] for c in columns}
orig_label = vid_label

for vid, score_list in sort_vid:
    file_values_dict['file_name'].append(vid)

    # score_list is [valence CCC, arousal CCC, mean CCC].
    file_values_dict['infer_val'].append(np.round(score_list[0], 3))
    file_values_dict['infer_aro'].append(np.round(score_list[1], 3))
    file_values_dict['infer_mean'].append(np.round(score_list[2], 3))

    orig_label_valence = np.mean(orig_label[vid]['vtar'])
    orig_label_arousal = np.mean(orig_label[vid]['atar'])

    # Valence goes to "anno_val" and arousal to "anno_aro" (the two were
    # previously written into each other's column).
    file_values_dict['anno_val'].append(np.round(orig_label_valence, 3))
    file_values_dict['anno_aro'].append(np.round(orig_label_arousal, 3))

    orig_label_mean = np.mean([orig_label_arousal, orig_label_valence])
    file_values_dict['anno_mean'].append(np.round(orig_label_mean, 3))


xl_file_name = f"tlab_score_csv_file_{SEED}.xlsx"
xl_file = os.path.join(xl_dir_path, xl_file_name)

full_record_df = pd.DataFrame(file_values_dict, columns=columns)

# A single writer in mode 'w' (re)creates the file and saves it when the block
# exits, so no pre-created workbook, second append pass or explicit save() is
# needed.
with pd.ExcelWriter(xl_file, mode='w', engine='openpyxl') as writer:
    full_record_df.to_excel(writer, sheet_name="full_records", index=False)

    for vid, _scores in sort_vid:
        va_df = pd.DataFrame({
            "pred_val": list(vid_pred[vid]['vout']),
            "pred_aro": list(vid_pred[vid]['aout']),
            "label_val": list(vid_label[vid]['vtar']),
            "label_aro": list(vid_label[vid]['atar']),
        })
        va_df.to_excel(writer, sheet_name=vid, index=False)

In [9]:
# Append this seed's overall test CCC values to the shared results table.
result_path = "save/results/tlab_overall_results_ccc.csv"
record_v, record_a, record_m = f"{Test_vacc:.3f}", f"{Test_aacc:.3f}", f"{np.mean([Test_vacc, Test_aacc]):.3f}"

result_col = ["seed", "Valence_ccc", "Arousal_ccc", "Mean_CCC"]
record = [SEED, record_v, record_a, record_m]

df = pd.DataFrame([record], columns=result_col)

# Create the output directory on first use so the write cannot fail on a
# missing folder.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
# Write the header only when the file is being created; otherwise append a row.
write_header = not os.path.exists(result_path)
df.to_csv(result_path, mode='a', header=write_header, index=False)

## Test Set에 대한 최종 표준편차까지 계산

In [10]:
import os
import pandas as pd
import numpy as np

data_path = "save/results/tlab_overall_results_ccc.csv"
data = pd.read_csv(data_path)

# Drop summary rows written by earlier runs of this cell so they are not
# averaged again.
data = data[~(data['seed'] == "result")]
data.reset_index(inplace=True, drop=True)

for k in data:
    data[k] = data[k].astype(np.float32)

display(data)

metric_columns = ['Valence_ccc', 'Arousal_ccc', 'Mean_CCC']

# Summary row: "mean ± std" over the per-seed rows of every metric column.
res = {'seed': "result"}
for col in metric_columns:
    res[col] = f"{np.mean(data[col]):.3f} ± {np.std(data[col]):.3f}"


# Render the numeric rows as fixed-precision strings before writing them back.
data['seed'] = data['seed'].apply(lambda x : f"{x:.0f}")
for col in metric_columns:
    data[col] = data[col].apply(lambda x : f"{x:.3f}")

display(data)

# DataFrame.append was removed in pandas 2.0; pd.concat adds the summary row.
data = pd.concat([data, pd.DataFrame([res])], ignore_index=True)
data.to_csv(data_path, index=False)

Unnamed: 0,seed,Valence_ccc,Arousal_ccc,Mean_CCC
0,0.0,0.602,0.576,0.589


Unnamed: 0,seed,Valence_ccc,Arousal_ccc,Mean_CCC
0,0,0.602,0.576,0.589


In [11]:
# import pandas as pd

# # Load the Excel file
# file_path22 = 'xl_dir/paper3_score_csv_file_0_nan387.xlsx'

# # Load the 'full_records' sheet
# old_data = pd.read_excel(file_path22, sheet_name='full_records')

# # Display the first few rows of the dataframe to understand its structure
# old_data.head()


In [12]:
# import numpy as np

# data = old_data[~((old_data['file_name'] == '387') & (old_data['file_name'] == '389'))]
# np.mean(data.loc[:, ['infer_val', 'infer_aro', 'infer_mean']])