In [1]:
# Pin GPU selection for this kernel via environment variables:
# CUDA_DEVICE_ORDER=PCI_BUS_ID and CUDA_VISIBLE_DEVICES=0.
# NOTE(review): presumably this must run before CUDA is first initialised in
# the kernel to have any effect — keep this cell first and confirm.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [1]:
import importlib
import yaml, itertools
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio

import pandas as pd
import random

import numpy as np

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import os
import glob

import seaborn as sns


from sklearn.metrics import balanced_accuracy_score, confusion_matrix

In [18]:
from fvcore.nn import FlopCountAnalysis, flop_count_str

# FLOP count of the pretrained wav2vec 2.0 BASE model for several clip lengths.
device = 'cuda'

bundle = torchaudio.pipelines.WAV2VEC2_BASE
base_w2v = bundle.get_model().to(device).eval()

for sec in (3, 5, 10, 15):
    print(f"\n{sec}")

    # Random stand-in waveform with 16000 * sec samples
    # (presumably `sec` seconds of 16 kHz audio).
    inputs = torch.randn((1, 16000 * sec)).to(device)
    flops = FlopCountAnalysis(base_w2v, inputs)

    # The second line of the report is the model-level "#params / #flops" summary.
    report_lines = flop_count_str(flops).split('\n')
    print(report_lines[1])

    # Finer-grained breakdowns, if needed: flops.by_operator(),
    # flops.by_module(), flops.by_module_and_operator().


3
  #params: 94.37M, #flops: 21.23G

5
  #params: 94.37M, #flops: 35.9G

10
  #params: 94.37M, #flops: 74.2G

15
  #params: 94.37M, #flops: 0.11T


In [20]:
# `flop_count_str` is used below, so import it here as well; the cell must not
# depend on an earlier cell having imported it.
from fvcore.nn import FlopCountAnalysis, flop_count_str

# FLOP count of the pretrained wav2vec 2.0 LARGE model for several clip lengths.
device = 'cuda'

bundle = torchaudio.pipelines.WAV2VEC2_LARGE
# NOTE: the name `base_w2v` is kept from the BASE cell (rebinding it also drops
# the reference to the BASE model), but here it holds the LARGE model.
base_w2v = bundle.get_model()
base_w2v.to(device)
base_w2v.eval()

for sec in [3, 5, 10, 15]:
    print()
    print(sec)
    # Random stand-in waveform with 16000 * sec samples
    # (presumably `sec` seconds of 16 kHz audio).
    inputs = torch.randn((1, 16000 * sec)).to(device)
    flops = FlopCountAnalysis(base_w2v, inputs)
    # The second line of the report is the model-level "#params / #flops" summary.
    print(flop_count_str(flops).split('\n')[1])


3
  #params: 0.32G, #flops: 54.85G

5
  #params: 0.32G, #flops: 92.84G

10
  #params: 0.32G, #flops: 0.19T

15
  #params: 0.32G, #flops: 0.3T


In [31]:
import my_utils
# Both fvcore helpers are imported here, once, instead of inside the loop;
# `flop_count_str` was previously only available through an earlier cell.
from fvcore.nn import FlopCountAnalysis, flop_count_str

# FLOP count (not MIPS) of the emotion-only network for each pooling
# configuration pool_1 .. pool_4, using the first experiment (sorted by name)
# found in each directory.
mode = 'best'    # selects the checkpoint file '<mode>_model.pt'
device = 'cuda'

for p in [1, 2, 3, 4]:
    print('pool_', p)
    main_path = 'weight/only_emo/pool_' + str(p)
    exp_list = sorted(os.listdir(main_path))

    exp = exp_list[0]
    exp_dir = main_path + '/' + exp

    # Import the model definition and utilities from the .py files stored in
    # the experiment directory: strip '.py' and turn the path into a dotted
    # module name. NOTE(review): relies on '/'-separated paths and on the
    # working directory being importable.
    lib_path = glob.glob(exp_dir + '/*only_emo*.py')[0][:-3].replace('/', '.')
    saved_main = importlib.import_module(lib_path)

    # Rebinds `my_utils` to the experiment's own copy (shadows the import above).
    u_path = glob.glob(exp_dir + '/my_utils.py')[0][:-3].replace('/', '.')
    my_utils = importlib.import_module(u_path)

    torch.cuda.empty_cache()

    # SECURITY NOTE: yaml.FullLoader is kept for compatibility with the saved
    # hparams files; only load files you trust.
    with open(exp_dir + "/hparams.yaml") as f:
        hparams = yaml.load(f, Loader=yaml.FullLoader)

    seed = hparams['seed']
    my_utils.set_seed(seed)

    net = saved_main.Emotion_Network(hparams)

    # Load onto the CPU first so the checkpoint does not depend on the GPU
    # index it was saved from; the model is moved to `device` afterwards.
    weight = torch.load(exp_dir + "/" + mode + "_model.pt", map_location='cpu')
    # Despite its name, this is the full load_state_dict result
    # (missing and unexpected keys); printing it shows whether all keys matched.
    missing_keys = net.load_state_dict(weight['model_state_dict'], strict=True)
    print(missing_keys)

    net = net.to(device)
    net.eval()

    for sec in [3, 5, 10, 15]:
        # Random waveform with 16000 * sec samples plus two dummy tensor([0])
        # arguments. NOTE(review): confirm what the two extra forward inputs
        # mean against Emotion_Network.forward.
        inputs = (
            torch.randn((1, 16000 * sec)).to(device),
            torch.tensor([0]).to(device),
            torch.tensor([0]).to(device),
        )
        flops = FlopCountAnalysis(net, inputs)
        # The third line of the report is the model-level "#params / #flops" summary.
        print(sec, flop_count_str(flops).split('\n')[2])

pool_ 1
<All keys matched successfully>
3   #params: 97.33M, #flops: 21.7G
5   #params: 97.33M, #flops: 36.73G
10   #params: 97.33M, #flops: 76.05G
15   #params: 97.33M, #flops: 0.12T
pool_ 2
<All keys matched successfully>
3   #params: 99.69M, #flops: 22.09G
5   #params: 99.69M, #flops: 37.41G
10   #params: 99.69M, #flops: 77.61G
15   #params: 99.69M, #flops: 0.12T
pool_ 3
<All keys matched successfully>
3   #params: 0.1G, #flops: 22.47G
5   #params: 0.1G, #flops: 38.1G
10   #params: 0.1G, #flops: 79.17G
15   #params: 0.1G, #flops: 0.12T
pool_ 4
<All keys matched successfully>
3   #params: 0.1G, #flops: 22.86G
5   #params: 0.1G, #flops: 38.78G
10   #params: 0.1G, #flops: 80.73G
15   #params: 0.1G, #flops: 0.13T


In [33]:
# `flop_count_str` is imported here too so the cell does not rely on an
# earlier cell's import.
from fvcore.nn import FlopCountAnalysis, flop_count_str

# FLOP count (not MIPS) of the complete Emotion_Network from the first
# experiment (sorted by name) under `main_path`.
main_path = 'weight/main_final2_load_emo/freeze'
exp_list = sorted(os.listdir(main_path))

exp = exp_list[0]
mode = 'best'    # selects the checkpoint file '<mode>_model.pt'
device = 'cuda'
exp_dir = main_path + '/' + exp

# Import the model definition and utilities from the .py files stored in the
# experiment directory: strip '.py' and turn the path into a dotted module name.
# NOTE(review): relies on '/'-separated paths and on the working directory
# being importable.
lib_path = glob.glob(exp_dir + '/*main*.py')[0][:-3].replace('/', '.')
saved_main = importlib.import_module(lib_path)

u_path = glob.glob(exp_dir + '/my_utils.py')[0][:-3].replace('/', '.')
my_utils = importlib.import_module(u_path)

torch.cuda.empty_cache()

# SECURITY NOTE: yaml.FullLoader is kept for compatibility with the saved
# hparams files; only load files you trust.
with open(exp_dir + "/hparams.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.FullLoader)

seed = hparams['seed']
my_utils.set_seed(seed)

net = saved_main.Emotion_Network(hparams)

# Placeholder tensors assigned before loading the checkpoint.
# NOTE(review): presumably these only give the attributes the shapes stored in
# the checkpoint and their values are replaced by load_state_dict — confirm.
if hparams['id_net_freeze'] is not None:
    net.id_filter.data = torch.randn(1, hparams['pool_head']+1, hparams['fin_channel'])

if hparams['id_net_freeze'] != 'freeze':
    net.id_net.hs.fc = nn.Parameter(torch.Tensor(8, hparams['fin_channel']))

# Load onto the CPU first so the checkpoint does not depend on the GPU index it
# was saved from; the model is moved to `device` afterwards.
weight = torch.load(exp_dir + "/" + mode + "_model.pt", map_location='cpu')
# Despite its name, this is the full load_state_dict result (missing and
# unexpected keys); printing it shows whether all keys matched.
missing_keys = net.load_state_dict(weight['model_state_dict'], strict=True)
print(missing_keys)

net = net.to(device)
net.eval()

for sec in [3, 5, 10, 15]:
    # Random waveform with 16000 * sec samples plus two dummy tensor([0])
    # arguments. NOTE(review): confirm what the two extra forward inputs mean
    # against Emotion_Network.forward.
    inputs = (
        torch.randn((1, 16000 * sec)).to(device),
        torch.tensor([0]).to(device),
        torch.tensor([0]).to(device),
    )
    flops = FlopCountAnalysis(net, inputs)
    # The third line of the report is the model-level "#params / #flops" summary.
    print(sec, flop_count_str(flops).split('\n')[2])

<All keys matched successfully>
3   #params: 0.2G, #flops: 44.47G
5   #params: 0.2G, #flops: 75.37G
10   #params: 0.2G, #flops: 0.16T
15   #params: 0.2G, #flops: 0.24T


In [35]:
# `flop_count_str` is imported here too so the cell does not rely on an
# earlier cell's import.
from fvcore.nn import FlopCountAnalysis, flop_count_str

# FLOP count (not MIPS) of the `id_net` sub-module alone, taken from the same
# checkpoint as the full-network measurement (first experiment under `main_path`).
main_path = 'weight/main_final2_load_emo/freeze'
exp_list = sorted(os.listdir(main_path))

exp = exp_list[0]
mode = 'best'    # selects the checkpoint file '<mode>_model.pt'
device = 'cuda'
exp_dir = main_path + '/' + exp

# Import the model definition and utilities from the .py files stored in the
# experiment directory: strip '.py' and turn the path into a dotted module name.
# NOTE(review): relies on '/'-separated paths and on the working directory
# being importable.
lib_path = glob.glob(exp_dir + '/*main*.py')[0][:-3].replace('/', '.')
saved_main = importlib.import_module(lib_path)

u_path = glob.glob(exp_dir + '/my_utils.py')[0][:-3].replace('/', '.')
my_utils = importlib.import_module(u_path)

torch.cuda.empty_cache()

# SECURITY NOTE: yaml.FullLoader is kept for compatibility with the saved
# hparams files; only load files you trust.
with open(exp_dir + "/hparams.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.FullLoader)

seed = hparams['seed']
my_utils.set_seed(seed)

net = saved_main.Emotion_Network(hparams)

# Placeholder tensors assigned before loading the checkpoint.
# NOTE(review): presumably these only give the attributes the shapes stored in
# the checkpoint and their values are replaced by load_state_dict — confirm.
if hparams['id_net_freeze'] is not None:
    net.id_filter.data = torch.randn(1, hparams['pool_head']+1, hparams['fin_channel'])

if hparams['id_net_freeze'] != 'freeze':
    net.id_net.hs.fc = nn.Parameter(torch.Tensor(8, hparams['fin_channel']))

# Load onto the CPU first so the checkpoint does not depend on the GPU index it
# was saved from; the model is moved to `device` afterwards.
weight = torch.load(exp_dir + "/" + mode + "_model.pt", map_location='cpu')
# Despite its name, this is the full load_state_dict result (missing and
# unexpected keys); printing it shows whether all keys matched.
missing_keys = net.load_state_dict(weight['model_state_dict'], strict=True)
print(missing_keys)

net = net.to(device)
net.eval()

for sec in [3, 5, 10, 15]:
    # Random waveform with 16000 * sec samples plus one dummy tensor([0])
    # argument. NOTE(review): confirm what the extra forward input means
    # against the id_net forward signature.
    inputs = (
        torch.randn((1, 16000 * sec)).to(device),
        torch.tensor([0]).to(device),
    )
    # Only the id_net branch is analysed here, not the whole network.
    flops = FlopCountAnalysis(net.id_net, inputs)
    # The third line of the report is the sub-module's "#params / #flops" summary.
    print(sec, flop_count_str(flops).split('\n')[2])

<All keys matched successfully>
3   #params: 97.69M, #flops: 21.62G
5   #params: 97.69M, #flops: 36.59G
10   #params: 97.69M, #flops: 75.76G
15   #params: 97.69M, #flops: 0.12T
