In [9]:
import random
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import librosa

from sklearn.tree import DecisionTreeClassifier

import warnings
warnings.filterwarnings(action='ignore') 


from transformers import pipeline

pipe = pipeline("audio-classification", model="Rajaram1996/Hubert_emotion")


CFG = {
    'SR':16000,
    'N_MFCC':32, # Melspectrogram 벡터를 추출할 개수
    'SEED':42
}


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(CFG['SEED']) # Seed 고정

working_dir = '/scratch/network/mk8574/audio_sentiment_challenge'

train_df = pd.read_csv(os.path.join(working_dir, 'data', 'train.csv'))
test_df = pd.read_csv(os.path.join(working_dir, 'data', 'test.csv'))

def get_mfcc_feature(df):
    features = []
    for path in tqdm(df['path']):
        # librosa패키지를 사용하여 wav 파일 load
        wav_path = os.path.join(working_dir, 'data', path)
        y, sr = librosa.load(wav_path, sr=CFG['SR'])
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CFG['N_MFCC'])
        y_feature = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mfcc:
            y_feature.append(np.mean(e))
        features.append(y_feature)

    mfcc_df = pd.DataFrame(features, columns=['mfcc_'+str(x) for x in range(1,CFG['N_MFCC']+1)])
    return mfcc_df

train_x = get_mfcc_feature(train_df)
test_x = get_mfcc_feature(test_df)

train_y = train_df['label']

model = DecisionTreeClassifier(random_state=CFG['SEED'])
model.fit(train_x, train_y)

preds = model.predict(test_x)

# submission = pd.read_csv(os.path.join(working_dir, 'baseline_dy', 'sample_submission.csv'))
# submission['label'] = preds
# submission.to_csv(os.path.join(working_dir, 'baseline_dy', 'baseline_submission.csv'), index=False)

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at Rajaram1996/Hubert_emotion and are newly initialized: ['classifier.weight', 'classifier.bias', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.weight', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 5001/5001 [00:20<00:00, 241.96it/s]
100%|██████████| 1881/1881 [00:09<00:00, 202.92it/s]


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
import sys

import matplotlib.pyplot as plt
import IPython.display as ipd

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [13]:
from glob import glob
from sklearn.model_selection import train_test_split
batch_size = 64

if device == "cuda":
    num_workers = 4
    pin_memory = True
else:
    num_workers = 0
    pin_memory = False
train_df = pd.read_csv(os.path.join(working_dir, 'data', 'train.csv'))
test_df = pd.read_csv(os.path.join(working_dir, 'data', 'test.csv'))

train_dirs = "/scratch/network/mk8574/audio_sentiment_challenge/data"
train_img_paths = glob(os.path.join(train_dirs, 'train', '*.wav'))
train_img_paths, val_img_paths = train_test_split(train_img_paths, test_size=0.2, random_state=CFG['SEED'], shuffle=True)
test_img_paths = glob(os.path.join(train_dirs, 'test', '*.wav'))
train_dataset = torch.utils.data.Dataset(paths=train_img_paths)
test_dataset = torch.utils.data.Dataset(paths = test_img_paths)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

TypeError: Dataset() takes no arguments

In [3]:
class M5(nn.Module):
    def __init__(self, n_input=1, n_output=35, stride=16, n_channel=32):
        super().__init__()
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=80, stride=stride)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.pool1 = nn.MaxPool1d(4)
        self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channel)
        self.pool2 = nn.MaxPool1d(4)
        self.conv3 = nn.Conv1d(n_channel, 2 * n_channel, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(2 * n_channel)
        self.pool3 = nn.MaxPool1d(4)
        self.conv4 = nn.Conv1d(2 * n_channel, 2 * n_channel, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(2 * n_channel)
        self.pool4 = nn.MaxPool1d(4)
        self.fc1 = nn.Linear(2 * n_channel, n_output)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = F.relu(self.bn3(x))
        x = self.pool3(x)
        x = self.conv4(x)
        x = F.relu(self.bn4(x))
        x = self.pool4(x)
        x = F.avg_pool1d(x, x.shape[-1])
        x = x.permute(0, 2, 1)
        x = self.fc1(x)
        return F.log_softmax(x, dim=2)


model = M5(n_input=transformed.shape[0], n_output=len(labels))
model.to(device)
print(model)


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


n = count_parameters(model)
print("Number of parameters: %s" % n)

NameError: name 'transformed' is not defined

In [4]:
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)  # reduce the learning after 20 epochs by a factor of 10

NameError: name 'model' is not defined

In [5]:
def train(model, epoch, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):

        data = data.to(device)
        target = target.to(device)

        # apply transform and model on whole batch directly on device
        data = transform(data)
        output = model(data)

        # negative log-likelihood for a tensor of size (batch x 1 x n_output)
        loss = F.nll_loss(output.squeeze(), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print training stats
        if batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

        # update progress bar
        pbar.update(pbar_update)
        # record loss
        losses.append(loss.item())

In [6]:
def number_of_correct(pred, target):
    # count number of correct predictions
    return pred.squeeze().eq(target).sum().item()


def get_likely_index(tensor):
    # find most likely label index for each element in the batch
    return tensor.argmax(dim=-1)


def test(model, epoch):
    model.eval()
    correct = 0
    for data, target in test_loader:

        data = data.to(device)
        target = target.to(device)

        # apply transform and model on whole batch directly on device
        data = transform(data)
        output = model(data)

        pred = get_likely_index(output)
        correct += number_of_correct(pred, target)

        # update progress bar
        pbar.update(pbar_update)

    print(f"\nTest Epoch: {epoch}\tAccuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n")

In [7]:
log_interval = 20
n_epoch = 2

pbar_update = 1 / (len(train_loader) + len(test_loader))
losses = []

# The transform needs to live on the same device as the model and the data.
transform = transform.to(device)
with tqdm(total=n_epoch) as pbar:
    for epoch in range(1, n_epoch + 1):
        train(model, epoch, log_interval)
        test(model, epoch)
        scheduler.step()

NameError: name 'train_loader' is not defined

In [3]:
from transformers import WhisperFeatureExtractor
transformers.__dir__()

['__name__',
 '__doc__',
 '__package__',
 '__loader__',
 '__spec__',
 '_modules',
 '_class_to_module',
 '__all__',
 '__file__',
 '__path__',
 '_objects',
 '_name',
 '_import_structure',
 'file_utils',
 'convert_slow_tokenizer',
 'dynamic_module_utils',
 'tokenization_utils_base',
 'tokenization_utils',
 'tokenization_utils_fast',
 'models',
 'audio_utils',
 'feature_extraction_utils',
 'feature_extraction_sequence_utils',
 'WhisperFeatureExtractor',
 'benchmark',
 'commands',
 'configuration_utils',
 'convert_graph_to_onnx',
 'convert_slow_tokenizers_checkpoints_to_fast',
 'convert_tf_hub_seq_to_seq_bert_to_pytorch',
 'data',
 'data.data_collator',
 'data.metrics',
 'data.processors',
 'debug_utils',
 'deepspeed',
 'dependency_versions_check',
 'dependency_versions_table',
 'generation',
 'hf_argparser',
 'hyperparameter_search',
 'image_transforms',
 'integrations',
 'modelcard',
 'modeling_tf_pytorch_utils',
 'models.albert',
 'models.align',
 'models.altclip',
 'models.audio_spectro

In [2]:
from transformers import pipeline
import pandas as pd
from tqdm import tqdm

dic = {
"ANGRY":0,
"FEAR":1,
"SAD":2,
"DISGUST":3,
"NEUTRAL":4,

"HAPPY":5

}
submission = pd.read_csv("/scratch/network/mk8574/audio_sentiment_challenge/baseline_dy/test_submission.csv")
preds = []
for i in tqdm(submission["label"]):
    preds.append(dic[i])
print(preds)
submission['label'] = preds
submission.to_csv("/scratch/network/mk8574/audio_sentiment_challenge/baseline_dy/test_submission.csv", index=False)

100%|██████████| 1881/1881 [00:00<00:00, 3042609.26it/s]

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 1, 5, 5, 5, 3, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 2, 2, 1, 2, 3, 1, 4, 2, 2, 1, 2, 1, 2, 4, 1, 5, 3, 2, 5, 2, 1, 2, 4, 1, 2, 2, 2, 4, 5, 1, 4, 5, 3, 4, 2, 2, 4, 5, 1, 2, 2, 5, 4, 2, 2, 3, 2, 2, 2, 5, 2, 1, 4, 2, 2, 5, 2, 5, 5, 2, 1, 4, 1, 5, 3, 5, 1, 4, 4, 1, 4, 3, 4, 4, 1, 4, 4, 5, 2, 3, 1, 3, 2, 2, 2, 4, 2, 3, 5, 1, 3, 2, 2, 1, 1, 2, 2, 5, 4, 2, 2, 2, 2, 4, 3, 3, 1, 4, 3, 1, 4, 2, 2, 1, 2, 5, 3, 4, 4, 2, 1, 4, 2, 3, 3, 2, 2, 4, 4, 1, 2, 4, 3, 5, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 3, 2, 3, 5, 3, 4, 2, 2, 5, 5, 2, 1, 3, 1, 4, 4, 2, 4, 4, 3, 2, 2, 2, 1, 1, 4, 2, 3, 2, 2, 2, 5, 1, 1, 4, 2, 3, 2, 5, 4, 1, 2, 5, 4, 2, 2, 3, 2, 1, 1, 2, 2, 1, 2, 2, 4, 2, 3, 5, 5, 2, 2, 2, 5, 1, 2, 5, 4, 2, 2, 2, 4, 4, 2, 2, 2, 4, 4, 2, 1, 2, 2, 1, 4, 2, 2, 4, 2, 4, 2, 2, 2, 4, 1, 1, 2, 1, 4, 4, 2, 2, 3, 4, 2, 2, 2, 4, 5, 2, 5, 3, 2, 2, 4, 4, 5, 4, 1, 5, 2, 5, 3, 1, 4, 2, 2, 2, 3, 2, 4, 2, 1, 4, 2, 2, 2, 2, 2, 2, 5, 1, 2, 5, 2, 3, 2, 2, 4, 2, 1, 4, 4, 2, 2, 2, 4, 2, 5, 


