In [1]:
#!g1.1
import json
import os
import torch
from tqdm import tqdm
import scipy.io.wavfile as wav

from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader, Sampler
from torch.utils.tensorboard import SummaryWriter

import numpy as np
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
import math

import torchaudio
from boltons.fileutils import iter_find_files

In [2]:
#!g1.1
# %pip install pytorch_lightning --upgrade

In [3]:
#!g1.1
from models import ConvFeatureEncoder, SegmentsRepr, SegmentsEncoder, NegativeSampler, SegmentPredictor, FinModel, FinModel1
from utils import ConstrativeLoss, sample_negatives
# from trainer import Trainer
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

In [4]:
#!g1.1
# import wandb

In [5]:
#!g1.1
def spectral_size(wav_len):
    """Return the sequence length left after five stacked strided 1-D convolutions.

    The standard convolution output-length formula is applied once per
    ``(kernel, stride, padding)`` triple, each result feeding the next layer.

    Args:
        wav_len: Length of the input signal in samples.

    Returns:
        int: Number of output frames (can reach 0 for very short inputs).
    """
    # NOTE(review): presumably mirrors the layers of ConvFeatureEncoder — confirm in models.py.
    conv_stack = ((10, 5, 0), (8, 4, 0), (4, 2, 0), (4, 2, 0), (4, 2, 0))
    n_frames = wav_len
    for k, s, p in conv_stack:
        span = n_frames + 2 * p - 1 * (k - 1) - 1
        n_frames = math.floor(span / s + 1)
    return n_frames

In [6]:
#!g1.1
# This class is based on https://github.com/felixkreuk/UnsupSeg/blob/master/dataloader.py

class WavPhnDataset(Dataset):
    """Waveforms paired with boundary annotations stored in sibling ``.txt`` files.

    Every ``*.wav`` file that ``iter_find_files`` reports under ``path`` is one
    item. The annotation file has the same name with a ``.txt`` extension; the
    first whitespace-separated token of each non-blank line is read as a
    boundary position (presumably in seconds — confirm against the corpus).
    """

    # Fixed factor used to scale boundary positions into sample indices.
    # NOTE(review): the sample rate returned by torchaudio.load is ignored;
    # this assumes 16 kHz recordings — confirm.
    SAMPLE_RATE = 16000

    def __init__(self, path):
        """Collect the paths of all ``*.wav`` files found under ``path``."""
        super(WavPhnDataset, self).__init__()
        self.path = path
        self.data = list(iter_find_files(self.path, "*.wav"))

    @staticmethod
    def get_datasets(path):
        """Placeholder kept from the upstream implementation; not implemented."""
        raise NotImplementedError

    @staticmethod
    def _label_path(wav_path):
        """Return the annotation path: same location and stem, ``.txt`` extension."""
        # splitext only touches the final extension, so a ".wav" substring in a
        # directory name is left alone and upper-case extensions are handled.
        return os.path.splitext(wav_path)[0] + ".txt"

    def process_file(self, wav_path):
        """Load one recording and its boundary annotations.

        Args:
            wav_path: Path of the ``.wav`` file.

        Returns:
            tuple: ``(audio, times, phonemes, wav_path, filetext_id, phn_path)``
            where ``audio`` is the first channel of the recording, ``times`` are
            the annotated boundary values with the last one dropped, ``phonemes``
            are the same values multiplied by ``SAMPLE_RATE``, ``filetext_id`` is
            the annotation file name and ``phn_path`` its full path.
        """
        phn_path = self._label_path(wav_path)
        filetext_id = os.path.basename(phn_path)

        # load audio (first channel only; the returned sample rate is not used)
        audio, _sample_rate = torchaudio.load(wav_path)
        audio = audio[0]

        # load labels -- one boundary value per non-blank line
        with open(phn_path, "r") as f:
            # NOTE(review): eval() executes whatever the annotation file contains.
            # Kept for backward compatibility; prefer float() if the files only
            # ever hold plain numbers.
            stamps = [eval(line.split()[0]) for line in f if line.strip()]

        # The final boundary is dropped in both views.
        times = torch.FloatTensor(stamps)[:-1]
        phonemes = torch.FloatTensor([t * self.SAMPLE_RATE for t in stamps])[:-1]

        return audio, times, phonemes, wav_path, filetext_id, phn_path

    def spectral_size(self, wav_len):
        """Return the frame count after the five-layer strided convolution stack."""
        layers = [(10, 5, 0), (8, 4, 0), (4, 2, 0), (4, 2, 0), (4, 2, 0)]
        for kernel, stride, padding in layers:
            wav_len = math.floor((wav_len + 2 * padding - 1 * (kernel - 1) - 1) / stride + 1)
        return wav_len

    def __getitem__(self, idx):
        """Return the ``idx``-th item as a dict consumed by ``collate_fn``.

        Keys: ``audio_file``, ``segment_file``, ``id``, ``sample`` (waveform),
        ``length`` (number of samples), ``spectral_size`` (frames after the
        convolution stack) and ``boundaries`` (the ``times`` tensor).
        """
        signal, seg, _phonemes, fname, filetext_id, segment_file = self.process_file(self.data[idx])
        n_samples = len(signal)

        return {'audio_file': fname,
                'segment_file': segment_file,
                'id': filetext_id,
                'sample': signal,
                'length': n_samples,
                'spectral_size': self.spectral_size(n_samples),
                'boundaries': seg}

    def __len__(self):
        """Number of ``.wav`` files discovered at construction time."""
        return len(self.data)

In [7]:
#!g1.1
def collate_fn(samples):
    """Zero-pad a list of dataset items into one batch.

    Args:
        samples: Non-empty list of dicts with the keys ``sample`` (1-D waveform),
            ``length``, ``spectral_size``, ``boundaries``, ``id``,
            ``audio_file`` and ``segment_file``.

    Returns:
        dict with:
            ``batch``: float32 tensor ``(len(samples), max_length)``, right-padded with zeros.
            ``lengths``: float32 tensor of the original lengths.
            ``attention_mask``: float32 tensor, 1 over real samples and 0 over padding.
            ``boundaries``: list of the per-item ``boundaries`` values (not padded).
            ``ids``, ``audio_files``, ``segment_files``: lists copied from the items.
            ``spectral_size``: float32 tensor of the per-item ``spectral_size`` values.
    """
    lengths = [sample['length'] for sample in samples]
    max_length = max(lengths)
    n_items = len(samples)

    # Preallocate and fill by slice; going through Python lists would create
    # one Python object per audio sample and is very slow for long waveforms.
    batch = torch.zeros(n_items, max_length, dtype=torch.float32)
    attention_mask = torch.zeros(n_items, max_length, dtype=torch.float32)
    for row, (sample, length) in enumerate(zip(samples, lengths)):
        batch[row, :length] = torch.as_tensor(sample['sample'], dtype=torch.float32)
        attention_mask[row, :length] = 1.0

    return dict(batch=batch,
                lengths=torch.tensor(lengths, dtype=torch.float32),
                attention_mask=attention_mask,
                boundaries=[sample['boundaries'] for sample in samples],
                ids=[sample['id'] for sample in samples],
                audio_files=[sample['audio_file'] for sample in samples],
                segment_files=[sample['segment_file'] for sample in samples],
                spectral_size=torch.tensor([sample['spectral_size'] for sample in samples],
                                           dtype=torch.float32))

In [8]:
#!g1.1


In [22]:
#!g1.1
# Build the train/validation datasets from the two Buckeye split directories.
train_dataset = WavPhnDataset('Buckeye_fin/Train')
val_dataset = WavPhnDataset('Buckeye_fin/Valid')

# Batches of 8 items assembled by collate_fn; only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

In [11]:
#!g1.1


In [12]:
#!g1.1
# Scales the base learning rate in cfg. The matching Trainer argument is
# currently commented out in the trainer setup cell.
accumulate_grad_batches = 1

In [13]:
#!g1.1
# Hyper-parameters handed to the model via Conf(cfg).
# NOTE(review): how each key is consumed is defined in models.py, not visible here.
cfg = {
    # optimisation settings
    "optimizer": "adam",
    "momentum": 0.9,
    "learning_rate": 0.0001 * accumulate_grad_batches,
    "lr_anneal_gamma": 1.0,
    "lr_anneal_step": 1000,
    # "epochs": 500,
    "grad_clip": 0.5,
    "batch_size": 8,

    # per-component argument dicts (presumably forwarded to the sub-modules — confirm)
    "conv_args": {},
    "mask_args": {"segment": "first", "add_one": False},
    "segm_enc_args": {},
    "segm_predictor_args": {},
    "loss_args": {"n_negatives": 1, "loss_args": {"reduction": "mean"}},
    "num_epoch": 2,
}

In [14]:
#!g1.1
class Conf:
    """Attribute-style view of a dict: every key becomes an instance attribute."""

    def __init__(self, my_dict):
        """Copy each ``key: value`` pair of ``my_dict`` onto ``self``."""
        for name in my_dict:
            setattr(self, name, my_dict[name])
            
# Wrap cfg so its entries can be read as attributes (e.g. config.learning_rate).
config = Conf(cfg)

In [15]:
#!g1.1
import warnings
# NOTE(review): this silences every warning category for the whole kernel,
# including deprecation notices from the libraries used below — consider
# narrowing the filter.
warnings.filterwarnings("ignore")

In [35]:
#!g1.1

# Use a single GPU when at least one is visible, otherwise none.
AVAIL_GPUS = min(1, torch.cuda.device_count())

# Same datasets and loaders as the earlier data-loading cell, rebuilt here.
train_dataset = WavPhnDataset('Buckeye_fin/Train')
val_dataset = WavPhnDataset('Buckeye_fin/Valid')

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

# Model under training for this experiment, configured from `config`.
model = FinModel(config)

# TensorBoard event files go under this directory (opened by the %tensorboard cell below).
logger = TensorBoardLogger("tb_logs_pl_r_val_acc_200_edges_train_buckeye_model_segment", name="my_model")

# Keep the three checkpoints with the highest "val_r_metr".
# NOTE(review): assumes the model logs a metric named "val_r_metr" — confirm in models.py.
checkpoint_callback = ModelCheckpoint(
    monitor="val_r_metr",
    dirpath="./",
    filename="golos_model_segment_r_val_acc_200_edges_train_buckeye_model_segment",
    save_top_k=3,
    mode="max",
)
# NOTE(review): max_epochs=20 and gradient_clip_val=2 are set here while cfg carries
# 'num_epoch': 2 and 'grad_clip': 0.5 — confirm which values are authoritative.
trainer = Trainer(max_epochs=20,
                  gpus=AVAIL_GPUS,
                  progress_bar_refresh_rate=1,
                  logger=logger,
#                   accumulate_grad_batches=accumulate_grad_batches,
                  gradient_clip_val=2, 
                  callbacks=[checkpoint_callback])


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [36]:
#!g1.1
# Open TensorBoard on the directory given to TensorBoardLogger in the previous cell.
%tensorboard --logdir ./tb_logs_pl_r_val_acc_200_edges_train_buckeye_model_segment

Launching TensorBoard...


In [None]:
#!g1.1
# Train `model` on train_loader, validating on val_loader.
trainer.fit(model, train_loader, val_loader)

In [None]:
#!g1.1


In [None]:
#!g1.1


In [108]:
#!g1.1


In [109]:
#!g1.1


In [44]:
#!g1.1
# Scales the base learning rate in cfg. The matching Trainer argument is
# currently commented out in the trainer setup cell.
accumulate_grad_batches = 1

In [45]:
#!g1.1
# Hyper-parameters handed to the model via Conf(cfg).
# NOTE(review): how each key is consumed is defined in models.py, not visible here.
cfg = {
    # optimisation settings
    "optimizer": "adam",
    "momentum": 0.9,
    "learning_rate": 0.0001 * accumulate_grad_batches,
    "lr_anneal_gamma": 1.0,
    "lr_anneal_step": 1000,
    # "epochs": 500,
    "grad_clip": 0.5,
    "batch_size": 8,

    # per-component argument dicts (presumably forwarded to the sub-modules — confirm)
    "conv_args": {},
    "mask_args": {"segment": "first", "add_one": False},
    "segm_enc_args": {},
    "segm_predictor_args": {},
    "loss_args": {"n_negatives": 1, "loss_args": {"reduction": "mean"}},
    "num_epoch": 2,
}

In [46]:
#!g1.1
class Conf:
    """Attribute-style view of a dict: every key becomes an instance attribute."""

    def __init__(self, my_dict):
        """Copy each ``key: value`` pair of ``my_dict`` onto ``self``."""
        for name in my_dict:
            setattr(self, name, my_dict[name])
            
# Wrap cfg so its entries can be read as attributes (e.g. config.learning_rate).
config = Conf(cfg)

In [47]:
#!g1.1
import warnings
# NOTE(review): this silences every warning category for the whole kernel,
# including deprecation notices from the libraries used below — consider
# narrowing the filter.
warnings.filterwarnings("ignore")

In [48]:
#!g1.1

# Use a single GPU when at least one is visible, otherwise none.
AVAIL_GPUS = min(1, torch.cuda.device_count())

# Same datasets and loaders as the earlier data-loading cell, rebuilt here.
train_dataset = WavPhnDataset('Buckeye_fin/Train')
val_dataset = WavPhnDataset('Buckeye_fin/Valid')

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

# Second experiment: FinModel1 instead of FinModel, configured from `config`.
model = FinModel1(config)

# TensorBoard event files go under this directory (opened by the %tensorboard cell below).
logger = TensorBoardLogger("tb_logs_pl_r_val_acc_200_edges_train_buckeye_peak_detection", name="my_model")

# Keep the three checkpoints with the highest "val_r_metr".
# NOTE(review): assumes the model logs a metric named "val_r_metr" — confirm in models.py.
checkpoint_callback = ModelCheckpoint(
    monitor="val_r_metr",
    dirpath="./",
    filename="golos_model_segment_r_val_acc_200_edges_train_buckeye_peak_detection",
    save_top_k=3,
    mode="max",
)
# NOTE(review): max_epochs=20 and gradient_clip_val=2 are set here while cfg carries
# 'num_epoch': 2 and 'grad_clip': 0.5 — confirm which values are authoritative.
trainer = Trainer(max_epochs=20,
                  gpus=AVAIL_GPUS,
                  progress_bar_refresh_rate=1,
                  logger=logger,
#                   accumulate_grad_batches=accumulate_grad_batches,
                  gradient_clip_val=2, 
                  callbacks=[checkpoint_callback])


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [1]:
#!g1.1
# Open TensorBoard on the directory given to TensorBoardLogger in the previous cell.
%tensorboard --logdir ./tb_logs_pl_r_val_acc_200_edges_train_buckeye_peak_detection

Launching TensorBoard...


In [50]:
#!g1.1
# Train `model` on train_loader, validating on val_loader.
trainer.fit(model, train_loader, val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type               | Params
-------------------------------------------------------
0 | conv_encoder    | ConvFeatureEncoder | 1.3 M 
1 | frame_predictor | NegativeSampler    | 0     
2 | segment_mean    | SegmentsRepr       | 0     
3 | attention_calc  | AttentionCalc      | 0     
4 | metr            | RMetrics           | 0     
5 | unsup_loss      | UnsupLoss          | 0     
6 | bounds          | GetBound           | 0     
-------------------------------------------------------
1.3 M     Trainable params
0         Non-trainable params
1.3 M     Total params
5.261     Total estimated model params size (MB)


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0.0005
)




In [None]:
#!g1.1


In [None]:
#!g1.1


In [None]:
#!g1.1
# Transformers

In [None]:
#!g1.1


In [45]:
#!g1.1
# %pip install --upgrade transformers
# %pip install datasets
# %pip install huggingface_hub

In [46]:
#!g1.1
# %pip install --upgrade pip

In [47]:
#!g1.1
import json
import os
import torch
from tqdm import tqdm
import scipy.io.wavfile as wav

from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader, Sampler
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import torchaudio

In [48]:
#!g1.1
import transformers
import json
from torch.utils.data import Dataset, DataLoader, Sampler
import os

In [49]:
#!g1.1
from models import ConvFeatureEncoder, SegmentsRepr, SegmentsEncoder, NegativeSampler, SegmentPredictor, FinModel, FinModel1
from utils import ConstrativeLoss, sample_negatives

In [50]:
#!g1.1
from model_transformers import SegmentTransformer
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

In [51]:
#!g1.1
# Show the installed transformers version (the recorded output is '4.14.0').
transformers.__version__

'4.14.0'

In [56]:
#!g1.1


In [57]:
#!g1.1
# Scales the base learning rate in cfg. The matching Trainer argument is
# currently commented out in the trainer setup cell.
accumulate_grad_batches = 1

In [58]:
#!g1.1
# Hyper-parameters handed to SegmentTransformer via Conf(cfg).
# NOTE(review): how each key is consumed is defined in model_transformers.py, not visible here.
cfg = {
    # pretrained checkpoint identifier and masking switch
    "model_path": "facebook/wav2vec2-base-960h",
    "mask": False,

    # optimisation settings
    "optimizer": "adam",
    "momentum": 0.9,
    "learning_rate": 0.0001 * accumulate_grad_batches,
    "lr_anneal_gamma": 1.0,
    "lr_anneal_step": 1000,
    "epochs": 200,
    "grad_clip": 0.5,
    "batch_size": 8,

    # per-component argument dicts (presumably forwarded to the sub-modules — confirm)
    "conv_args": {},
    "mask_args": {},
    "segm_enc_args": {},
    "segm_predictor_args": {},
    "loss_args": {"n_negatives": 1, "loss_args": {"reduction": "mean"}},
    "num_epoch": 2,
}

In [59]:
#!g1.1
class Conf:
    """Attribute-style view of a dict: every key becomes an instance attribute."""

    def __init__(self, my_dict):
        """Copy each ``key: value`` pair of ``my_dict`` onto ``self``."""
        for name in my_dict:
            setattr(self, name, my_dict[name])
            
# Wrap cfg so its entries can be read as attributes (e.g. config.model_path).
config = Conf(cfg)

In [60]:
#!g1.1
import warnings
# NOTE(review): this silences every warning category for the whole kernel,
# including deprecation notices from the libraries used below — consider
# narrowing the filter.
warnings.filterwarnings("ignore")
from collections import OrderedDict

In [61]:
#!g1.1
# Use a single GPU when at least one is visible, otherwise none.
AVAIL_GPUS = min(1, torch.cuda.device_count())

# Same datasets and loaders as the earlier data-loading cell, rebuilt here.
train_dataset = WavPhnDataset('Buckeye_fin/Train')
val_dataset = WavPhnDataset('Buckeye_fin/Valid')

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

# Third experiment: transformer-based model configured from `config`
# (config.model_path names the pretrained checkpoint).
model = SegmentTransformer(config)

# TensorBoard event files go under this directory.
logger = TensorBoardLogger("tb_logs_pl_transformers_acc_10_ep_500_r_val_edges_train_buckeye_new_loss", name="my_model")

# Keep the three checkpoints with the highest "val_r_metr".
# NOTE(review): assumes the model logs a metric named "val_r_metr" — confirm in model_transformers.py.
checkpoint_callback = ModelCheckpoint(
    monitor="val_r_metr",
    dirpath="./",
    filename="golos_model_segment_r_val_transformers_acc_10_ep_500_r_val_edges_train_buckeye_new_loss",
    save_top_k=3,
    mode="max",
)
# NOTE(review): max_epochs=20 and gradient_clip_val=2 are set here while cfg carries
# 'epochs': 200, 'num_epoch': 2 and 'grad_clip': 0.5 — confirm which values are authoritative.
trainer = Trainer(max_epochs=20,
                  gpus=AVAIL_GPUS,
                  progress_bar_refresh_rate=1,
                  logger=logger,
#                   accumulate_grad_batches=accumulate_grad_batches,
                  gradient_clip_val=2, 
                  callbacks=[checkpoint_callback])






Some weights of the model checkpoint at facebook/wav2vec2-base-960h were not used when initializing Wav2Vec2ModelForSegmentation: ['lm_head.weight', 'lm_head.bias']
- This IS expected if you are initializing Wav2Vec2ModelForSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ModelForSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ModelForSegmentation were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.project_back.weight', 'wav2vec2.project_back.bias', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and i

HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=1596.0), HTML(value='')))

HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=377667514.0), HTML(value='')))

In [62]:
#!g1.1
%tensorboard --logdir ./tb_logs_pl_transformers_acc_10_ep_500_r_val_edges_buckeye_hours_new_loss

Launching TensorBoard...


In [63]:
#!g1.1
# Train `model` on train_loader, validating on val_loader.
trainer.fit(model, train_loader, val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type                         | Params
--------------------------------------------------------------
0 | wav2vec_segm | Wav2Vec2ModelForSegmentation | 94.5 M
--------------------------------------------------------------
94.5 M    Trainable params
0         Non-trainable params
94.5 M    Total params
378.013   Total estimated model params size (MB)


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0.0005
)



HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

In [None]:
#!g1.1


In [None]:
#!g1.1


In [None]:
#!g1.1
