In [None]:
import torch
import torchaudio

In [None]:
# requirement packages
#!pip install torchaudio
!pip install git+https://github.com/huggingface/datasets.git
!pip install git+https://github.com/huggingface/transformers.git
!pip install librosa

In [None]:
!pip install jiwer

In [None]:
# Set the locale environment variables to C.UTF-8.
%env LC_ALL=C.UTF-8
%env LANG=C.UTF-8
# Point both the transformers cache and the datasets cache at /content/cache.
%env TRANSFORMERS_CACHE=/content/cache
%env HF_DATASETS_CACHE=/content/cache
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous. That
# helps when locating a failing kernel but presumably slows training down; confirm
# it is still wanted for a full training run.
%env CUDA_LAUNCH_BLOCKING=1

env: LC_ALL=C.UTF-8
env: LANG=C.UTF-8
env: TRANSFORMERS_CACHE=/content/cache
env: HF_DATASETS_CACHE=/content/cache
env: CUDA_LAUNCH_BLOCKING=1


In [None]:
import numpy as np
import pandas as pd


from pathlib import Path
from tqdm import tqdm

from sklearn.model_selection import train_test_split

import os
import sys

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df1 = pd.read_csv("/content/drive/MyDrive/annotations_10_sec_all_labels.csv")

In [None]:
# Collapse the source labels into two classes: anxiety -> sad, calm -> happy.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on the `df1.emotion` accessor: in-place methods on a
# Series pulled out of a DataFrame are chained assignment, which does not update
# `df1` when pandas copy-on-write is active.
df1["emotion"] = df1["emotion"].replace({"anxiety": "sad", "calm": "happy"})

In [None]:
import re

# The annotation CSV stores paths relative to the Drive root; make them absolute.
# Only rows that do not already start with the root are prefixed, so re-running
# this cell does not prepend the root a second time.
_drive_root = '/content/drive/MyDrive/'
_paths = df1['path'].astype(str)
df1['path'] = _paths.where(_paths.str.startswith(_drive_root), _drive_root + _paths)

In [None]:
# `df` is a second name for the same DataFrame object as `df1` (no copy is made),
# so a change made through either name is visible through both.
df=df1

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,path,instrument,emotion
0,0,/content/drive/MyDrive/10_sec_chunks/anxiety_f...,flute,sad
1,1,/content/drive/MyDrive/10_sec_chunks/anxiety_f...,flute,sad
2,2,/content/drive/MyDrive/10_sec_chunks/anxiety_f...,flute,sad
3,3,/content/drive/MyDrive/10_sec_chunks/anxiety_f...,flute,sad
4,4,/content/drive/MyDrive/10_sec_chunks/anxiety_f...,flute,sad


In [None]:
len(df)

5614

In [None]:
save_path = "/content/drive/MyDrive/10_sec_chunks/"

# Stratified 80/20 split on the emotion label with a fixed seed, so both splits keep
# the same class balance and the split is reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=101, stratify=df["emotion"])

train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Build the file names with pathlib: `save_path` already ends in "/", so the former
# f"{save_path}/train.csv" produced ".../10_sec_chunks//train.csv".
# The files are tab-separated and written without the index column.
train_df.to_csv(Path(save_path) / "train.csv", sep="\t", encoding="utf-8", index=False)
test_df.to_csv(Path(save_path) / "test.csv", sep="\t", encoding="utf-8", index=False)


print(train_df.shape)
print(test_df.shape)

(4491, 4)
(1123, 4)


In [None]:
# Loading the created dataset using datasets
# NOTE(review): `load_metric` is not used anywhere in the visible notebook and, as far
# as I know, has been removed from recent `datasets` releases. Because this notebook
# installs `datasets` from the git main branch, this import may fail - confirm and drop it.
from datasets import load_dataset, load_metric


# The tab-separated files written by the split cell. The held-out 20% (test.csv) is
# loaded here under the name "validation".
data_files = {
    "train": "/content/drive/MyDrive/10_sec_chunks/train.csv", 
    "validation": "/content/drive/MyDrive/10_sec_chunks/test.csv",
}

dataset = load_dataset("csv", data_files=data_files, delimiter="\t", )
train_dataset = dataset["train"]
eval_dataset = dataset["validation"]

print(train_dataset)
print(eval_dataset)

Using custom data configuration default-8fb0a7e00565d70a


Downloading and preparing dataset csv/default to /content/cache/csv/default-8fb0a7e00565d70a/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /content/cache/csv/default-8fb0a7e00565d70a/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

Dataset({
    features: ['Unnamed: 0', 'path', 'instrument', 'emotion'],
    num_rows: 4491
})
Dataset({
    features: ['Unnamed: 0', 'path', 'instrument', 'emotion'],
    num_rows: 1123
})


In [None]:
input_column = "path"
output_column = "emotion"

In [None]:
# Derive the class names from the training split. A label's position in this sorted
# list is the integer id used for it (label_to_id and the config label maps both
# index into `label_list`).
label_list = train_dataset.unique(output_column)
label_list.sort()  # Let's sort it for determinism
num_labels = len(label_list)
print(f"A classification problem with {num_labels} classes: {label_list}")

A classification problem with 2 classes: ['happy', 'sad']


In [None]:
from transformers import AutoConfig, Wav2Vec2Processor

In [None]:
model_name_or_path = "lighteternal/wav2vec2-large-xlsr-53-greek"
pooling_mode = "mean"

In [None]:
# Model configuration: start from the checkpoint's config, attach both directions of
# the label mapping, and record which pooling strategy the classifier should use.
config = AutoConfig.from_pretrained(
    model_name_or_path,
    num_labels=num_labels,
    label2id=dict(zip(label_list, range(len(label_list)))),
    id2label=dict(enumerate(label_list)),
    finetuning_task="wav2vec2_clf",
)
config.pooling_mode = pooling_mode

Downloading:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

In [None]:
# Load the processor published with the checkpoint and read the sampling rate of its
# feature extractor; the audio-loading helpers resample every clip to this rate.
processor = Wav2Vec2Processor.from_pretrained(model_name_or_path,)
target_sampling_rate = processor.feature_extractor.sampling_rate
print(f"The target sampling rate: {target_sampling_rate}")

Downloading:   0%|          | 0.00/158 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/535 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/138 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

The target sampling rate: 16000


In [None]:
def speech_file_to_array_fn(path):
    """Load one audio file as a 1-D mono waveform at ``target_sampling_rate``.

    Args:
        path: Location of an audio file readable by ``torchaudio.load``.

    Returns:
        A 1-D numpy array holding the resampled waveform.
    """
    speech_array, sampling_rate = torchaudio.load(path)
    # torchaudio.load returns (channels, frames) by default. Average over the channel
    # axis rather than calling squeeze(): squeeze() only removes that axis for mono
    # files, so a stereo file used to come out 2-D and was handed to the processor
    # as if it were a single waveform. For mono input the mean is the signal itself.
    mono = speech_array.mean(dim=0, keepdim=True)
    resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
    return resampler(mono).squeeze(0).numpy()

def label_to_id(label, label_list):
    """Translate a label into its position in ``label_list``.

    Args:
        label: The label value to look up.
        label_list: Ordered collection of known labels.

    Returns:
        The index of ``label`` in ``label_list``; ``-1`` when the label is not
        present; or ``label`` itself, unchanged, when ``label_list`` is empty.
    """
    # With no known labels there is nothing to map against: pass the value through.
    if len(label_list) == 0:
        return label

    try:
        return label_list.index(label)
    except ValueError:
        # Unknown labels are reported with the sentinel -1.
        return -1

def preprocess_function(examples):
    """Turn a batch of dataset rows into model inputs plus integer labels.

    Args:
        examples: A batch of rows (column name -> list of values) containing at least
            the ``input_column`` (audio paths) and ``output_column`` (label names).

    Returns:
        The processor output for the loaded waveforms, with an added ``"labels"``
        entry holding one id per row (see ``label_to_id``).
    """
    label_ids = [label_to_id(name, label_list) for name in examples[output_column]]
    waveforms = list(map(speech_file_to_array_fn, examples[input_column]))

    encoded = processor(waveforms, sampling_rate=target_sampling_rate)
    encoded["labels"] = list(label_ids)
    return encoded

In [None]:
# Both splits go through the same preprocessing with the same batching settings,
# so the settings are written once and shared.
_map_kwargs = dict(batch_size=100, batched=True, num_proc=4)

train_dataset = train_dataset.map(preprocess_function, **_map_kwargs)
eval_dataset = eval_dataset.map(preprocess_function, **_map_kwargs)

In [None]:
idx = 0
# Fetch the row once; the original looked up `train_dataset[idx]` four times.
sample = train_dataset[idx]

# Print only a short prefix of the waveform and mask. Dumping the full lists floods
# the cell output (presumably ~160k values for a 10-second clip at 16 kHz).
preview = 10
print(f"Training input_values (first {preview} of {len(sample['input_values'])}): {sample['input_values'][:preview]}")
print(f"Training attention_mask (first {preview} of {len(sample['attention_mask'])}): {sample['attention_mask'][:preview]}")
print(f"Training labels: {sample['labels']} - {sample['emotion']}")

Training labels: 1 - sad


In [None]:
from dataclasses import dataclass
from typing import Optional, Tuple
import torch
from transformers.file_utils import ModelOutput


@dataclass
class SpeechClassifierOutput(ModelOutput):
    """Structured result returned by ``Wav2Vec2ForSpeechClassification.forward``.

    Used when ``return_dict`` is true. Every field defaults to ``None``.
    """

    # Loss value; stays None when forward is called without labels.
    loss: Optional[torch.FloatTensor] = None
    # Output of the classification head.
    logits: torch.FloatTensor = None
    # Passed through from the wav2vec2 encoder output (`outputs.hidden_states`).
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    # Passed through from the wav2vec2 encoder output (`outputs.attentions`).
    attentions: Optional[Tuple[torch.FloatTensor]] = None

In [None]:
import torch
import torch.nn as nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from transformers.models.wav2vec2.modeling_wav2vec2 import (
    Wav2Vec2PreTrainedModel,
    Wav2Vec2Model
)


class Wav2Vec2ClassificationHead(nn.Module):
    """Classification head: dropout -> linear -> tanh -> dropout -> linear.

    The first linear layer keeps the width at ``config.hidden_size``; the second
    projects to ``config.num_labels`` logits. Both dropout applications share one
    ``nn.Dropout`` module with rate ``config.final_dropout``.
    """

    def __init__(self, config):
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(config.final_dropout)
        self.out_proj = nn.Linear(width, config.num_labels)

    def forward(self, features, **kwargs):
        """Map ``features`` to logits; extra keyword arguments are accepted and ignored."""
        hidden = self.dense(self.dropout(features))
        hidden = self.dropout(torch.tanh(hidden))
        return self.out_proj(hidden)


class Wav2Vec2ForSpeechClassification(Wav2Vec2PreTrainedModel):
    """Sequence-level classifier on top of a wav2vec2 encoder.

    The encoder's first output is reduced over dim 1 with the strategy named by
    ``config.pooling_mode`` and the pooled vector is fed to
    ``Wav2Vec2ClassificationHead`` to obtain ``config.num_labels`` logits.
    """

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.pooling_mode = config.pooling_mode

        # Sub-modules are created in this order (encoder first, then head) before
        # init_weights() runs.
        self.wav2vec2 = Wav2Vec2Model(config)
        self.classifier = Wav2Vec2ClassificationHead(config)

        self.init_weights()

    def freeze_feature_extractor(self):
        """Call ``_freeze_parameters()`` on the encoder's feature extractor."""
        self.wav2vec2.feature_extractor._freeze_parameters()

    def merged_strategy(self, hidden_states, mode="mean"):
        """Reduce ``hidden_states`` over dim 1.

        Args:
            hidden_states: Tensor to pool; dim 1 is the axis that is reduced.
            mode: One of ``"mean"``, ``"sum"`` or ``"max"``.

        Returns:
            The pooled tensor, with dim 1 removed.

        Raises:
            Exception: If ``mode`` is not one of the supported names.
        """
        if mode == "mean":
            return hidden_states.mean(dim=1)
        if mode == "sum":
            return hidden_states.sum(dim=1)
        if mode == "max":
            # max over a dim yields (values, indices); only the values are needed.
            return hidden_states.max(dim=1).values

        raise Exception(
            "The pooling method hasn't been defined! Your pooling mode must be one of these ['mean', 'sum', 'max']")

    def forward(
            self,
            input_values,
            attention_mask=None,
            output_attentions=None,
            output_hidden_states=None,
            return_dict=None,
            labels=None,
    ):
        """Run the encoder, pool its output, classify, and optionally compute a loss.

        Args:
            input_values: Passed to the wav2vec2 encoder as its first argument.
            attention_mask: Forwarded to the encoder.
            output_attentions: Forwarded to the encoder.
            output_hidden_states: Forwarded to the encoder.
            return_dict: Whether to return a ``SpeechClassifierOutput``; when ``None``
                the value of ``config.use_return_dict`` is used.
            labels: Optional targets. When given, a loss is computed according to
                ``config.problem_type``.

        Returns:
            A ``SpeechClassifierOutput`` when ``return_dict`` is true; otherwise a
            tuple ``(logits, *encoder_outputs[2:])``, prefixed with the loss when one
            was computed.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        encoder_outputs = self.wav2vec2(
            input_values,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        pooled = self.merged_strategy(encoder_outputs[0], mode=self.pooling_mode)
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            # The problem type is inferred once, on the first call with labels, and
            # stored on the config so later calls reuse it.
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            problem_type = self.config.problem_type
            if problem_type == "regression":
                loss = MSELoss()(logits.view(-1, self.num_labels), labels)
            elif problem_type == "single_label_classification":
                loss = CrossEntropyLoss()(logits.view(-1, self.num_labels), labels.view(-1))
            elif problem_type == "multi_label_classification":
                loss = BCEWithLogitsLoss()(logits, labels)

        if return_dict:
            return SpeechClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )

        # Tuple form: logits followed by everything after the encoder's first two outputs.
        output = (logits,) + encoder_outputs[2:]
        return output if loss is None else (loss,) + output


In [None]:
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
import torch

import transformers
from transformers import Wav2Vec2Processor


@dataclass
class DataCollatorCTCWithPadding:
    """
    Collate dataset rows into one padded batch with a label tensor.

    Args:
        processor (:class:`~transformers.Wav2Vec2Processor`)
            The processor used for processing the data; its ``pad`` method builds the batch.
        padding (:obj:`bool` or :obj:`str`, `optional`, defaults to :obj:`True`):
            Padding strategy forwarded to ``processor.pad``.
        max_length (:obj:`int`, `optional`):
            Forwarded to ``processor.pad`` as ``max_length``.
        max_length_labels (:obj:`int`, `optional`):
            Accepted for interface compatibility; not read by ``__call__``.
        pad_to_multiple_of (:obj:`int`, `optional`):
            Forwarded to ``processor.pad`` as ``pad_to_multiple_of``.
        pad_to_multiple_of_labels (:obj:`int`, `optional`):
            Accepted for interface compatibility; not read by ``__call__``.
    """

    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    max_length_labels: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        """Pad the ``input_values`` of ``features`` and attach a ``labels`` tensor.

        The label tensor is ``torch.long`` when the first label is a Python ``int``
        and ``torch.float`` otherwise.
        """
        labels = [row["labels"] for row in features]
        # The dtype is decided from the first label before any padding work is done.
        label_dtype = torch.float
        if isinstance(labels[0], int):
            label_dtype = torch.long

        # Only the audio is handed to the processor; labels are attached afterwards.
        audio_only = [{"input_values": row["input_values"]} for row in features]
        batch = self.processor.pad(
            audio_only,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )
        batch["labels"] = torch.tensor(labels, dtype=label_dtype)

        return batch

In [None]:
data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)

In [None]:
is_regression = False

In [None]:
import numpy as np
from transformers import EvalPrediction


def compute_metrics(p: EvalPrediction):
    """Compute the evaluation metric for one evaluation pass.

    Args:
        p: Evaluation result exposing ``predictions`` (an array, or a tuple whose
            first element is used) and ``label_ids``.

    Returns:
        ``{"mse": ...}`` when the module-level ``is_regression`` flag is true,
        otherwise ``{"accuracy": ...}`` based on the arg-max over axis 1.
    """
    raw = p.predictions
    if isinstance(raw, tuple):
        raw = raw[0]

    if is_regression:
        squeezed = np.squeeze(raw)
        return {"mse": ((squeezed - p.label_ids) ** 2).mean().item()}

    predicted_ids = np.argmax(raw, axis=1)
    return {"accuracy": (predicted_ids == p.label_ids).astype(np.float32).mean().item()}

In [None]:
# Instantiate the custom classifier from the pretrained checkpoint, using the config
# built earlier (label maps, num_labels, pooling_mode).
# NOTE(review): freeze_feature_extractor() is defined on the model but is never called
# in the visible notebook - confirm that training the feature extractor is intended.
model = Wav2Vec2ForSpeechClassification.from_pretrained(
    model_name_or_path,
    config=config,
)

Downloading:   0%|          | 0.00/1.18G [00:00<?, ?B/s]

Some weights of the model checkpoint at lighteternal/wav2vec2-large-xlsr-53-greek were not used when initializing Wav2Vec2ForSpeechClassification: ['lm_head.bias', 'lm_head.weight']
- This IS expected if you are initializing Wav2Vec2ForSpeechClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForSpeechClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForSpeechClassification were not initialized from the model checkpoint at lighteternal/wav2vec2-large-xlsr-53-greek and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a d

In [None]:
from transformers import TrainingArguments

# Short fine-tuning run: 200 optimizer steps, each accumulating 2 batches of 4
# examples per device (4 x 2 = 8 examples per device per step).
training_args = TrainingArguments(
    output_dir="/content/checkpoints",
    # output_dir="/content/gdrive/MyDrive/wav2vec2-xlsr-greek-speech-emotion-recognition"
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    evaluation_strategy="steps",
    #num_train_epochs=0.17,
    fp16=True,
    save_steps=100,
    eval_steps=10,  # evaluation on eval_dataset is triggered every 10 steps
    logging_steps=10,
    learning_rate=1e-4,
    save_total_limit=2,
    max_steps=200,  # overrides num_train_epochs (the Trainer log output says so)
    load_best_model_at_end=True,
)

In [None]:
from typing import Any, Dict, Union

import torch
from packaging import version
from torch import nn

from transformers import (
    Trainer,
    is_apex_available,
)

if is_apex_available():
    from apex import amp

if version.parse(torch.__version__) >= version.parse("1.6"):
    _is_native_amp_available = True
    from torch.cuda.amp import autocast


class CTCTrainer(Trainer):
    """Trainer subclass used to fine-tune the speech classifier.

    The class is kept so existing ``CTCTrainer(...)`` call sites keep working, but the
    training step now defers to ``Trainer.training_step``. The earlier override was a
    copy of the library's own step that read Trainer internals (``self.use_amp``,
    ``self.scaler``, ``self.use_apex``, ``self.deepspeed``) and a module-level
    ``autocast`` that is only imported conditionally. Those internals are not a stable
    API and the notebook installs transformers from the git main branch, so the copy
    fails as soon as one of them is renamed or removed.
    """

    def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], *args, **kwargs) -> torch.Tensor:
        """
        Perform a training step on a batch of inputs.

        Args:
            model (:obj:`nn.Module`):
                The model to train.
            inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
                The inputs and targets of the model.
            *args, **kwargs:
                Any additional arguments the installed ``Trainer`` passes to
                ``training_step``; forwarded unchanged so the override stays
                compatible with releases whose signature has extra parameters.

        Return:
            :obj:`torch.Tensor`: The tensor with training loss on this batch.
        """
        # Mixed precision, gradient accumulation scaling and the backward pass are all
        # handled by the installed Trainer implementation.
        return super().training_step(model, inputs, *args, **kwargs)


In [None]:
# Wire the model, collator, arguments, metric function and both dataset splits together.
# The processor's feature extractor is passed in the `tokenizer` slot
# (presumably so it is saved next to the checkpoints - TODO confirm).
trainer = CTCTrainer(
    model=model,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=processor.feature_extractor,
)

max_steps is given, it will override any value given in num_train_epochs
Using amp fp16 backend


In [None]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
10,0.7285,0.784473,0.494212
20,0.7397,0.713995,0.505788
30,0.7175,0.6872,0.524488
40,0.6968,0.678623,0.62244
50,0.6708,0.640289,0.682992
60,0.6307,0.659545,0.667854
70,0.6432,0.591059,0.69724
80,0.6288,0.629641,0.62333
90,0.6402,0.58358,0.705254
100,0.5784,0.55542,0.722173


TrainOutput(global_step=200, training_loss=0.603882417678833, metrics={'train_runtime': 5073.1597, 'train_samples_per_second': 0.315, 'train_steps_per_second': 0.039, 'total_flos': 4.8612920832e+17, 'train_loss': 0.603882417678833, 'epoch': 0.36})

In [None]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
10,0.4097,0.534091,0.742654
20,0.4565,0.534091,0.742654
30,0.6338,0.534091,0.742654
40,0.3988,0.534091,0.742654
50,0.4863,0.534091,0.742654
60,0.7535,0.534091,0.742654
70,0.5058,0.534091,0.742654
80,0.483,0.534091,0.742654
90,0.4732,0.534091,0.742654
100,0.4084,0.534091,0.742654


TrainOutput(global_step=200, training_loss=0.4965640068054199, metrics={'train_runtime': 5072.8794, 'train_samples_per_second': 0.315, 'train_steps_per_second': 0.039, 'total_flos': 4.8612920832e+17, 'train_loss': 0.4965640068054199, 'epoch': 0.36})

In [None]:
import librosa
from sklearn.metrics import classification_report

In [None]:
# NOTE(review): this is the same test.csv that is loaded as the "validation" split for
# training-time evaluation, and load_best_model_at_end=True selects the checkpoint
# using that split. The classification report below is therefore not computed on
# unseen data; a separate held-out file would be needed for an unbiased estimate.
test_dataset = load_dataset("csv", data_files={"test": "/content/drive/MyDrive/10_sec_chunks/test.csv"}, delimiter="\t")["test"]
test_dataset

Using custom data configuration default-246a2f2350ed7ab2


Downloading and preparing dataset csv/default to /content/cache/csv/default-246a2f2350ed7ab2/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /content/cache/csv/default-246a2f2350ed7ab2/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['Unnamed: 0', 'path', 'instrument', 'emotion'],
    num_rows: 1123
})

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


In [None]:
def speech_file_to_array_fn(batch):
    """Load the audio referenced by ``batch["path"]`` and store it under ``batch["speech"]``.

    NOTE(review): this definition replaces the earlier ``speech_file_to_array_fn(path)``
    of the same name, which ``preprocess_function`` calls with a plain path. After this
    cell has run, re-running the training preprocessing will fail until the earlier
    cell is executed again - consider giving one of the two a distinct name.

    Args:
        batch: One dataset row; must contain a ``"path"`` entry.

    Returns:
        The same row with an added ``"speech"`` entry: a 1-D numpy waveform resampled
        to ``processor.feature_extractor.sampling_rate``.
    """
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    # torchaudio.load returns (channels, frames) by default. Averaging over the channel
    # axis always yields a 1-D signal, whereas squeeze() left stereo files 2-D.
    speech_array = speech_array.mean(dim=0).numpy()
    # orig_sr / target_sr must be passed by keyword: recent librosa releases made them
    # keyword-only, and the keyword form is also accepted by older releases.
    speech_array = librosa.resample(
        np.asarray(speech_array),
        orig_sr=sampling_rate,
        target_sr=processor.feature_extractor.sampling_rate,
    )

    batch["speech"] = speech_array
    return batch


def predict(batch):
    """Classify a batch of waveforms and store the predicted class ids.

    Args:
        batch: A batch of dataset rows with a ``"speech"`` entry holding the waveforms.

    Returns:
        The same batch with an added ``"predicted"`` entry: the arg-max class id per row.
    """
    # Put the model in inference mode explicitly. Nothing in the notebook guarantees
    # it after training, and with dropout active the predictions would be stochastic.
    model.eval()

    features = processor(batch["speech"], sampling_rate=processor.feature_extractor.sampling_rate, return_tensors="pt", padding=True)

    input_values = features.input_values.to(device)
    attention_mask = features.attention_mask.to(device)

    with torch.no_grad():
        logits = model(input_values, attention_mask=attention_mask).logits

    pred_ids = torch.argmax(logits, dim=-1).cpu().numpy()
    batch["predicted"] = pred_ids
    return batch

In [None]:
test_dataset = test_dataset.map(speech_file_to_array_fn)

  0%|          | 0/1123 [00:00<?, ?ex/s]

In [None]:
result = test_dataset.map(predict, batched=True, batch_size=8)

  0%|          | 0/141 [00:00<?, ?ba/s]

  return (input_length - kernel_size) // stride + 1


In [None]:
label_names = [config.id2label[i] for i in range(config.num_labels)]
label_names

In [None]:
# Ground truth: map each row's emotion name to its class id with the config's label map,
# so it is comparable with the integer ids stored in the "predicted" column.
y_true = [config.label2id[name] for name in result["emotion"]]
y_pred = result["predicted"]

# Quick sanity check on the first few pairs.
print(y_true[:5])
print(y_pred[:5])

[1, 0, 1, 1, 0]
[1, 0, 0, 1, 0]


In [None]:
print(classification_report(y_true, y_pred, target_names=label_names))

              precision    recall  f1-score   support

       happy       0.79      0.65      0.72       555
         sad       0.71      0.83      0.77       568

    accuracy                           0.74      1123
   macro avg       0.75      0.74      0.74      1123
weighted avg       0.75      0.74      0.74      1123



In [None]:
!zip -r /content/models/checkpoints.zip /content/models/wav2vec2/

In [None]:
from google.colab import files
files.download("/content/checkpoints.zip")

In [None]:
!huggingface-cli login

In [None]:
!git clone https:://huggingface.co/ahanadeb/wav2vec2-large-indian-instrument-classification-v1 /content/models/

In [None]:
# Write the processor files, the model weights and the config into one directory so
# that it can be pushed to the Hub as a self-contained model repository.
processor.save_pretrained("/content/wav2vec2-large-indian-instrument-emotion-classification-v1/")
model.save_pretrained("/content/wav2vec2-large-indian-instrument-emotion-classification-v1/")
# NOTE(review): the captured output shows config.json being saved twice, once by
# model.save_pretrained and again by this call - confirm the second write is needed.
config.save_pretrained("/content/wav2vec2-large-indian-instrument-emotion-classification-v1/")

Configuration saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/preprocessor_config.json
tokenizer config file saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/tokenizer_config.json
Special tokens file saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/special_tokens_map.json
added tokens file saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/added_tokens.json
Configuration saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/config.json
Model weights saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/pytorch_model.bin
Configuration saved in /content/wav2vec2-large-indian-instrument-emotion-classification-v1/config.json


In [None]:
# `tokenizer` is never defined in this notebook, so the bare name raised NameError on
# a fresh run. The tokenizer is available as an attribute of the processor.
processor.tokenizer

In [None]:
!wget -O git-lfs.tar.gz https://github.com/git-lfs/git-lfs/releases/download/v2.13.2/git-lfs-linux-amd64-v2.13.2.tar.gz
!tar xzf git-lfs.tar.gz
!bash ./install.sh
!git lfs install

--2021-11-13 16:06:27--  https://github.com/git-lfs/git-lfs/releases/download/v2.13.2/git-lfs-linux-amd64-v2.13.2.tar.gz
Resolving github.com (github.com)... 52.192.72.89
Connecting to github.com (github.com)|52.192.72.89|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/13021798/31608d80-55cd-11eb-90aa-129d4821d135?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20211113%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20211113T160627Z&X-Amz-Expires=300&X-Amz-Signature=bbdb8f3403084c856f76a27159aa982b2a7dc6e7591406290479005677c06fb1&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=13021798&response-content-disposition=attachment%3B%20filename%3Dgit-lfs-linux-amd64-v2.13.2.tar.gz&response-content-type=application%2Foctet-stream [following]
--2021-11-13 16:06:28--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/13021798/31608d80-5

In [None]:
# NOTE(review): the repository cloned here is "...-instrument-classification-v1", but
# the next cell changes into "...-instrument-emotion-classification-v1" (the directory
# written by save_pretrained). Confirm which repository the model files belong in.
!git lfs install
!git clone https://huggingface.co/ahanadeb/wav2vec2-large-indian-instrument-classification-v1

In [None]:
%cd wav2vec2-large-indian-instrument-emotion-classification-v1
!ls

/content/wav2vec2-large-indian-instrument-emotion-classification-v1
added_tokens.json	  pytorch_model.bin	   vocab.json
config.json		  special_tokens_map.json
preprocessor_config.json  tokenizer_config.json


In [None]:
!ls

In [None]:
!git status

On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)

	[31madded_tokens.json[m
	[31mconfig.json[m
	[31mpreprocessor_config.json[m
	[31mpytorch_model.bin[m
	[31mspecial_tokens_map.json[m
	[31mtokenizer_config.json[m
	[31mvocab.json[m

nothing added to commit but untracked files present (use "git add" to track)


In [None]:
!git add .

In [None]:
!git status

On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
  (use "git reset HEAD <file>..." to unstage)

	[32mnew file:   added_tokens.json[m
	[32mnew file:   config.json[m
	[32mnew file:   preprocessor_config.json[m
	[32mnew file:   pytorch_model.bin[m
	[32mnew file:   special_tokens_map.json[m
	[32mnew file:   tokenizer_config.json[m
	[32mnew file:   vocab.json[m



In [None]:
!git commit -m "Add model"

[main 0aa7559] Add model
 7 files changed, 113 insertions(+)
 create mode 100644 added_tokens.json
 create mode 100644 config.json
 create mode 100644 preprocessor_config.json
 create mode 100644 pytorch_model.bin
 create mode 100644 special_tokens_map.json
 create mode 100644 tokenizer_config.json
 create mode 100644 vocab.json


In [None]:
# NOTE(review): in notebook order this cell comes after the `git commit` cell. On a
# fresh runtime the commit presumably needs the identity to be configured first -
# confirm and move this cell above the commit.
!git config --global user.email "ahanadeb01@gmail.com"
!git config --global user.name "ahanadeb"

In [None]:
!git remote -v

origin	https://ahandeb:<REDACTED>@huggingface.co/ahanadeb/wav2vec2-large-indian-instrument-emotion-classification-v1 (fetch)
origin	https://ahandeb:<REDACTED>@huggingface.co/ahanadeb/wav2vec2-large-indian-instrument-emotion-classification-v1 (push)


In [None]:
!sudo apt-get install git-lfs

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  git-lfs
0 upgraded, 1 newly installed, 0 to remove and 37 not upgraded.
Need to get 2129 kB of archives.
After this operation, 7662 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 git-lfs amd64 2.3.4-1 [2129 kB]
Fetched 2129 kB in 2s (895 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package git-lfs.
(Reading database ... 155219 files and directories current

In [None]:
!git push

ahanadeb
Counting objects: 9, done.
Delta compression using up to 4 threads.
Compressing objects: 100% (8/8), done.
Writing objects: 100% (9/9), 2.23 KiB | 2.23 MiB/s, done.
Total 9 (delta 0), reused 0 (delta 0)
To https://huggingface.co/ahanadeb/wav2vec2-large-indian-instrument-emotion-classification-v1
   41a0787..0aa7559  main -> main


In [None]:
%cd ..

In [None]:
!pwd

In [None]:
!git clone https://ahandeb:api_HadhozLgQEdMZlXqafsFVHkFsnlwIRnUqh@huggingface.co/ahanadeb/wav2vec2-large-indian-instrument-emotion-classification-v1

Cloning into 'wav2vec2-large-indian-instrument-emotion-classification-v1'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0)[K
Unpacking objects: 100% (3/3), done.


In [None]:
!rm -rf /content/wav2vec2-large-indian-instrument-emotion-classification-v1/