# Step 1: Imports

In [None]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
from zipfile import ZipFile, Path
import librosa
from IPython.display import Audio
import IPython.display as ipd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchaudio
from torchsummary import summary
import torchvision
from torchvision import datasets, models, transforms
import torch.optim as optim
from tqdm import tqdm
import wandb 
from torch.cuda.amp import GradScaler, autocast

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
class CFG:
    """Notebook-wide hyper-parameters and experiment settings.

    Attributes are read directly off the class (``CFG.sr``, ``CFG.batch_size``,
    ...). ``class2dict(CFG)`` collects every non-dunder attribute, so anything
    added here also ends up in the W&B run config.
    """
    n_size = 15                # clips are cut/padded to n_size * sr samples (tried 5, 10, 15)
    sr = 32000                 # samples per second assumed when sizing clips
    batch_size = 256           # 512  r64
    learning_rate = 0.0001
    architecture = "CNN"       # ['CNN', 'resnet34', 'M5']
    epochs = 10
    seed = 15
    # The API key is not stored in the notebook; it is taken from the
    # environment and is an empty string when the variable is unset.
    api = os.environ.get("WANDB_API_KEY", "")
    project = 'birdCLEF'
    # NOTE(review): set this to your W&B entity; None is passed through to
    # wandb.init unchanged.
    entity = None
    wandb = False
    # A torch.device never compares equal to a plain string, so the original
    # `device == "cuda"` check always took the CPU branch. Compare the type.
    if device.type == "cuda":
        num_workers = 1
        pin_memory = True
    else:
        num_workers = 0
        pin_memory = False

In [None]:
def class2dict(f):
    """Return a ``{name: value}`` dict of every attribute of ``f`` whose name does not start with ``__``."""
    public_names = [name for name in dir(f) if not name.startswith('__')]
    return {name: getattr(f, name) for name in public_names}

In [None]:
# Seed every random number generator used below from the single CFG.seed value.
seed = CFG.seed
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)      # every visible GPU, not only the current one
# Derived from the same seed so the data split cannot drift from CFG.seed.
random_state = seed
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects processes launched afterwards -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
drive.mount('/content/drive')

In [None]:
# Quick look at the archive: number of members, the first ten names, and every
# CSV file it contains.
with ZipFile('birdclef-2023.zip') as myzip:
        print(len(myzip.namelist()))
        print(myzip.namelist()[:10])
        # Collect the names of all CSV members.
        csv_list = []
        for name in myzip.namelist():
            if name.endswith('.csv'):
                csv_list.append(name)
        print(csv_list)

In [None]:
%%time
with ZipFile('birdclef-2023.zip') as myzip:
    data_train = myzip.open('train_metadata.csv')
    data_sample = myzip.open('sample_submission.csv')
    data_ebird = myzip.open('eBird_Taxonomy_v2021.csv')
    
df_train = pd.read_csv(data_train)
df_sample = pd.read_csv(data_sample)
df_ebird = pd.read_csv(data_ebird)

# Step 2: Explore the training data

## review

In [None]:
df_train.head(3)

In [None]:
df_ebird.head()

In [None]:
df_sample

In [None]:
%%time
with ZipFile('/content/drive/MyDrive/birdclef-2023.zip') as myzip:
    data_1 = myzip.open('train_audio/abethr1/XC128013.ogg')
    data_2 = myzip.open('train_audio/abhori1/XC120250.ogg')
au_abe, sr_abe = librosa.load(data_1)
au_abh, sr_abh = librosa.load(data_2)      

In [None]:
Audio(data=au_abe, rate=sr_abe)

In [None]:
Audio(data=au_abh, rate=sr_abh)

In [None]:
# abethr1
plt.figure(figsize=(6,2))
plt.plot(au_abe)
plt.show()
# abhori1
plt.figure(figsize=(6,2))
plt.plot(au_abh)
plt.show()

In [None]:
%%time
with ZipFile('/content/drive/MyDrive/birdclef-2023.zip') as myzip:
      data_test = myzip.open('test_soundscapes/soundscape_29201.ogg')
au_test, sr_test = librosa.load(data_test)
Audio(data=au_test, rate= sr_test)
print(sr_test)
print(len(au_test)/sr_test)

In [None]:
plt.figure(figsize=(6,2))
plt.plot(au_test)
plt.show()

## waveform_list

In [None]:
def get_train_data_(filename):
    """Return the bytes of ``train_audio/<filename>`` as an in-memory file object.

    The member is read completely while the archive is open, so nothing
    outlives the ``with`` block. The original returned the member handle after
    the archive context had exited and never closed it.

    Args:
        filename: path of the recording relative to ``train_audio/``.

    Returns:
        A seekable ``io.BytesIO`` positioned at the start of the data.

    Raises:
        KeyError: if the member does not exist in the archive.
    """
    import io

    with ZipFile('birdclef-2023.zip') as myzip:
        return io.BytesIO(myzip.read(f'train_audio/{filename}'))

def low_count_classes(series):
    """Return the values of ``series`` that occur fewer than two times."""
    counts = series.value_counts()
    rare = counts[counts < 2]
    return rare.index.tolist()

def splitting(df, split_size):
    """Split ``df`` into a larger and a smaller part, stratified by ``primary_label``.

    Labels that occur only once cannot be stratified; those rows are kept out
    of the split and appended to the larger part.

    Unlike the original, the caller's frame is left untouched: no helper
    ``train`` column is written into ``df``, and nothing is dropped in place
    on a slice of it.

    Args:
        df: frame with a ``primary_label`` column.
        split_size: ``test_size`` passed to ``train_test_split``.

    Returns:
        ``(max_df, min_df)``. ``max_df`` has a fresh 0..n-1 index; ``min_df``
        keeps the index it had in ``df``.
    """
    is_rare = df['primary_label'].isin(low_count_classes(df['primary_label']))
    splittable = df[~is_rare]
    max_df, min_df = train_test_split(
        splittable,
        test_size=split_size,
        stratify=splittable['primary_label'],
        random_state=random_state,
    )
    max_df = pd.concat([max_df, df[is_rare]], axis=0).reset_index(drop=True)
    # The original always returned frames without a 'train' column; keep that
    # guarantee even if the input happens to carry one.
    max_df = max_df.drop(columns='train', errors='ignore')
    min_df = min_df.drop(columns='train', errors='ignore')
    return max_df, min_df

In [None]:
# First halve the metadata, then split the kept half (df1) into training and
# validation frames.
df1, df2 = splitting(df_train, 0.5)              # halve the data in case of limited memory
train_df,val_df = splitting(df1, 0.3001)

In [None]:
def label_list(df, encoder):                     #first option
    """Pair every file name in ``df`` with its integer-encoded ``primary_label``.

    Args:
        df: frame with ``filename`` and ``primary_label`` columns.
        encoder: a label encoder. If it is already fitted (has ``classes_``)
            it is only used to transform, so several frames can share one
            mapping. An unfitted encoder is fitted on ``df`` as before.

    Returns:
        ``(pairs, ids)``: a list of ``(encoded_label, filename)`` tuples and
        the set of encoded labels present in ``df``.
    """
    list_filename = df['filename'].tolist()
    if hasattr(encoder, 'classes_'):
        labels = encoder.transform(df['primary_label'])
    else:
        labels = encoder.fit_transform(df['primary_label'])
    return list(zip(labels, list_filename)), set(labels)

# One encoder, fitted on every label in the metadata, is shared by both
# splits. Fitting a fresh encoder per split (as before) gives the same species
# different ids whenever the splits do not contain exactly the same classes.
# `le` is kept at module level because later cells call le.inverse_transform.
le = LabelEncoder().fit(df_train['primary_label'])
zipped_list_train, classes = label_list(train_df, le)
zipped_list_val, _ = label_list(val_df, le)

In [None]:
def label_lists(df):                               #sec option
    """Pair every file name in ``df`` with its raw ``primary_label``.

    Returns a list of ``(label, filename)`` tuples and the set of labels.
    """
    filenames = df['filename'].tolist()
    species = df['primary_label']
    pairs = [(name, path) for name, path in zip(species, filenames)]
    return pairs, set(species)
list_train, classes = label_lists(train_df)  
list_val, _ = label_lists(val_df) 

In [None]:
# Store mel spectrograms rather than raw waveforms to reduce allocated memory.
train_set = []
for label, item in tqdm(list_train):
    waveform, sample_rate = torchaudio.load(get_train_data_(item))
    # Cut or zero-pad every clip to exactly n_size * sr samples.
    if waveform.shape[1] > CFG.n_size*CFG.sr:
        waveform = torch.narrow(waveform, 1, 0, CFG.n_size*CFG.sr)
    else:
        delta = CFG.n_size*CFG.sr - waveform.shape[1]
        waveform = F.pad(waveform, (0, delta), "constant", 0)
    # Pass the file's sample rate, as the test-clip cell does. Without it the
    # transform falls back to its own default rate, so training and test
    # features would be built on different mel filter banks.
    waveform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_fft=2504)(waveform)
    train_set.append([waveform, sample_rate, label])

In [None]:
print("Shape of waveform: {}".format(waveform.size()))
print("Sample rate of waveform: {}".format(sample_rate))

plt.figure()
plt.imshow(waveform.log2()[0,:,:].numpy(), cmap='viridis')  
plt.title(f'{label}');

In [None]:
# Distinct labels found in train_set, sorted; a label's position in this list
# is the class index used by label_to_index / index_to_label.
labels = sorted({datapoint[2] for datapoint in train_set})
len(labels)

In [None]:
# Validation clips get exactly the same preprocessing as the training clips.
val_set = []
for label, item in tqdm(list_val):
    waveform, sample_rate = torchaudio.load(get_train_data_(item))
    # Cut or zero-pad every clip to exactly n_size * sr samples.
    if waveform.shape[1] > CFG.n_size*CFG.sr:
        waveform = torch.narrow(waveform, 1, 0, CFG.n_size*CFG.sr)
    else:
        delta = CFG.n_size*CFG.sr - waveform.shape[1]
        waveform = F.pad(waveform, (0, delta), "constant", 0)
    # Pass the file's sample rate, as the test-clip cell does. Without it the
    # transform falls back to its own default rate, so validation and test
    # features would be built on different mel filter banks.
    waveform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_fft=2504)(waveform)
    val_set.append([waveform, sample_rate, label])

In [None]:
def label_to_index(word):
    """Return the position of ``word`` in the global ``labels`` list as a 0-dim tensor."""
    position = labels.index(word)
    return torch.tensor(position)

In [None]:
def index_to_label(index):
    """Return the entry of the global ``labels`` list stored at ``index``.

    This is the inverse of ``label_to_index``.
    """
    label = labels[index]
    return label

In [None]:
def get_test_data_(filename):
    """Return the bytes of ``test_soundscapes/<filename>`` as an in-memory file object.

    The member is read completely while the archive is open. The original
    returned the member handle after the archive context had exited and never
    closed it.

    Args:
        filename: name of the soundscape file inside ``test_soundscapes/``.

    Returns:
        A seekable ``io.BytesIO`` positioned at the start of the data.

    Raises:
        KeyError: if the member does not exist in the archive.
    """
    import io

    with ZipFile('birdclef-2023.zip') as myzip:
        return io.BytesIO(myzip.read(f'test_soundscapes/{filename}'))
# Load one test soundscape and bring it to the same fixed length as the
# training clips (n_size * sr samples): cut if longer, zero-pad otherwise.
waveform, sample_rate = torchaudio.load(get_test_data_('soundscape_29201.ogg'))
if waveform.shape[1] > CFG.n_size*CFG.sr:
    waveform = torch.narrow(waveform, 1,0,CFG.n_size*CFG.sr)
else:
    delta = CFG.n_size*CFG.sr- waveform.shape[1]
    waveform = F.pad(waveform,(0,delta), "constant", 0)
# Mel spectrogram of the test clip; later cells use its first dimension as the
# model's input-channel count and feed it to the models for prediction.
mel_spectrogram_test = torchaudio.transforms.MelSpectrogram(sample_rate= sample_rate, n_fft = 2504)(waveform)  
mel_spectrogram_test.shape  

In [None]:
transform = torch.nn.Sequential(
     nn.Flatten(2,3) # for batch, check for tensor
    )

# Step 3: Modelling

## dataset

In [None]:
def collate_fn(batch):
    """Turn a list of ``(waveform, sample_rate, label)`` items into two batched tensors.

    The sample rate is ignored. Labels are mapped to class indices with
    ``label_to_index``.

    Returns:
        ``(tensors, targets)``: the stacked first elements and the stacked
        label indices.
    """
    features = [waveform for waveform, _, _ in batch]
    label_ids = [label_to_index(label) for _, _, label in batch]

    # Stack along a new leading batch dimension.
    targets = torch.stack(label_ids)
    tensors = torch.stack(features)

    return tensors, targets

In [None]:
# Loader over the cached training spectrograms; collate_fn stacks the items
# and converts string labels to class indices. Reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=CFG.batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers= CFG.num_workers,
    pin_memory=CFG.pin_memory,
)
# Despite its name this loader iterates the validation set (val_set), in a
# fixed order and without dropping the last partial batch.
test_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size=CFG.batch_size,
    shuffle=False,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=CFG.num_workers,
    pin_memory=CFG.pin_memory,
)

In [None]:
#----- without function, first opt
# Alternative loaders over the (tensor, encoded_label) pairs in wavet / wavev.
# Those lists are built in the "pretrained resnet34" section further down, so
# that section must be run first. Running this cell replaces the train_loader
# defined in the previous cell.
# NOTE(review): val_loader also shuffles; evaluation order does not affect the
# metrics computed here, so shuffle=False would probably do -- confirm intent.
train_loader = torch.utils.data.DataLoader(wavet, batch_size=CFG.batch_size,shuffle=True,  num_workers=CFG.num_workers,pin_memory=CFG.pin_memory)
val_loader = torch.utils.data.DataLoader(wavev, batch_size=CFG.batch_size,shuffle=True,  num_workers=CFG.num_workers,pin_memory=CFG.pin_memory)

In [None]:
for batch, (X, Y) in enumerate(train_loader):    #pls check carefully
    print(batch,X.shape,transform(X).shape, Y.shape)
    print(batch,type(X), type(Y))
    break

## M5 pytorch tutorial

In [None]:
class M5(nn.Module):
    """1-D convolutional classifier: four conv/batch-norm/ReLU/max-pool stages and a linear head.

    Input is ``(batch, n_input, length)``. Callers flatten 2-D spectrograms
    into that shape before calling the model. Output is log-probabilities of
    shape ``(batch, 1, n_output)``.
    """

    def __init__(self, n_input=1, n_output=226, stride=16, n_channel=32):
        super().__init__()
        wide = 2 * n_channel
        # Stage 1: wide strided kernel over the raw sequence.
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=80, stride=stride)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.pool1 = nn.MaxPool1d(4)
        # Stage 2: same width.
        self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channel)
        self.pool2 = nn.MaxPool1d(4)
        # Stage 3: channel count doubles.
        self.conv3 = nn.Conv1d(n_channel, wide, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(wide)
        self.pool3 = nn.MaxPool1d(4)
        # Stage 4: keeps the doubled width.
        self.conv4 = nn.Conv1d(wide, wide, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(wide)
        self.pool4 = nn.MaxPool1d(4)
        self.fc1 = nn.Linear(wide, n_output)

    def forward(self, x):
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
            (self.conv4, self.bn4, self.pool4),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))
        # Average over whatever length is left, then classify per position.
        pooled = F.avg_pool1d(x, x.shape[-1])
        logits = self.fc1(pooled.permute(0, 2, 1))
        return F.log_softmax(logits, dim=2)

transformed = mel_spectrogram_test
model = M5(n_input=transformed.shape[0], n_output=len(labels))
model.to(device)
print(model)


def count_parameters(model):
    """Return the total number of elements in the parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


n = count_parameters(model)
print("Number of parameters: %s" % n)

In [None]:
# M5 is built from Conv1d layers, so it takes (channels, length) inputs; the
# previous (1, 128, 128) size describes a 2-D image and cannot be fed to it.
# Use the flattened length of the spectrogram the model was sized from.
summary(model, input_size=(transformed.shape[0], transformed[0].numel()))

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)  # reduce the learning after 20 epochs by a factor of 10

In [None]:
def train(model, epoch, log_interval):
    """Run one training epoch of ``model`` over the global ``train_loader``.

    Uses the module-level ``optimizer``, ``transform``, ``device``, ``pbar``,
    ``pbar_update`` and ``losses``; every batch loss is appended to ``losses``.

    Args:
        model: network returning log-probabilities of shape (batch, 1, n_output).
        epoch: epoch number, used only in the progress message.
        log_interval: print the loss every ``log_interval`` batches.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):

        data = data.to(device)
        target = target.to(device)

        # apply transform and model on whole batch directly on device
        data = transform(data)
        output = model(data)

        # negative log-likelihood for a tensor of size (batch x 1 x n_output).
        # Only the singleton middle dimension is removed: a bare squeeze()
        # would also drop the batch dimension when a batch holds one sample.
        loss = F.nll_loss(output.squeeze(1), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print training stats
        if batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

        # update progress bar
        pbar.update(pbar_update)
        # record loss
        losses.append(loss.item())

In [None]:
def number_of_correct(pred, target):
    """Count how many entries of ``pred`` (singleton dimensions removed) equal ``target``."""
    matches = pred.squeeze() == target
    return torch.sum(matches).item()


def get_likely_index(tensor):
    """Return, along the last dimension, the index of the largest value."""
    return torch.argmax(tensor, dim=-1)


def test(model, epoch):
    """Evaluate ``model`` on the global ``test_loader`` and print its accuracy.

    Uses the module-level ``transform``, ``device``, ``pbar`` and
    ``pbar_update``.

    Args:
        model: network returning log-probabilities of shape (batch, 1, n_output).
        epoch: epoch number, used only in the printed message.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; without this every forward pass kept its
    # autograd graph alive.
    with torch.no_grad():
        for data, target in test_loader:

            data = data.to(device)
            target = target.to(device)

            # apply transform and model on whole batch directly on device
            data = transform(data)
            output = model(data)

            pred = get_likely_index(output)
            correct += number_of_correct(pred, target)

            # update progress bar
            pbar.update(pbar_update)

    print(f"\nTest Epoch: {epoch}\tAccuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n")

In [None]:
log_interval = 20
n_epoch = 50

pbar_update = 1 / (len(train_loader) + len(test_loader))
losses = []

# The transform needs to live on the same device as the model and the data.
transform = transform.to(device)
with tqdm(total=n_epoch) as pbar:
    for epoch in range(1, n_epoch + 1):
        train(model, epoch, log_interval)
        test(model, epoch)
        scheduler.step()

In [None]:
# Let's plot the training loss versus the number of iteration.
plt.plot(losses);
plt.title("training loss");

In [None]:
def predict(tensor):
    """Return the label the global ``model`` predicts for a single spectrogram.

    Args:
        tensor: one un-batched feature tensor, e.g. the first element of a
            ``train_set`` item.

    Returns:
        The entry of ``labels`` with the highest predicted score.
    """
    tensor = tensor.to(device)
    # The batch `transform` flattens dims 2 and 3 and therefore expects a
    # batch dimension; a single item is flattened by hand instead.
    tensor = tensor.reshape(1, -1)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        tensor = model(tensor.unsqueeze(0))
    tensor = get_likely_index(tensor)
    tensor = index_to_label(tensor.squeeze())
    return tensor


waveform, sample_rate, utterance, *_ = train_set[73]

print(f"Expected: {utterance}. Predicted: {predict(waveform)}.")

In [None]:
prediction =[]
with torch.no_grad():
    tensor = mel_spectrogram_test.to(device)
    tensor = tensor.reshape(1, -1)
    tensor = model(tensor.unsqueeze(0))
    
    pred = tensor.cpu().numpy()
    argmax = np.argmax(pred)
    probs = np.exp(pred[0]).tolist() 
df = pd.DataFrame(probs, columns = labels)

In [None]:
index_to_label(argmax)

In [None]:
argmax = np.argmax(pred)
print(argmax, np.exp(pred[0][0][argmax]))

In [None]:
df_sample1 = pd.read_csv('df_M5_5.csv')
df_sample2 = df_sample1.drop(['Unnamed: 0'], axis = 1)
df_sample2.loc[ df_sample2.row_id == "soundscape_29201_15", labels] = df.values
df_sample2.to_csv('df_M5_15.csv')

In [None]:
# df_M5_15.csv was written with its index, which comes back as 'Unnamed: 0'.
w = pd.read_csv('df_M5_15.csv')
submission = w.drop(['Unnamed: 0'], axis = 1)
# Write without the index; otherwise the column that was just dropped is
# added to submission.csv again.
submission.to_csv('submission.csv', index=False)

In [None]:
del model
import gc
gc.collect()
torch.cuda.empty_cache()
print(torch.cuda.memory_allocated()/1024**2)
!nvidia-smi
print(torch.cuda.memory_summary(device=None, abbreviated=False))

## CNN MS_learning

In [None]:
class CNNet(nn.Module):
    """Small 2-D CNN over single-channel spectrogram crops.

    Two 5x5 conv + 2x2 max-pool stages, then two linear layers. The forward
    pass returns class probabilities (softmax over dim 1).

    Args:
        n_classes: number of output classes (default 264, as before).
        flat_features: size of the flattened conv output. The default 53824
            (= 64 * 29 * 29) matches 128x128 inputs.
        hidden: width of the hidden linear layer (default 1000, as before).
    """

    def __init__(self, n_classes=264, flat_features=53824, hidden=1000):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(flat_features, hidden)
        self.fc2 = nn.Linear(hidden, n_classes)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        # No ReLU on the output layer: clamping the logits at zero before the
        # softmax stops the network from pushing a class below the uniform
        # level and cuts the gradient for every negative logit.
        return F.softmax(self.fc2(x), dim=1)

model = CNNet().to(device)

In [None]:
class Narrow(nn.Module):
    """Module wrapper around ``torch.narrow``.

    Keeps ``length`` entries of dimension ``dim`` starting at ``start`` so the
    slicing can be placed inside an ``nn.Sequential``.
    """

    def __init__(self, dim, start, length):
        super().__init__()
        self.dim, self.start, self.length = dim, start, length

    def forward(self, x):
        return x.narrow(self.dim, self.start, self.length)

In [None]:
transform = nn.Sequential(Narrow(3,0,128)) # for batch
for batch, (X, Y) in enumerate(train_loader):
    print(batch,X.shape,transform(X).shape, Y.shape)
    print(batch,type(X), type(Y))
    break

In [None]:
summary(model, input_size=( 1, 128, 128))

In [None]:
class _ProbabilityNLLLoss(nn.Module):
    """Cross-entropy for models whose forward pass already returns probabilities.

    Both CNNet and the ResNet head in this notebook end in a softmax.
    ``nn.CrossEntropyLoss`` expects raw logits and applies its own
    log-softmax, so feeding it probabilities normalises twice. This loss takes
    the log of the probabilities directly.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps          # floor that keeps log() finite for zero probabilities

    def forward(self, probs, target):
        log_probs = torch.log(probs.float().clamp_min(self.eps))
        return F.nll_loss(log_probs, target)


cost = _ProbabilityNLLLoss()
learning_rate = CFG.learning_rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scaler = GradScaler()               # Automatic Mixed Precision (AMP)
NUM_ACCUMULATION_STEPS = 2          # gradient accumulation (GA)

# Create the training function

def train(dataloader, model, loss, optimizer):
    """Train ``model`` for one epoch with mixed precision and gradient accumulation.

    Uses the module-level ``scaler``, ``NUM_ACCUMULATION_STEPS``, ``device``
    and ``CFG``.

    Args:
        dataloader: yields ``(X, Y)`` batches; ``Y`` holds class indices.
        model: network to optimise.
        loss: loss callable ``loss(pred, Y)``. The original ignored this
            argument and used the global ``cost``; callers pass ``cost``, so
            behaviour is the same.
        optimizer: optimiser stepped every ``NUM_ACCUMULATION_STEPS`` batches.
    """
    criterion = loss
    if CFG.wandb:
        os.environ["WANDB_API_KEY"] = CFG.api
        # NOTE(review): this runs on every call, i.e. once per epoch, and so
        # starts a new W&B run each time -- consider initialising once outside.
        wandb.init(project=CFG.project, name=CFG.architecture, entity=CFG.entity, reinit=True, config=class2dict(CFG))

    model.train()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    correct = 0
    running_loss = 0.0

    # Gradients are cleared only after an optimizer step. Clearing them at the
    # start of every batch (as before) threw away the gradients that were
    # supposed to be accumulated.
    optimizer.zero_grad()
    for batch, (X, Y) in enumerate(dataloader):
        Y = Y.type(torch.LongTensor)
        X, Y = X.to(device), Y.to(device)

        with autocast():
            pred = model(X)
            batch_loss = criterion(pred, Y)

        # Track loss and accuracy on every batch, not only on logged ones.
        batch_loss_value = batch_loss.item()
        running_loss += batch_loss_value
        correct += (pred.argmax(1) == Y).type(torch.float).sum().item()

        # Scale so the accumulated gradient is the average over the group.
        scaler.scale(batch_loss / NUM_ACCUMULATION_STEPS).backward()

        if ((batch + 1) % NUM_ACCUMULATION_STEPS == 0) or (batch + 1 == num_batches):
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        if batch % 1000 == 0:
            current = batch * len(X)
            tqdm.write(f'loss: {batch_loss_value:>7f}  [{current:>5d}/{size:>5d}]')
    if CFG.wandb:
        wandb.log({'train_loss': running_loss / max(num_batches, 1),
                   'train_accuracy': correct / max(size, 1)})

# Create the validation/test function

def test(dataloader, model):
    """Evaluate ``model`` on ``dataloader`` and report accuracy and mean loss.

    Uses the module-level ``cost``, ``device`` and ``CFG``.

    Args:
        dataloader: yields ``(X, Y)`` batches; ``Y`` holds class indices.
        model: network to evaluate.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, Y in dataloader:
            Y = Y.type(torch.LongTensor)      # same target dtype as in train()
            X, Y = X.to(device), Y.to(device)

            # NOTE(review): the original called transform(X) *after* the
            # forward pass, where it had no effect; that dead call is removed.
            # If inputs must be transformed for the model, do it before this
            # line and in train() as well.
            pred = model(X)
            test_loss += cost(pred, Y).item()
            correct += (pred.argmax(1)==Y).type(torch.float).sum().item()

    # Each cost(...) value is one number per batch, so the average is taken
    # over batches; accuracy is a per-sample count and is averaged over samples.
    test_loss /= max(num_batches, 1)
    correct /= max(size, 1)

    tqdm.write(f'\nTest Error:\nacc: {(100*correct):>0.1f}%, avg loss: {test_loss:>8f}\n')
    if CFG.wandb:
        wandb.log({'test_loss': test_loss,
                   'test_accuracy': correct})

In [None]:
for t in range(CFG.epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train(train_loader, model, cost, optimizer)
    test(val_loader, model)
print('Done!')

In [None]:
prediction =[]
with torch.no_grad():
    X_t = mel_spectrogram_test.unsqueeze(0).to(device)
    pred = model(X_t).cpu().numpy()
    argmax = np.argmax(pred)
    preds = pred[0].tolist() 
df = pd.DataFrame(preds)

In [None]:
df = pd.DataFrame(preds)
classes_inv = le.inverse_transform(np.array(list(classes)))
df_sample.loc[df_sample.row_id == "soundscape_29201_5" , classes_inv] = df.T
df_sample

In [None]:
df_sample.to_csv('df_sample_CNN_5.csv')
# Only `drive` is imported from google.colab at the top of the notebook, so
# `files` has to be imported here before it can be used.
from google.colab import files
files.download('df_sample_CNN_5.csv')

## pretrained resnet34

In [None]:
wavet = []       # as option

# The crop + normalisation pipeline is the same for every clip: build it once.
to_resnet_input = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

for label, item in tqdm(zipped_list_train):
    waveform, sample_rate = torchaudio.load(get_train_data_(item))
    # Cut or zero-pad to exactly n_size * sr samples.
    if waveform.shape[1] > CFG.n_size*CFG.sr:
        waveform = torch.narrow(waveform, 1, 0, CFG.n_size*CFG.sr)
    else:
        delta = CFG.n_size*CFG.sr - waveform.shape[1]
        waveform = F.pad(waveform, (0, delta), "constant", 0)
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_mels=256, n_fft=2048)(waveform)
    # Repeat the spectrogram over three channels, as the ResNet input expects.
    mel_spectrogram_ = mel_spectrogram.expand(3, *mel_spectrogram.shape[1:])

    mel_transforms = to_resnet_input(mel_spectrogram_)

    wavet.append([mel_transforms, label])

In [None]:
wavev = []

# The crop + normalisation pipeline is the same for every clip: build it once.
to_resnet_input = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

for label, item in tqdm(zipped_list_val):
    waveform, sample_rate = torchaudio.load(get_train_data_(item))

    # Cut or zero-pad to exactly n_size * sr samples.
    if waveform.shape[1] > CFG.n_size*CFG.sr:
        waveform = torch.narrow(waveform, 1, 0, CFG.n_size*CFG.sr)
    else:
        delta = CFG.n_size*CFG.sr - waveform.shape[1]
        waveform = F.pad(waveform, (0, delta), "constant", 0)

    mel_spectrogram = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_mels=256, n_fft=2048)(waveform)
    # Repeat the spectrogram over three channels, as the ResNet input expects.
    mel_spectrogram_ = mel_spectrogram.expand(3, *mel_spectrogram.shape[1:])

    mel_transforms = to_resnet_input(mel_spectrogram_)

    wavev.append([mel_transforms, label])

In [None]:
def get_test_data_3(filename):
    """Return the bytes of ``test_soundscapes/<filename>`` as an in-memory file object.

    The member is read completely while the archive is open, so the returned
    ``io.BytesIO`` does not depend on the archive afterwards.
    """
    import io

    with ZipFile('birdclef-2023.zip') as myzip:
        return io.BytesIO(myzip.read(f'test_soundscapes/{filename}'))


# This load used to be indented inside the function, after its `return`, so it
# never ran and the code below silently reused whatever `waveform` an earlier
# cell had left behind.
waveform, sample_rate = torchaudio.load(get_test_data_3('soundscape_29201.ogg'))
# Cut or zero-pad to exactly n_size * sr samples. The spectrogram is computed
# on the CPU, so the tensor is no longer moved to the device and back.
if waveform.shape[1] > CFG.n_size*CFG.sr:
    waveform = torch.narrow(waveform, 1, 0, CFG.n_size*CFG.sr)
else:
    delta = CFG.n_size*CFG.sr - waveform.shape[1]
    waveform = F.pad(waveform, (0, delta), "constant", 0)
# n_mels=256 matches the features built for wavet / wavev; without it the test
# spectrogram had a different number of mel bands than the training inputs.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_mels=256, n_fft=2048)(waveform)
# Repeat over three channels, then crop and normalise like the training data.
mel_spectrogram_test = mel_spectrogram.expand(3, *mel_spectrogram.shape[1:])
mel_transforms_test = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])(mel_spectrogram_test)
mel_transforms_test.shape

In [None]:
from torchvision.models import resnet34
rnet = torchvision.models.resnet34(weights='ResNet34_Weights.DEFAULT')
summary(rnet, input_size=( 3, 224, 224))

In [None]:
# Pretrained ResNet-34 used as a frozen feature extractor with a new head.
rnet = models.resnet34(weights='ResNet34_Weights.DEFAULT')
for param in rnet.parameters():
    param.requires_grad = False
num_ftrs = rnet.fc.in_features
# New trainable head. The ReLU that sat between the linear layer and the
# softmax is removed: clamping logits at zero stops the head from pushing a
# class below the uniform level and blocks the gradient for negative logits.
rnet.fc = nn.Sequential(
    nn.Linear(num_ftrs, 264),
    nn.Softmax(dim=1))

model = rnet.to(device)
# The optimizer created earlier still points at the previous model's
# parameters; without a new one the training loop below would never update
# this network. Only the new head is trainable.
optimizer = torch.optim.Adam(model.fc.parameters(), lr=CFG.learning_rate)

In [None]:
for t in range(CFG.epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train(train_loader, model, cost, optimizer)
    test(val_loader, model)
print('Done!')

In [None]:
model.eval()
with torch.no_grad():
  
    X_t = mel_transforms_test.unsqueeze(0).to(device)
    pred = model(X_t).cpu().numpy()
preds = pred[0].tolist()   
argmax = np.argmax(pred)
print(argmax, pred[0][argmax])

In [None]:
df = pd.DataFrame(preds)
classes_inv = le.inverse_transform(np.array(list(classes)))
df_sample.loc[df_sample.row_id == "soundscape_29201_5" , classes_inv] = df.T
df_sample.to_csv('df_sample_resnet34_5.csv')