In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math
import os
import cv2
import IPython.display as ipd 
import librosa 
import librosa.display
import torch
import numpy as np
import torch.nn.functional as F
import torchvision

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [27]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [28]:
trainPath = '/kaggle/input/freesound-audio-tagging/audio_train/'
trainData = pd.read_csv('/kaggle/input/freesound-audio-tagging/train.csv')
trainData.head()

Unnamed: 0,fname,label,manually_verified
0,00044347.wav,Hi-hat,0
1,001ca53d.wav,Saxophone,1
2,002d256b.wav,Trumpet,0
3,0033e230.wav,Glockenspiel,1
4,00353774.wav,Cello,1


In [29]:
dataLabels = np.unique(trainData.label.values)
dataLabelsEncoder = {dataLabel:i for i, dataLabel in enumerate(dataLabels)}

In [30]:
class Dataset(Dataset):
    def __init__(self, dataframe, test=False):
        self.dataframe = dataframe
        self.test = test

    def __getitem__(self, index):
        path_to_file = self.get_path_to_file(index)
        signal = self.preprocess_signal(path_to_file)

        x = np.stack([cv2.resize(signal, (128, 128)) for _ in range(3)])

        if self.test == False:
            y = dataLabelsEncoder[self.dataframe.label.values[index]]
            return torch.tensor(x, dtype=torch.float), y
        else:
             return torch.tensor(x, dtype=torch.float)

    def get_path_to_file(self, index):
        if self.test:
            return '../input/freesound-audio-tagging/audio_test/' + self.dataframe.fname.values[index]
        else:
            return '../input/freesound-audio-tagging/audio_train/' + self.dataframe.fname.values[index]

    def preprocess_signal(self, path_to_file):
        signal, _ = librosa.load(path_to_file)
        signal = librosa.feature.melspectrogram(y=signal)
        return librosa.power_to_db(signal, ref=np.max)

    def __len__(self):
        return self.dataframe.shape[0]

In [31]:
batch_size = 64

xTrain, xVal, yTrain, yVal = train_test_split(trainData, trainData, test_size=0.2, shuffle=True, random_state=5)

trainSet = Dataset(xTrain)
valSet = Dataset(xVal)
trainLoader = DataLoader(trainSet, batch_size=batch_size, shuffle=True)
valLoader = DataLoader(valSet , batch_size=batch_size, shuffle=True)

print('Training set: {}, Validation set: {}'.format(xTrain.shape[0], xVal.shape[0]))

Training set: 7578, Validation set: 1895


In [32]:
# Путь к локально сохраненным весам модели
local_weights_path = "/kaggle/input/hahahah/efficientnet_b0_rwightman-7f5810bc.pth"

In [33]:
# Загрузка модели без предварительного обучения
model = torchvision.models.efficientnet_b0(pretrained=False)



In [34]:
# Загрузка локально сохраненных весов
state_dict = torch.load(local_weights_path)

In [35]:
# Загрузка весов в модель
model.load_state_dict(state_dict)

<All keys matched successfully>

In [36]:
# Изменение последнего слоя модели
model.classifier[1] = torch.nn.Linear(1280, 41)

In [37]:
# Перенос модели на устройство
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [38]:
from time import time 
start_time = time()

epochs = 1
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
cost = torch.nn.CrossEntropyLoss()
total_batches = len(trainLoader)
for epoch in range(epochs):
    train_loss = 0
    val_loss = 0
    train_correct = 0
    val_correct = 0
    model.train()
    for batch_idx, (x, y) in enumerate(trainLoader):
        optimizer.zero_grad()
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = cost(pred, y)
        train_loss += cost(pred, y).item()
        train_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        loss.backward()
        optimizer.step()
        # Выводим процент выполнения эпохи
        percent_complete = ((batch_idx + 1) / total_batches) * 100
        print(f"\rEpoch {epoch + 1}/{epochs} [{int(percent_complete)}%]", end='')

    model.eval()
    with torch.no_grad():
        for x, y in valLoader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            loss = cost(pred, y)
            val_loss += cost(pred, y).item()
            val_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    train_loss = train_loss / len(trainLoader)
    val_loss = val_loss / len(valLoader)
    train_accuracy = train_correct / len(xTrain)
    val_accuracy = val_correct / len(xVal)
    print()
    print("epoch = %d, train_loss = %.5f, val_loss = %.5f, train_accuracy = %.5f, val_accuracy = %.5f" % (epoch, train_loss, val_loss, train_accuracy, val_accuracy))
    
end_time = time()
total_time = end_time - start_time
print(f'Total Training Time: {total_time:.2f} seconds')

Epoch 1/1 [100%]
epoch = 0, train_loss = 1.79080, val_loss = 1.19262, train_accuracy = 0.51491, val_accuracy = 0.66596
Total Training Time: 1122.11 seconds


In [40]:
test = pd.read_csv('../input/freesound-audio-tagging/sample_submission.csv')

test_dataset = Dataset(test, test=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
predictions = torch.tensor([])
model.eval()
for x in test_loader:
    x = x.to(device)
    with torch.no_grad():
        y_hat = model(x)
    predictions = torch.cat([predictions, y_hat.cpu()])



In [47]:
import torch

In [48]:
# Преобразуйте массив NumPy обратно в тензор PyTorch
predictions_tensor = torch.from_numpy(predictions)

In [49]:
# Примените softmax к тензору
predictions_softmax = torch.nn.functional.softmax(predictions_tensor, dim=1)

In [50]:
# Преобразуйте результат обратно в массив NumPy, если это необходимо
predictions_softmax_numpy = predictions_softmax.detach().numpy()

In [54]:
submission_top1 = test.copy()

N = len(test)
for i in range(N):
    p = predictions[i, :]
    idx = np.argmax(p)
    submission_top1.label[i] = dataLabels[idx]

submission_top1.to_csv('submission_final.csv', index=False, header=True)

submission_top1.head()

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  submission_top1.label[i] = dataLabels[idx]


Unnamed: 0,fname,label
0,00063640.wav,Shatter
1,0013a1db.wav,Flute
2,002bb878.wav,Bass_drum
3,002d392d.wav,Bass_drum
4,00326aa9.wav,Oboe
