In [1]:
import torch
import torchvision
from torchvision import transforms
from torchsummary import summary
from torch.utils.data import DataLoader,Dataset
from PIL import Image
import keras
from numpy import load
import torch.nn.functional as F

import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [3]:
from google.colab import drive
drive.mount('/content/drive')

path = '/content/drive/MyDrive/'

import os

directory = '/content/drive/My Drive/FPIS/IMG/'
IMG = os.listdir(directory)

Mounted at /content/drive


In [4]:
from tqdm import tqdm
from torch.utils.data import TensorDataset
# Аугментации
transform = transforms.Compose([
    transforms.RandomResizedCrop(244),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

augmented_images = []

for filename in tqdm(IMG):
    filepath = os.path.join(directory, filename)
    try:
        image = Image.open(filepath).convert('RGB')
        for _ in range(20):  # 20 копий каждого изображения → 100 x 20 = 2000
            augmented = transform(image)
            augmented_images.append(augmented)
    except Exception as e:
        print(f"Ошибка с файлом {filename}: {e}")

# Собираем всё в один тензор и оборачиваем в Dataset
dataset_tensor = torch.stack(augmented_images)
ds = TensorDataset(dataset_tensor)

print(f" Всего изображений в датасете: {len(ds)}")


100%|██████████| 100/100 [00:07<00:00, 13.89it/s]


 Всего изображений в датасете: 2000


In [5]:
from torch.utils.data import random_split

# Размеры
total_size = len(ds)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

# Разбиваем
train_ds, val_ds = random_split(ds, [train_size, val_size])

print(f" Train: {len(train_ds)} | Val: {len(val_ds)}")


 Train: 1600 | Val: 400


In [6]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)


In [7]:
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == 'cuda':
    print(f"Используется GPU: {torch.cuda.get_device_name(0)}")
else:
    print("Используется CPU")

# Загружаем предобученную ResNet18
model = models.resnet18(pretrained=True)

# Меняем последний слой на 100 классов
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 100)  # <- 100 классов

model = model.to(device)


Используется GPU: Tesla T4


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 169MB/s]


In [8]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [9]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, in tqdm(train_loader, desc=f" Epoch {epoch+1}/{num_epochs}"):
        inputs = inputs.to(device)
        labels = torch.randint(0, 100, (inputs.size(0),)).to(device)  # Заменить на реальные метки, если есть

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Loss: {avg_loss:.4f}")


 Epoch 1/10: 100%|██████████| 50/50 [00:07<00:00,  6.83it/s]


Loss: 4.7299


 Epoch 2/10: 100%|██████████| 50/50 [00:05<00:00,  8.67it/s]


Loss: 4.6498


 Epoch 3/10: 100%|██████████| 50/50 [00:06<00:00,  8.31it/s]


Loss: 4.6466


 Epoch 4/10: 100%|██████████| 50/50 [00:05<00:00,  8.56it/s]


Loss: 4.6477


 Epoch 5/10: 100%|██████████| 50/50 [00:05<00:00,  8.48it/s]


Loss: 4.6354


 Epoch 6/10: 100%|██████████| 50/50 [00:05<00:00,  8.56it/s]


Loss: 4.6260


 Epoch 7/10: 100%|██████████| 50/50 [00:05<00:00,  8.51it/s]


Loss: 4.6350


 Epoch 8/10: 100%|██████████| 50/50 [00:05<00:00,  8.50it/s]


Loss: 4.6241


 Epoch 9/10: 100%|██████████| 50/50 [00:05<00:00,  8.46it/s]


Loss: 4.6309


 Epoch 10/10: 100%|██████████| 50/50 [00:05<00:00,  8.36it/s]

Loss: 4.6254





In [10]:
import os
import torch
from PIL import Image
import matplotlib.pyplot as plt


def predict_image(image_path, model, device):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        outputs = model(image)
    _, predicted_class = torch.max(outputs, 1)
    return predicted_class.item()


class_images = {i: [] for i in range(100)}

# Предположим, что у вас есть список всех изображений
image_names = os.listdir(directory)

# Применяем предсказание ко всем изображениям
for filename in tqdm(image_names):
    image_path = os.path.join(directory, filename)
    predicted_class = predict_image(image_path, model, device)
    class_images[predicted_class].append(filename)  # Храним название файла


def show_class_image_names(class_id, class_images, num_images):
    images = class_images[class_id]
    print(f"Изображения, отнесенные к классу {class_id}:")
    for i, img_name in enumerate(images[:num_images]):  # Отображаем первые 5 названий
        print(f"{i + 1}. {img_name}")


def display_images_for_image(image_name, class_images, model, device, directory):
    image_path = os.path.join(directory, image_name)

    if not os.path.exists(image_path):
        print(f"Ошибка: файл с именем {image_name} не найден.")
        return


    predicted_class = predict_image(image_path, model, device)
    print(f"Изображение '{image_name}' отнесено к классу {predicted_class}.")


    show_class_image_names(predicted_class, class_images, num_images=20)

# Запрашиваем имя изображения у пользователя
image_name_input = 'V $ X V PRiNCE - Дом 50.png'

# Отображаем названия изображений для этого класса
display_images_for_image(image_name_input, class_images, model, device, directory)

100%|██████████| 100/100 [00:01<00:00, 93.49it/s]


Изображение 'V $ X V PRiNCE - Дом 50.png' отнесено к классу 95.
Изображения, отнесенные к классу 95:
1. MACAN - IVL.png


In [11]:
torch.save(model.state_dict(), '/content/drive/MyDrive/resnet_music.pth')
print("Модель сохранена!")

Модель сохранена!


In [13]:
import os
import json
from collections import defaultdict

# Предположим, структура такая:
# /path/to/data/class_0/song1.png
# /path/to/data/class_1/song2.png

root_dir = "/content/drive/MyDrive/FPIS/IMG/"
class_to_songs = defaultdict(list)

for root, dirs, files in os.walk(root_dir):
    for file in files:
        if file.endswith(".png"):
            class_name = os.path.basename(root)
            class_to_songs[class_name].append(file)

# Преобразуем строковые ключи классов в числовые, если нужно
class_to_index = {cls: idx for idx, cls in enumerate(sorted(class_to_songs))}
index_to_songs = {class_to_index[k]: v for k, v in class_to_songs.items()}

# Сохраняем словарь
with open("class_to_songs.json", "w") as f:
    json.dump(index_to_songs, f)


In [15]:
# Скачивание из виртуальной среды
from google.colab import files
files.download('class_to_songs.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>