<a href="https://colab.research.google.com/github/KyleReed397/AMT5005MX--Programming-in-Python/blob/main/Untitled20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import librosa
import os
from tqdm import tqdm

def load_data(data_folder):
    """Collect audio file paths and integer genre labels from a dataset folder.

    The folder is expected to contain one sub-directory per genre, named
    after the entries of the genre list below. Genres whose sub-directory
    is missing are skipped.

    Args:
        data_folder: Path of the directory holding the per-genre folders.

    Returns:
        A tuple ``(audio_files, labels, genres)``: ``audio_files`` is a list
        of file paths, ``labels`` is a parallel list of indices into
        ``genres``, and ``genres`` is the full list of genre names.
    """
    genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
              'jazz', 'metal', 'pop', 'reggae', 'rock']
    audio_extensions = ('.wav', '.au')

    audio_files = []
    labels = []

    for genre_idx, genre in enumerate(genres):
        genre_folder = os.path.join(data_folder, genre)
        # isdir rather than exists: a regular file that happens to be named
        # like a genre would otherwise make os.listdir raise.
        if not os.path.isdir(genre_folder):
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and any "first N files" selection made by callers)
        # reproducible across runs and machines.
        names = sorted(
            f for f in os.listdir(genre_folder) if f.endswith(audio_extensions)
        )
        audio_files.extend(os.path.join(genre_folder, f) for f in names)
        labels.extend([genre_idx] * len(names))

    return audio_files, labels, genres

def extract_features(audio_path):
    """Compute a normalised log-mel spectrogram for one audio file.

    The clip is loaded at 22050 Hz, zero-padded or truncated to exactly
    30 seconds, turned into a 128-band mel spectrogram, converted to decibels
    relative to its peak, and standardised to zero mean / unit variance.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        A 2-D NumPy array with 128 rows (mel bands) and one column per frame.
        If loading or processing fails, the error is printed and an all-zero
        array with the same fixed shape is returned instead, so the results
        for several files can still be stacked into one array.
    """
    sample_rate = 22050
    clip_seconds = 30
    n_mels = 128
    hop_length = 512
    n_fft = 2048

    target_length = sample_rate * clip_seconds  # 30 seconds at 22050 Hz
    # With centred frames an STFT over `target_length` samples produces
    # 1 + target_length // hop_length columns (1292 here). The fallback below
    # must use this exact width or it cannot be stacked with real features.
    n_frames = 1 + target_length // hop_length

    try:
        # Load at most 30 seconds of audio, resampled to the fixed rate
        audio, sr = librosa.load(audio_path, sr=sample_rate, duration=clip_seconds)

        # Force an exact length so every file yields the same number of frames
        if len(audio) < target_length:
            audio = np.pad(audio, (0, target_length - len(audio)))
        else:
            audio = audio[:target_length]

        # Extract the mel spectrogram with fixed parameters
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_mels=n_mels, hop_length=hop_length, n_fft=n_fft,
            center=True)

        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        # The epsilon keeps the division finite for a constant (e.g. silent) clip
        mel_spec_db = (mel_spec_db - np.mean(mel_spec_db)) / (np.std(mel_spec_db) + 1e-8)

        return mel_spec_db
    except Exception as e:
        # Deliberate best effort: report the failure and keep the pipeline going
        print(f"Error processing {audio_path}: {e}")
        return np.zeros((n_mels, n_frames))

class MusicCNN(nn.Module):
    """Small convolutional classifier for single-channel 2-D inputs.

    ``features`` stacks three conv(3x3, padding 1) -> ReLU -> max-pool(2)
    stages with 32, 64 and 128 output channels. ``classifier`` is a two-layer
    fully connected head that maps the flattened feature map to 10 scores.
    The first linear layer expects 128 * 16 * 161 inputs, which is what three
    2x poolings leave of, for example, a 128 x 1292 input.
    """

    def __init__(self):
        super().__init__()

        # Build the conv stages in order so the layer indices inside the
        # Sequential (0, 3, 6 for the convolutions) stay the same.
        channel_plan = (1, 32, 64, 128)
        stages = []
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(nn.Conv2d(in_channels, out_channels, 3, padding=1))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2))
        self.features = nn.Sequential(*stages)

        flattened_size = 128 * 16 * 161
        hidden_units = 256
        num_classes = 10
        self.classifier = nn.Sequential(
            nn.Linear(flattened_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return the class scores for a batch ``x``."""
        feature_maps = self.features(x)
        # Collapse everything except the leading (batch) axis
        batch_size = feature_maps.size(0)
        flat = feature_maps.view(batch_size, -1)
        return self.classifier(flat)

def train_model(data_folder, max_files=5, epochs=10, lr=0.001):
    """Train a MusicCNN genre classifier on the dataset under *data_folder*.

    Steps: list the audio files, optionally keep only the first ``max_files``
    per genre, extract one feature array per file, split the samples 80/20 at
    random into train and test sets, run full-batch Adam updates, and print
    the accuracy on the test split.

    Args:
        data_folder: Dataset directory passed to ``load_data``.
        max_files: Maximum number of files kept per genre; a falsy value
            (``None`` or ``0``) keeps every file.
        epochs: Number of full-batch optimisation steps.
        lr: Learning rate for the Adam optimiser.

    Returns:
        A tuple ``(model, genres)``: the trained model and the list of genre
        names that the label indices refer to.

    Raises:
        ValueError: If fewer than two audio files are available, because the
            80/20 split would then leave the train or the test set empty.
    """
    print("Loading data...")
    audio_files, labels, genres = load_data(data_folder)

    # Limit the number of files per genre to speed up processing
    if max_files:
        limited_files = []
        limited_labels = []
        # Iterate over the genres actually returned instead of assuming 10
        for genre_idx in range(len(genres)):
            genre_files = [f for f, l in zip(audio_files, labels) if l == genre_idx]
            genre_files = genre_files[:max_files]
            limited_files.extend(genre_files)
            limited_labels.extend([genre_idx] * len(genre_files))
        audio_files, labels = limited_files, limited_labels

    if len(audio_files) < 2:
        # Fail early with a clear message instead of an obscure tensor-shape
        # error (no files) or a NaN loss from an empty training batch (one file).
        raise ValueError(
            f"Need at least 2 audio files to train, found {len(audio_files)} "
            f"under {data_folder!r}"
        )

    print(f"Processing {len(audio_files)} files...")

    # Extract features
    features_list = [extract_features(file_path) for file_path in tqdm(audio_files)]

    X = np.array(features_list)
    y = np.array(labels)
    # Insert a channel axis at position 1, as Conv2d expects
    X = np.expand_dims(X, axis=1)

    # Random 80/20 split
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    train_size = int(0.8 * len(X))
    train_idx = indices[:train_size]
    test_idx = indices[train_size:]

    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    print(f"Training on {len(X_train)} samples...")

    # Setup model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MusicCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    X_train = torch.FloatTensor(X_train).to(device)
    y_train = torch.LongTensor(y_train).to(device)

    # Train. NOTE: every step feeds the whole training set at once, so memory
    # use grows with the number of files kept by max_files.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        if epoch % 2 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

    # Test
    X_test = torch.FloatTensor(X_test).to(device)
    y_test = torch.LongTensor(y_test).to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs, 1)
        accuracy = (predicted == y_test).float().mean().item()

    print(f'Accuracy: {accuracy:.1%}')

    return model, genres

def predict_song(model, audio_path, genres):
    """Classify one audio file with a trained model.

    Args:
        model: Trained network; the input is moved to the device of its
            first parameter.
        audio_path: Path of the audio file to classify.
        genres: Sequence of genre names indexed by class id.

    Returns:
        A dict with keys ``'genre'`` (the name of the highest-scoring class)
        and ``'confidence'`` (its softmax probability as a float).
    """
    target_device = next(model.parameters()).device

    spectrogram = extract_features(audio_path)
    # Prepend two singleton axes in front of the 2-D feature array
    batch = torch.FloatTensor(spectrogram)[None, None].to(target_device)

    model.eval()
    with torch.no_grad():
        logits = model(batch)
        class_probs = torch.softmax(logits, dim=1)[0]
        best_class = torch.argmax(logits, dim=1)[0].item()

    prediction = {}
    prediction['genre'] = genres[best_class]
    prediction['confidence'] = class_probs[best_class].item()
    return prediction

# Usage
def run_classification(data_path, song_path):
    """Train a model on *data_path*, then classify *song_path* and print it.

    Args:
        data_path: Dataset directory handed to ``train_model``.
        song_path: Audio file handed to ``predict_song``.

    Returns:
        The ``(model, genres)`` pair produced by training.
    """
    # Step 1: fit the classifier with the default training settings
    training_output = train_model(data_path)
    model, genres = training_output

    # Step 2: classify the requested song and report the outcome
    result = predict_song(model=model, audio_path=song_path, genres=genres)
    print(f"\nPredicted genre: {result['genre']}")
    print(f"Confidence: {result['confidence']:.1%}")

    return model, genres

# Example usage:
# model, genres = train_model("/path/to/genres_original")
# result = predict_song(model, "/path/to/song.wav", genres)

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be read
# through ordinary paths. This import is only available in the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Define your paths here:
#   data_path - dataset root passed to train_model (one sub-folder per genre)
#   song_path - audio file to classify with predict_song
data_path = "/content/drive/MyDrive/DATASET/genres_original"
song_path = "/content/drive/MyDrive/u dont understand ORIGINAL BEFORE NEW LYRICS latest.wav"

In [None]:
# First train the model on data_path with the default settings; the returned
# model and genre list are kept as globals for the prediction step.
model, genres = train_model(data_path)

Loading data...
Processing 50 files...


100%|██████████| 50/50 [00:03<00:00, 14.05it/s]


Training on 40 samples...
Epoch 1/10, Loss: 2.3003
Epoch 3/10, Loss: 9.3885
Epoch 5/10, Loss: 2.5232
Epoch 7/10, Loss: 2.0593
Epoch 9/10, Loss: 1.3980
Accuracy: 50.0%


In [None]:
# Classify the song at song_path with the trained model, then print the
# predicted genre name and its softmax probability formatted as a percentage.
result = predict_song(model, song_path, genres)
print(f"Predicted genre: {result['genre']}")
print(f"Confidence: {result['confidence']:.1%}")

Predicted genre: pop
Confidence: 23.6%
