In [1]:
import numpy as np
from pymongo import MongoClient
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['Dataset']  # Replace with your database name
collection = db['audio']

# Fetch data
documents = collection.find({})

# Initialize lists to store features
mfccs = []
spectral_centroids = []
zero_crossing_rates = []

# Iterate over documents to extract features
for doc in documents:
    if 'features' in doc and 'mfccs_stats' in doc['features'] and 'mean' in doc['features']['mfccs_stats']:
        mfccs.append(doc['features']['mfccs_stats']['mean'])  # Extracting 20 MFCC mean values
    if 'features' in doc and 'spectral_centroid_stats' in doc['features'] and 'mean' in doc['features']['spectral_centroid_stats']:
        spectral_centroids.append([doc['features']['spectral_centroid_stats']['mean']])  # Ensuring 2D by wrapping in list
    if 'features' in doc and 'zero_crossing_rate_stats' in doc['features'] and 'mean' in doc['features']['zero_crossing_rate_stats']:
        zero_crossing_rates.append([doc['features']['zero_crossing_rate_stats']['mean']])  # Ensuring 2D by wrapping in list

# Convert lists to numpy arrays
mfccs = np.array(mfccs)
spectral_centroids = np.array(spectral_centroids)
zero_crossing_rates = np.array(zero_crossing_rates)

# Check and adjust dimensions if necessary
if mfccs.ndim == 1:
    mfccs = mfccs.reshape(-1, 1)
if spectral_centroids.ndim == 1:
    spectral_centroids = spectral_centroids.reshape(-1, 1)
if zero_crossing_rates.ndim == 1:
    zero_crossing_rates = zero_crossing_rates.reshape(-1, 1)

# Stack the features to form a feature matrix
features = np.concatenate((mfccs, spectral_centroids, zero_crossing_rates), axis=1)

# Normalize the features
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

# Example labels array, replace with your actual labels array
labels = np.random.randint(2, size=normalized_features.shape[0])  # Random binary labels for demonstration

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, labels, test_size=0.2, random_state=42)

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

class MusicModel(nn.Module):
    def _init_(self, num_features):
        super(MusicModel, self)._init_()
        self.layer1 = nn.Linear(num_features, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.layer2 = nn.Linear(128, 64)
        self.output_layer = nn.Linear(64, 1)  # Adjust this if your output differs

    def forward_model(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        return self.output_layer(x)

In [3]:
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist

def distributed_enviorment():
    # Set up the distributed environment for PyTorch
    dist.init_process_group('nccl', init_method='env://')  # Assumes NCCL backend; adjust as needed

def create_distributed_model(model, device):
    model = model.to(device)
    ddp_model = DDP(model, device_ids=[device])
    return ddp_model

In [4]:
from torch.utils.data import TensorDataset, DataLoader

# Assuming X_train and y_train are your features and labels loaded from previous steps
tensor_x = torch.Tensor(X_train)  # transform to torch tensor
tensor_y = torch.Tensor(y_train)

my_dataset = TensorDataset(tensor_x, tensor_y)  # create your dataset
my_dataloader = DataLoader(my_dataset, batch_size=32, shuffle=True)

In [5]:
def train_music_model(ddp_model, dataloader, epochs, device):
    optimizer = optim.Adam(ddp_model.parameters(), lr=0.001)
    criterion = nn.BCEWithLogitsLoss()
    
    try:
        for epoch in range(epochs):
            epoch_start = time.time()
            epoch_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = ddp_model(inputs)
                loss = criterion(outputs, labels.view(-1, 1))
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            epoch_duration = time.time() - epoch_start
            print(f'Epoch {epoch+1}, Loss: {epoch_loss / len(dataloader)}, Duration: {epoch_duration}s')
    except Exception as e:
        print(f"An error occurred: {e}")

In [6]:
if torch.cuda.is_available():
    # Set device to CUDA
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    # Set device to CPU
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

# Create a tensor on the selected device
x = torch.rand(3, 3).to(device)

# Perform some operations
y = torch.matmul(x, x)

# Print the result
print(y)

CUDA is available. Using GPU.
tensor([[0.3158, 0.3915, 0.3024],
        [0.3143, 0.5429, 0.4501],
        [0.1412, 0.3373, 0.3045]], device='cuda:0')


In [9]:
import torch
import torch.nn as nn

class MusicRecommendationModel(nn.Module):
    def _init_(self, num_features):  # Ensure 'num_features' is properly defined as a parameter
        super(MusicRecommendationModel, self)._init_()
        self.layer1 = nn.Linear(num_features, 512)  # Use 'num_features' to define input size
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.layer2 = nn.Linear(512, 256)
        self.output_layer = nn.Linear(256, 1)  # Assuming binary output, adjust according to your needs

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)
        x = self.relu(self.layer2(x))
        return self.output_layer(x)

In [11]:
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Initialize Distributed Training Environment
def setup_distributed_environment():
    dist.init_process_group(backend='nccl')  # Make sure to call this only once

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assuming 'num_features' is known and properly set according to your dataset
num_features = 20  # Set this to the number of features in your dataset

# Create an instance of the MusicRecommendationModel with the correct number of input features
model = MusicRecommendationModel(num_features=num_features)
model.to(device)

# Wrap the model with DistributedDataParallel
model = DDP(model, device_ids=[torch.cuda.current_device()])

TypeError: MusicRecommendationModel.__init__() got an unexpected keyword argument 'num_features'