In [3]:
import torch
import torch.nn as nn

class MixedInputModel(nn.Module):
    """Feed-forward network over categorical and continuous inputs.

    Every categorical feature gets its own ``nn.Embedding`` of width
    ``embedding_dim``. The continuous features go through a single linear
    layer of width ``hidden_dims[0]``. The embeddings and the projected
    continuous features are concatenated along dim 1 and passed through
    ``Linear -> ReLU`` layers sized by ``hidden_dims[1:]``, followed by a
    final ``Linear`` to ``output_dim`` (no activation is applied after it).

    Args:
        num_categorical_features: Number of categories of each categorical
            feature; one embedding table is created per entry. May be empty,
            in which case the model uses the continuous features only.
        embedding_dim: Width of every embedding vector.
        num_continuous_features: Input width of the continuous linear layer.
        hidden_dims: Non-empty sequence. The first entry is the output width
            of the continuous projection; the remaining entries are the
            widths of the hidden layers.
        output_dim: Width of the final linear layer.
    """

    def __init__(self, num_categorical_features, embedding_dim, num_continuous_features, hidden_dims, output_dim):
        super().__init__()

        # One embedding table per categorical variable.
        self.embedding_layers = nn.ModuleList([
            nn.Embedding(num_categories, embedding_dim) for num_categories in num_categorical_features
        ])

        # Projection of the continuous variables.
        self.continuous_layer = nn.Linear(num_continuous_features, hidden_dims[0])

        # Width of the concatenated [embeddings..., continuous projection] vector.
        in_features = embedding_dim * len(self.embedding_layers) + hidden_dims[0]

        # Hidden stack: hidden_dims[0] is already consumed by the projection above.
        fc_layers = []
        for out_features in hidden_dims[1:]:
            fc_layers.append(nn.Linear(in_features, out_features))
            fc_layers.append(nn.ReLU())
            in_features = out_features

        self.fc_layers = nn.Sequential(
            *fc_layers,
            nn.Linear(in_features, output_dim)
        )

    def forward(self, categorical_inputs, continuous_inputs):
        """Run the model on one batch.

        Args:
            categorical_inputs: Iterable of index tensors, paired positionally
                with the embedding layers via ``zip`` (surplus entries on
                either side are ignored by ``zip``). Indices outside
                ``[0, num_embeddings - 1]`` are clamped to the nearest valid
                index instead of raising. Each tensor is expected to embed to
                a 2-D result, e.g. index shape ``(batch,)``, because the
                features are concatenated along dim 1.
            continuous_inputs: Tensor fed to the continuous linear layer,
                e.g. shape ``(batch, num_continuous_features)``.

        Returns:
            Output of the final linear layer.
        """
        # Clamp each index tensor into its table's valid range, then embed it.
        embedded_outputs = [
            embedding(torch.clamp(cat_inputs, 0, embedding.num_embeddings - 1))
            for cat_inputs, embedding in zip(categorical_inputs, self.embedding_layers)
        ]

        # Process continuous variables
        continuous_output = self.continuous_layer(continuous_inputs)

        if embedded_outputs:
            combined_features = torch.cat([*embedded_outputs, continuous_output], dim=1)
        else:
            # torch.cat rejects an empty list, so a model without categorical
            # features feeds the continuous projection through on its own.
            combined_features = continuous_output

        # Forward pass through fully connected layers
        return self.fc_layers(combined_features)

# Example usage
num_categorical_features = [10, 5]  # Number of categories for each categorical variable
embedding_dim = 4
num_continuous_features = 3
hidden_dims = [16, 32, 16]  # Continuous projection width, then the hidden layer widths
output_dim = 1
batch_size = 32

model = MixedInputModel(num_categorical_features, embedding_dim, num_continuous_features, hidden_dims, output_dim)

# Example input: one index tensor per categorical variable, each sampled from that
# variable's own range [0, num_categories) so no index depends on clamping in forward().
categorical_inputs = [
    torch.randint(0, num_categories, (batch_size,)) for num_categories in num_categorical_features
]
continuous_inputs = torch.rand((batch_size, num_continuous_features))

# Forward pass
output = model(categorical_inputs, continuous_inputs)
assert output.shape == (batch_size, output_dim)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

# Custom dataset class
class MixedInputDataset(Dataset):
    """Map-style dataset bundling categorical columns, continuous rows and labels.

    ``categorical_data`` is an iterable of per-feature containers, each
    indexable by sample position; ``continuous_data`` and ``labels`` are
    indexable by sample position directly. The inputs are stored as given,
    without copying or validation.
    """

    def __init__(self, categorical_data, continuous_data, labels):
        self.categorical_data = categorical_data
        self.continuous_data = continuous_data
        self.labels = labels

    def __len__(self):
        # The dataset size is defined by the labels container alone.
        sample_count = len(self.labels)
        return sample_count

    def __getitem__(self, idx):
        """Return ``(categorical_values, continuous_values, label)`` for ``idx``.

        ``categorical_values`` is a list holding the ``idx``-th entry of every
        categorical column, in column order.
        """
        categorical_values = []
        for column in self.categorical_data:
            categorical_values.append(column[idx])

        continuous_values = self.continuous_data[idx]
        label = self.labels[idx]
        return categorical_values, continuous_values, label

# Example configuration for the synthetic dataset
num_samples = 1000
num_categorical_features = [10, 5]
embedding_dim = 4
num_continuous_features = 3
hidden_dims = [16, 32, 16]
output_dim = 1

# Sample data: each categorical column is sampled from its own range
# [0, num_categories) rather than a shared hard-coded upper bound.
categorical_data = [
    torch.randint(0, num_categories, (num_samples,)) for num_categories in num_categorical_features
]
continuous_data = torch.rand((num_samples, num_continuous_features))
labels = torch.randint(0, 2, (num_samples,))

# Create a dataset
dataset = MixedInputDataset(categorical_data, continuous_data, labels)

# Define a DataLoader
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Iterate over mini-batches; `model` is the MixedInputModel instance created in an earlier cell.
for batch in dataloader:
    # Batch-specific names keep the full `labels` tensor and the earlier example
    # inputs from being overwritten by the last mini-batch.
    batch_categorical, batch_continuous, batch_labels = batch

    # Forward pass through the model
    output = model(batch_categorical, batch_continuous)

    # Your training/validation/testing logic goes here
    # ...

    print("Batch processed successfully.")
