In [1]:
import torch
import pandas as pd
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder

In [2]:
# config

latent_vector_size = 3
mlp_layers = [16,4,1]
embedding_dim = 10
learning_rate = 0.001

# example data

col_info_list = [
    {'col_name': 'feature1','col_type': 'continuous'},
    {'col_name': 'feature2','col_type': 'continuous'},
    {'col_name': 'feature3','col_type': 'continuous'},
    {'col_name': 'feature4','col_type': 'categorical'},
    {'col_name': 'feature5','col_type': 'categorical'},
    {'col_name': 'feature6','col_type': 'categorical'},
]

Y = [1, 0 , 0, 1]

df = pd.DataFrame({
    'feature1': [8.11, 8.12, 8.13, 8.14],
    'feature2': [8.21, 8.22, 8.23, 8.24],
    'feature3': [8.31, 8.32, 8.33, 8.34],
    'feature4': ['A', 'A', 'B', 'C'],
    'feature5': ['A', 'E', 'B', 'C'],
    'feature6': ['A', 'E', 'B', 'C'],
})

df

Unnamed: 0,feature1,feature2,feature3,feature4,feature5,feature6
0,8.11,8.21,8.31,A,A,A
1,8.12,8.22,8.32,A,E,E
2,8.13,8.23,8.33,B,B,B
3,8.14,8.24,8.34,C,C,C


In [3]:
# convert Y to tensor

Y = torch.tensor(Y, dtype=torch.float)
Y = Y.view(-1, 1)
Y

tensor([[1.],
        [0.],
        [0.],
        [1.]])

In [4]:
# calculate number of unique values in each categorical column

for col_info in col_info_list:
        if col_info['col_type']=='categorical':
                col_info['num_unique'] = df[col_info['col_name']].nunique()
col_info_list

[{'col_name': 'feature1', 'col_type': 'continuous'},
 {'col_name': 'feature2', 'col_type': 'continuous'},
 {'col_name': 'feature3', 'col_type': 'continuous'},
 {'col_name': 'feature4', 'col_type': 'categorical', 'num_unique': 3},
 {'col_name': 'feature5', 'col_type': 'categorical', 'num_unique': 4},
 {'col_name': 'feature6', 'col_type': 'categorical', 'num_unique': 4}]

In [5]:
# normalize continuous columns

continous_columns = [col_info['col_name'] for col_info in col_info_list if col_info['col_type']=='continuous']
x_continuous = torch.tensor(df[continous_columns].values, dtype=torch.float32)
x_continuous

tensor([[8.1100, 8.2100, 8.3100],
        [8.1200, 8.2200, 8.3200],
        [8.1300, 8.2300, 8.3300],
        [8.1400, 8.2400, 8.3400]])

In [6]:
# label encode categorical columns

x_categorical = None
for col_info in col_info_list:
    if col_info['col_type']=='categorical':
        # load
        col_data = df[col_info['col_name']]
        num_unique_val = col_info['num_unique']
        label_encoder = LabelEncoder()
        
        # transform
        col_data = label_encoder.fit_transform(col_data)
        col_data = torch.tensor(col_data, dtype=torch.long)
        col_data = col_data.view(1, -1)

        # concat
        if x_categorical is None:
            x_categorical = col_data
        else:
            x_categorical = torch.cat((x_categorical, col_data), dim=0)
            
        # save for inference
        col_info['label_encoder'] = label_encoder

x_categorical = x_categorical.T
x_categorical

tensor([[0, 0, 0],
        [0, 3, 3],
        [1, 1, 1],
        [2, 2, 2]])

In [7]:
class Deep(nn.Module):
    def __init__(self, num_features, mlp_layers):
        """
        Args:
            num_features: number of features
            mlp_layers : list of hidden layer sizes
        """
        super().__init__()
        self.linear_layers = []
        for i in range(len(mlp_layers)):
            if i==0:
                self.linear_layers.append(nn.Linear(num_features, mlp_layers[i]))
            else:
                self.linear_layers.append(nn.Linear(mlp_layers[i-1], mlp_layers[i]))
            self.linear_layers.append(nn.ReLU())
        self.linear_layers = nn.ModuleList(self.linear_layers)
        
    def forward(self, x):
        for layer in self.linear_layers:
            x = layer(x)
        return x

In [8]:
class FM(nn.Module):
    def __init__(self, num_features=None, latent_vector_size=None):
        """
        Args:
            num_features : number of features
            latent_vector_size : FM latent vector size
        """
        super().__init__()
        self.latent_vector = nn.Parameter(torch.randn(num_features, latent_vector_size).float(), requires_grad=True)
        self.lin = nn.Linear(num_features, 1)
        self.activation = nn.Sigmoid()
        
    def forward(self, x):
        out_1 = torch.matmul(x, self.latent_vector).pow(2).sum(1, keepdim=True) #S_1^2
        out_2 = torch.matmul(x.pow(2), self.latent_vector.pow(2)).sum(1, keepdim=True) # S_2
        
        out_inter = 0.5*(out_1 - out_2)
        out_lin = self.lin(x)
        out = out_inter + out_lin
        out = self.activation(out)
        return out

In [9]:
class DeepFM(nn.Module):
    def __init__(self, col_info_list, latent_vector_size, mlp_layers, embedding_dim):
        """
        Args:
            n: number of features
            mlp_layers : list of hidden layer sizes
        """
        super().__init__()
        
        # for embedding the categorical columns
        self.embedding_list = [nn.Embedding(col_info['num_unique'], embedding_dim) 
                               for col_info in col_info_list 
                               if col_info['col_type']=='categorical']
        self.embedding_list = nn.ModuleList(self.embedding_list)
        
        # calculate the total feature size
        num_categorical_columns = len(self.embedding_list)
        num_continuous_columns = len(col_info_list) - num_categorical_columns
        feature_size = num_categorical_columns * embedding_dim + num_continuous_columns
        
        # use it to create FM and Deep model
        self.FM_model = FM(feature_size, latent_vector_size)
        self.Deep_model = Deep(feature_size, mlp_layers)
        self.activation = nn.Sigmoid()
        
    def forward(self, x_categorical, x_continuous):
        # embed the categorical columns
        x_categorical_embedded = [embedding(x_categorical[:, i]) for i, embedding in enumerate(self.embedding_list)]
        x_categorical_embedded = torch.cat(x_categorical_embedded, dim=1)
        
        # concatenate categorical and continuous columns
        x = torch.cat([x_categorical_embedded, x_continuous], dim=1)
        
        # get output from FM and Deep part
        y_fm = self.FM_model(x)
        y_deep = self.Deep_model(x)
        
        # Add both and sigmoid
        x = self.activation(y_fm + y_deep)
        return x

In [10]:
# initialize and check output model
model = DeepFM(col_info_list, latent_vector_size, mlp_layers, embedding_dim)
out = model(x_categorical, x_continuous)
out

tensor([[0.7311],
        [0.5000],
        [0.7308],
        [0.7311]], grad_fn=<SigmoidBackward0>)

In [11]:
# Define loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(10):
    model.train()
    output = model(x_categorical, x_continuous)
    loss = criterion(output, Y)
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    optimizer.step()
    print(f"Epoch {epoch}, Loss: {loss.item()}")

Epoch 0, Loss: 0.6580270528793335
Epoch 1, Loss: 0.6561570763587952
Epoch 2, Loss: 0.6456461548805237
Epoch 3, Loss: 0.6070890426635742
Epoch 4, Loss: 0.5439338684082031
Epoch 5, Loss: 0.5108123421669006
Epoch 6, Loss: 0.5044958591461182
Epoch 7, Loss: 0.5034641623497009
Epoch 8, Loss: 0.5032670497894287
Epoch 9, Loss: 0.5032221674919128
