In [19]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [35]:
df=pd.read_csv('test.csv')
df.head()
# Get unique items/users and generate mapping dictionaries
dimension_list=["ticker","investorname","calendardate"]
dimension_to_index={}
index_to_dimension={}
for dim in dimension_list:
    unique_items = df[dim].unique()
    item_to_id = {item: idx for idx, item in enumerate(unique_items)}
    id_to_item = {idx: item for item, idx in item_to_id.items()}
    df[dim] = df[dim].map(item_to_id)
    dimension_to_index[dim]=item_to_id
    index_to_dimension[dim]=id_to_item

df.head()
dimension_to_index
index_to_dimension
np.array(list(index_to_dimension["ticker"].keys()))

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [49]:
#create a Dataset class
class InvDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.ticker=df["ticker"].values
        self.investorname=df["investorname"].values
        self.calendardate=df["calendardate"].values

        #self.X = df[["ticker","investorname","calendardate"]].values
        self.y = df["value"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        #return self.X[idx,0],self.X[idx,0],self.X[idx,2], self.y[idx] 
        return self.ticker[idx],self.investorname[idx],self.calendardate[idx], self.y[idx]
    
    
#test the InvDataset class

dataset = InvDataset(df)
dataset[0] 


#Create a DataLoader
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)
dataset[0:3]

(array([0, 0, 0]),
 array([0, 0, 0]),
 array([0, 1, 2]),
 array([0.17032018, 0.41282912, 0.92447239]))

In [33]:
# Define your Model
class RecommenderNet(nn.Module):
    def __init__(self, n_tickers, n_investornames, n_calendardates,n_factors, hidden, dropout):
        super().__init__()
        self.ticker_emb = nn.Embedding(n_tickers, n_factors)
        self.investorname_emb = nn.Embedding(n_investornames, n_factors)
        self.calendardate_emb = nn.Embedding(n_calendardates, n_factors)

        self.layers = nn.Sequential(
            nn.Linear(n_factors*3, hidden),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(hidden, 1)
        )
        
    def forward(self, ticker_id, investorname_id, calendardate_id):
        u = self.ticker_emb(ticker_id)
        i = self.investorname_emb(investorname_id)
        d=self.calendardate_emb(calendardate_id)
        print(f'u={u},i={i},d={d}'')
        x = torch.cat([u, i,d], dim=-1)
        x = self.layers(x)
        return x

In [34]:
x,y=next(iter(dataloader))
model=RecommenderNet(len(dimension_to_index["ticker"]),len(dimension_to_index["investorname"]),len(dimension_to_index["calendardate"]),n_factors=50, hidden=10, dropout=0.05)
test=model(x[:,0],x[:,1],x[:,2])
test

tensor([[0.2363],
        [0.2888],
        [0.1876],
        [0.1060]], grad_fn=<AddmmBackward0>)

In [54]:
# Training function
def train(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0
    for ticker, investorname, calenderdate, target in dataloader:
        # Forward pass
        prediction = model(ticker, investorname, calenderdate)
        loss = criterion(prediction, target)
        print(f'ticker={ticker}, investorname={investorname}, calenderdate={calenderdate}, target={target}, prediction={prediction}, loss={loss}')
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        total_loss += loss.item()
    return total_loss / len(dataloader)

# Validation function
def validate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for user, item, target in dataloader:
            prediction = model(user, item)
            loss = criterion(prediction, target)
            total_loss += loss.item()
    return total_loss / len(dataloader)


In [55]:
n_factors = 5  # Adjust as needed
hidden = 10  # Adjust as needed
dropout = 0.-5  # Adjust as needed

model=RecommenderNet(len(dimension_to_index["ticker"]),len(dimension_to_index["investorname"]),len(dimension_to_index["calendardate"]),n_factors=50, hidden=10, dropout=0.05)
criterion = nn.MSELoss()  # Adjust as needed
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adjust as needed

# Train the model
n_epochs = 10  # Adjust as needed
for epoch in range(n_epochs):
    train_loss = train(model, dataloader, optimizer, criterion)
    valid_loss = validate(model, dataloader, criterion)
    print(f'Epoch: {epoch+1}/{n_epochs}.. Training loss: {train_loss}.. Validation Loss: {valid_loss}')

ticker=tensor([1, 6, 7, 3]), investorname=tensor([3, 0, 7, 2]), calenderdate=tensor([1, 3, 2, 1]), target=tensor([0.2782, 0.1094, 0.3832, 0.7296], dtype=torch.float64), prediction=tensor([[0.4867],
        [0.4950],
        [0.4162],
        [0.4337]], grad_fn=<AddmmBackward0>), loss=0.05941814664988344


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: Found dtype Double but expected Float