In [1]:
import os
import re
import yaml 
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm


from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models import KeyedVectors
from gensim.parsing.preprocessing import remove_stopword_tokens

import torch
from torch_geometric.data import HeteroData
from torch_geometric.loader import LinkLoader, LinkNeighborLoader, NeighborLoader
import torch_geometric.transforms as T

In [28]:
from torch_geometric.nn import SAGEConv, to_hetero
from torch import Tensor

In [3]:
# raw_data_path
raw_data_root_path = 'data/movie-lens/ml-1m/'
movie_file_name = 'movies.dat'
users_file_name = 'users.dat'
ratings_file_name = 'ratings.dat'
movies_data_path = os.path.join(raw_data_root_path, movie_file_name)
users_data_path = os.path.join(raw_data_root_path, users_file_name)
ratings_data_path = os.path.join(raw_data_root_path, ratings_file_name)

# read data to df 
# Read users
users = pd.read_csv(users_data_path, 
                    sep="::", 
                    engine="python", 
                    encoding="ISO-8859-1", 
                    names=["UserID", "Gender", "Age", "Occupation", "Zip-code"])
print(users.head())
# Read movies
movies = pd.read_csv(movies_data_path, 
                        sep="::", 
                        engine="python", 
                        encoding="ISO-8859-1", 
                        names=["MovieID", "Title", "Genres"])
print(movies.head())
# Read ratings
ratings = pd.read_csv(ratings_data_path, 
                        sep="::", 
                        engine="python", 
                        encoding="ISO-8859-1", 
                        names=["UserID", "MovieID", "Rating", "Timestamp"])
print(ratings.head())

   UserID Gender  Age  Occupation Zip-code
0       1      F    1          10    48067
1       2      M   56          16    70072
2       3      M   25          15    55117
3       4      M   45           7    02460
4       5      M   25          20    55455
   MovieID                               Title                        Genres
0        1                    Toy Story (1995)   Animation|Children's|Comedy
1        2                      Jumanji (1995)  Adventure|Children's|Fantasy
2        3             Grumpier Old Men (1995)                Comedy|Romance
3        4            Waiting to Exhale (1995)                  Comedy|Drama
4        5  Father of the Bride Part II (1995)                        Comedy
   UserID  MovieID  Rating  Timestamp
0       1     1193       5  978300760
1       1      661       3  978302109
2       1      914       3  978301968
3       1     3408       4  978300275
4       1     2355       5  978824291


In [4]:
print(f'{users.shape = }')
print(f'{movies.shape = }')
print(f'{ratings.shape = }')
print(f'Number of users in user df = {users['UserID'].nunique()}')
print(f'Number of movies in movie df = {movies['MovieID'].nunique()}')
print(f'Number of users in ratings df = {ratings['UserID'].nunique()}')
print(f'Number of movies in ratings df = {ratings['MovieID'].nunique()}')

users.shape = (6040, 5)
movies.shape = (3883, 3)
ratings.shape = (1000209, 4)
Number of users in user df = 6040
Number of movies in movie df = 3883
Number of users in ratings df = 6040
Number of movies in ratings df = 3706


### Split the data temporally

We'll split the data temporally ti mimick the deployement time scenario as closely as possible.
The split would on ratings df 
- train set - 90% 
- test set - 10% 

train set would further be devided into another train and val set. We can use inbuilt Random Splitter for latter task

For users and movies - the hypothesis is that we're inly aware about the user, movie that exist in the system before the thrshhold time of split. So we'll have the train and test users/movies as well 

In [5]:
# split the data temporally
train_prop  = 0.9
test_prop = 0.1 

ratings.sort_values(by = 'Timestamp',
                    ascending=True, 
                    inplace=True)

ratings.reset_index(drop = True,
                    inplace=True)

start_time = ratings['Timestamp'].loc[0]
end_time = ratings['Timestamp'].loc[len(ratings) - 1]

train_start_time = start_time 
train_end_time = ratings['Timestamp'].loc[int(len(ratings)*train_prop)]
test_start_time = ratings['Timestamp'].loc[int(len(ratings)*train_prop) + 1]
test_end_time = end_time 

train_ratings = ratings[(ratings['Timestamp'] <= train_end_time) & (ratings['Timestamp'] >= train_start_time)]
test_ratings = ratings[(ratings['Timestamp'] <= test_end_time) & (ratings['Timestamp'] >= test_start_time)]

print(f'{train_ratings.shape = }')
print(f'{test_ratings.shape = }')

train_user_id = train_ratings['UserID'].unique().tolist()
train_movie_id = train_ratings['MovieID'].unique().tolist()
test_user_id = test_ratings['UserID'].unique().tolist()
test_movie_id = test_ratings['MovieID'].unique().tolist()

train_users = users[users['UserID'].isin(train_user_id)]
test_users = users[users['UserID'].isin(test_user_id)]
train_movies = movies[movies['MovieID'].isin(train_movie_id)]
test_movies = movies[movies['MovieID'].isin(test_movie_id)]

train_ratings.shape = (900189, 4)
test_ratings.shape = (100020, 4)


### Feature Engineering

Create a feature engineering routine. 
Takes in the raw user, movie and ratings table, performes the following tasks - 

In [6]:
users 

Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


In [7]:
w2v_model = KeyedVectors.load_word2vec_format('data/GoogleNews-vectors-negative300_2.bin', binary=True)

In [8]:
possible_genres = ['action',
                    'adventure',
                    'animation',
                    "children's",
                    'comedy',
                    'crime',
                    'documentary',
                    'drama',
                    'fantasy',
                    'film-noir',
                    'horror',
                    'musical',
                    'mystery',
                    'romance',
                    'sci-fi',
                    'thriller',
                    'war',
                    'western']

In [9]:
movies

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


In [10]:
def preprocess_genre(genre):
    genre = genre.lower()
    genre = genre.split('|')
    return genre
    
def preprocess_title(title):
    title = title.lower()
    title = title.split(' ')
    title = remove_stopword_tokens(title)
    title  = [re.sub(r"[^ a-zA-Z0-9]+",'',word) for word in title]
    title = [word.strip() for word in title]
    title = [word for word in title if len(word)]
    return title

def compute_average_embedding(genres, w2v_model):
    embeddings = [w2v_model.get_vector(genre) for genre in genres if genre in w2v_model.index_to_key]
    if embeddings:
        return np.mean(embeddings, axis=0).tolist()
    else:
        # Return a zero vector if no genres are found in the model
        return np.zeros(w2v_model.vector_size).tolist()

def create_features(users, movies, ratings):
    """
    - Encode the userid, movieid
    - Map the genfer to F/M = 0/1
    - create word2vec average embeddings for the Title
    - create multihot encoding for genre
    - create encoding for Year
    """
    user_encoder = LabelEncoder()
    movie_encoder = LabelEncoder()
    users['UserID'] = user_encoder.fit_transform(users['UserID'])
    movies['MovieID'] = movie_encoder.fit_transform(movies['MovieID'])

    ratings['UserID'] = user_encoder.transform(ratings['UserID'])
    ratings['MovieID'] = movie_encoder.transform(ratings['MovieID'])

    users ['Gender'] = users['Gender'].replace({'F' : 0, 'M': 1})

    movies[['Title', 'Year']] = movies['Title'].str.extract(r'^(.*?)(?: \((\d{4})\))?$')
    movies['Year'] = movies['Year'].astype(int)
    movies['Year'].fillna(0, inplace = True)

    movies['Genre_List'] = movies['Genres'].apply(preprocess_genre)

    for genre in tqdm(possible_genres):
        movies[genre] = movies['Genre_List'].apply(lambda x: 1 if genre in x else 0)

    movies['Title_List'] = movies['Title'].apply(preprocess_title)

    movies['Title_Embedding'] = movies['Title_List'].apply(lambda x: compute_average_embedding(x, w2v_model))

    return users, movies, ratings

In [11]:
train_users_transformed, train_movies_transformed, train_ratings_transformed = create_features(train_users.copy(),
                                                                                               train_movies.copy(),
                                                                                               train_ratings.copy())

  users ['Gender'] = users['Gender'].replace({'F' : 0, 'M': 1})
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  movies['Year'].fillna(0, inplace = True)
100%|██████████| 18/18 [00:00<00:00, 1361.25it/s]


In [12]:
test_users_transformed, test_movies_transformed, test_ratings_transformed = create_features(test_users.copy(),
                                                                                            test_movies.copy(),
                                                                                            test_ratings.copy())

  users ['Gender'] = users['Gender'].replace({'F' : 0, 'M': 1})
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  movies['Year'].fillna(0, inplace = True)
100%|██████████| 18/18 [00:00<00:00, 1672.41it/s]


In [13]:
test_users_transformed

Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,0,0,1,10,48067
1,1,1,56,16,70072
2,2,1,25,15,55117
3,3,1,45,7,02460
4,4,1,25,20,55455
...,...,...,...,...,...
6000,1204,0,25,7,94117
6001,1205,1,50,0,43231
6015,1206,1,45,1,37209
6027,1207,1,18,4,94133


In [14]:
test_movies_transformed

Unnamed: 0,MovieID,Title,Genres,Year,Genre_List,action,adventure,animation,children's,comedy,...,horror,musical,mystery,romance,sci-fi,thriller,war,western,Title_List,Title_Embedding
0,0,Toy Story,Animation|Children's|Comedy,1995,"[animation, children's, comedy]",0,0,1,1,1,...,0,0,0,0,0,0,0,0,"[toy, story]","[0.135498046875, 0.09771728515625, -0.06188964..."
1,1,Jumanji,Adventure|Children's|Fantasy,1995,"[adventure, children's, fantasy]",0,1,0,1,0,...,0,0,0,0,0,0,0,0,[jumanji],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,2,Grumpier Old Men,Comedy|Romance,1995,"[comedy, romance]",0,0,0,0,1,...,0,0,0,1,0,0,0,0,"[grumpier, old, men]","[0.1028645858168602, 0.1243489608168602, 0.065..."
3,3,Waiting to Exhale,Comedy|Drama,1995,"[comedy, drama]",0,0,0,0,1,...,0,0,0,0,0,0,0,0,"[waiting, exhale]","[0.12060546875, 0.0087890625, 0.29052734375, 0..."
4,4,Father of the Bride Part II,Comedy,1995,[comedy],0,0,0,0,1,...,0,0,0,0,0,0,0,0,"[father, bride, ii]","[-0.0677083358168602, -0.0944010391831398, 0.0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3878,3402,Meet the Parents,Comedy,2000,[comedy],0,0,0,0,1,...,0,0,0,0,0,0,0,0,"[meet, parents]","[-0.194091796875, -0.03369140625, 0.0590820312..."
3879,3403,Requiem for a Dream,Drama,2000,[drama],0,0,0,0,0,...,0,0,0,0,0,0,0,0,"[requiem, dream]","[0.048095703125, -0.06427001953125, 0.12304687..."
3880,3404,Tigerland,Drama,2000,[drama],0,0,0,0,0,...,0,0,0,0,0,0,0,0,[tigerland],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3881,3405,Two Family House,Drama,2000,[drama],0,0,0,0,0,...,0,0,0,0,0,0,0,0,"[family, house]","[0.0692138671875, -0.094482421875, -0.04479980..."


### Create Graph

Create graph from the training data and split into train and val graph 
- train_prop - 0.9
- val_prop - 0.1

In [63]:
def create_graph(users, movies, ratings):
    # create users features
    user_feat = torch.tensor(users[['Gender', 'Age', 'Occupation']].values, dtype=torch.float)
    movie_feat = torch.tensor(torch.hstack([torch.tensor(movies[['Year'] + [genre for genre in possible_genres]].values), 
                               torch.tensor(movies['Title_Embedding'].values.tolist())]), dtype=torch.float)
    edge_index_user_to_movie = torch.tensor(ratings[['UserID', 'MovieID']].values.T)
    edge_label_user_to_movie = torch.tensor(ratings['Rating'].values - 1, dtype=torch.long)
    print(edge_label_user_to_movie.unique())
    data = HeteroData()
    data["user"].node_id = torch.arange(users['UserID'].nunique())
    data["movie"].node_id = torch.arange(movies['MovieID'].nunique())
    data['user'].x = user_feat
    data['movie'].x = movie_feat
    data['user', 'rates', 'movie'].edge_index = edge_index_user_to_movie
    data['user', 'rates', 'movie'].edge_label = edge_label_user_to_movie
    data["movie", "rev_rates", "user"].edge_index = edge_index_user_to_movie.flip(0)
    # data['movie', 'rev_rates', 'user'].edge_label = edge_label_user_to_movie
    return data

In [64]:
train_graph = create_graph(train_users_transformed,
                           train_movies_transformed,
                           train_ratings_transformed)

train_graph

tensor([0, 1, 2, 3, 4])


  movie_feat = torch.tensor(torch.hstack([torch.tensor(movies[['Year'] + [genre for genre in possible_genres]].values),


HeteroData(
  user={
    node_id=[6011],
    x=[6011, 3],
  },
  movie={
    node_id=[3678],
    x=[3678, 319],
  },
  (user, rates, movie)={
    edge_index=[2, 900189],
    edge_label=[900189],
  },
  (movie, rev_rates, user)={ edge_index=[2, 900189] }
)

In [65]:
train_graph[('user', 'rates', 'movie')].edge_label.unique()

tensor([0, 1, 2, 3, 4])

In [66]:
transform = T.RandomLinkSplit(
    num_val=0.1,  # 10% validation edges
    num_test = 0,
    disjoint_train_ratio=0.3,  # 30% of training edges used for supervision
    edge_types=("user", "rates", "movie"),  # Edge type
    rev_edge_types=("movie", "rev_rates", "user"),  # Reverse edge type
)

train_data, val_data, test_data = transform(train_graph)

In [67]:
train_data

HeteroData(
  user={
    node_id=[6011],
    x=[6011, 3],
  },
  movie={
    node_id=[3678],
    x=[3678, 319],
  },
  (user, rates, movie)={
    edge_index=[2, 567120],
    edge_label=[486102],
    edge_label_index=[2, 486102],
  },
  (movie, rev_rates, user)={ edge_index=[2, 567120] }
)

In [68]:
train_data['movie'].node_id

tensor([   0,    1,    2,  ..., 3675, 3676, 3677])

In [69]:
train_data[('user', 'rates', 'movie')].edge_index

tensor([[4023, 1550, 5765,  ...,  787, 3622, 2602],
        [2731, 2634, 2585,  ...,  216, 1190, 1043]])

In [70]:
train_data[('user', 'rates', 'movie')].edge_label_index

tensor([[  37, 2154,  523,  ..., 2009, 2411, 3728],
        [3673, 1461, 3463,  ..., 2939, 1349, 3511]])

In [71]:
train_data[('user', 'rates', 'movie')].edge_label

tensor([4, 2, 5,  ..., 0, 0, 0])

In [72]:
train_ratings_transformed[(train_ratings_transformed['UserID'] == 2581) & (train_ratings_transformed['MovieID'] == 1075)]

Unnamed: 0,UserID,MovieID,Rating,Timestamp
521850,2581,1075,5,973727254


### Create Mini Batches
- use linkneighbourloader 

In [73]:
# Define seed edges:
edge_label_index = train_data["user", "rates", "movie"].edge_label_index
edge_label = train_data["user", "rates", "movie"].edge_label

# Define the LinkNeighborLoader
train_loader = LinkNeighborLoader(
    data=train_data,  # Use the training data
    num_neighbors=[20, 10],  # 20 neighbors in the first hop, 10 in the second hop
    edge_label_index=(("user", "rates", "movie"), edge_label_index),  # Edge type and indices
    edge_label=edge_label,  # Labels for the edges
    batch_size=128,  # Batch size
    shuffle=True,  # Shuffle the data during training
)

# Inspect a sample:
sampled_data = next(iter(train_loader))

print("Sampled mini-batch:")
print("===================")
print(sampled_data)
# print(f"{sampled_data[('user', 'rates', 'movie')].edge_label}")

Sampled mini-batch:
HeteroData(
  user={
    node_id=[4056],
    x=[4056, 3],
    n_id=[4056],
    num_sampled_nodes=[3],
  },
  movie={
    node_id=[2598],
    x=[2598, 319],
    n_id=[2598],
    num_sampled_nodes=[3],
  },
  (user, rates, movie)={
    edge_index=[2, 13670],
    edge_label=[128],
    edge_label_index=[2, 128],
    e_id=[13670],
    num_sampled_edges=[2],
    input_id=[128],
  },
  (movie, rev_rates, user)={
    edge_index=[2, 17532],
    e_id=[17532],
    num_sampled_edges=[2],
  }
)


In [74]:
sampled_data['user'].x

tensor([[ 1., 45.,  3.],
        [ 1., 18., 12.],
        [ 0., 45.,  0.],
        ...,
        [ 1., 45.,  7.],
        [ 1., 35., 16.],
        [ 0., 25.,  9.]])

### Heterogenous GNN Model

Creating a heterogenous gnn model for the bipartite graph created.
- Create embeddings for the bith node types 
- The embeddings are trained for the downstream task of predicting the ratings for the movies by a user

In [77]:
class GNN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()

        self.conv1 = SAGEConv(hidden_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)

    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x

# Our final classifier applies the dot-product between source and destination
# node embeddings to derive edge-level predictions:
class Classifier(torch.nn.Module):
    def forward(self, x_user: Tensor, x_movie: Tensor, edge_label_index: Tensor) -> Tensor:
        # Convert node embeddings to edge-level representations:
        edge_feat_user = x_user[edge_label_index[0]]
        edge_feat_movie = x_movie[edge_label_index[1]]
        # Apply dot-product to get a prediction per supervision edge:
        return (edge_feat_user * edge_feat_movie).sum(dim=-1)

class Model(torch.nn.Module):
    def __init__(self, num_user_feat, num_movie_feat, hidden_channels):
        super().__init__()
        # Instantiate homogeneous GNN:
        self.gnn = GNN(hidden_channels)
        self.user_batch_norm = torch.nn.BatchNorm1d(num_user_feat)
        self.movie_batch_norm = torch.nn.BatchNorm1d(num_movie_feat)
        self.movie_lin = torch.nn.Linear(num_movie_feat, hidden_channels)
        self.user_lin = torch.nn.Linear(num_user_feat, hidden_channels)
        # Convert GNN model into a heterogeneous variant:
        self.gnn = to_hetero(self.gnn, metadata=train_graph.metadata())

        self.classifier = Classifier()

    def forward(self, data: HeteroData) -> Tensor:
        x_dict = {
          "user": self.user_lin(self.user_batch_norm(data['user'].x)),
          "movie": self.movie_lin(self.movie_batch_norm(data["movie"].x)),
        }

        # `x_dict` holds feature matrices of all node types
        # `edge_index_dict` holds all edge indices of all edge types
        x_dict = self.gnn(x_dict, data.edge_index_dict)

        pred = self.classifier(
            x_dict["user"],
            x_dict["movie"],
            data["user", "rates", "movie"].edge_label_index,
        )

        return pred
    
model = Model(num_user_feat=train_data['user'].x.shape[1], 
              num_movie_feat=train_data['movie'].x.shape[1],
              hidden_channels=64)

print(model)

Model(
  (gnn): GraphModule(
    (conv1): ModuleDict(
      (user__rates__movie): SAGEConv(64, 64, aggr=mean)
      (movie__rev_rates__user): SAGEConv(64, 64, aggr=mean)
    )
    (conv2): ModuleDict(
      (user__rates__movie): SAGEConv(64, 64, aggr=mean)
      (movie__rev_rates__user): SAGEConv(64, 64, aggr=mean)
    )
  )
  (user_batch_norm): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (movie_batch_norm): BatchNorm1d(319, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (movie_lin): Linear(in_features=319, out_features=64, bias=True)
  (user_lin): Linear(in_features=3, out_features=64, bias=True)
  (classifier): Classifier()
)


### Training a Hetrogenous Link-level GNN

In [84]:
import tqdm
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: '{device}'")

model = model.to(device)  # Move the model to the device
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Define the optimizer

for epoch in range(1, 6):  # Training for 5 epochs
    total_loss = total_examples = 0  # Initialize loss and examples
    model.train()  # Set the model to training mode
    
    for sampled_data in tqdm.tqdm(train_loader):  # Iterate over the training loader
        optimizer.zero_grad()  # Zero the gradients
        
        # Move `sampled_data` to the device
        sampled_data = sampled_data.to(device)
        
        # Run the `forward` pass of the model
        pred = model(
            sampled_data
        )
        
        # Extract ground truth labels
        ground_truth = sampled_data["user", "rates", "movie"].edge_label
        
        # Apply binary cross-entropy loss
        loss = F.cross_entropy(pred, ground_truth)
        
        # Backward pass and optimization step
        loss.backward()
        optimizer.step()
        
        # Accumulate total loss and total examples
        total_loss += float(loss) * pred.numel()  # Scale loss by number of predictions
        total_examples += pred.numel()  # Count the number of predictions
    
    # Print epoch loss
    print(f"Epoch: {epoch:03d}, Loss: {total_loss / total_examples:.4f}")

Device: 'cpu'


  0%|          | 0/3798 [00:00<?, ?it/s]

tensor([3, 5, 0, 4, 5, 4, 3, 0, 3, 4, 4, 2, 4, 0, 4, 0, 4, 5, 4, 3, 4, 4, 0, 0,
        0, 1, 0, 5, 0, 0, 0, 3, 3, 0, 4, 0, 0, 0, 0, 3, 3, 3, 0, 4, 4, 3, 0, 0,
        0, 0, 4, 4, 0, 2, 0, 0, 0, 0, 1, 0, 4, 5, 0, 5, 0, 0, 1, 3, 3, 2, 4, 0,
        4, 0, 0, 0, 3, 5, 3, 4, 4, 4, 0, 0, 0, 5, 3, 0, 2, 5, 0, 0, 0, 5, 2, 5,
        0, 2, 0, 0, 3, 0, 4, 0, 3, 4, 5, 0, 0, 0, 2, 2, 0, 0, 4, 0, 0, 4, 0, 3,
        4, 3, 5, 4, 4, 0, 0, 0])





RuntimeError: Expected floating point type for target with class probabilities, got Long