# Autoencoder
- Auto = self
- encode = convert into a different form
- Autoencoder = a system that teaches itself how to encode information
- The number of outputs equals the number of inputs to the model

# Structure of the layers
- Encode: input > layers > bottleneck or latent code (central node)
- Decode: bottleneck or latent code > layers > output (reconstruction of the input)

#  Goal of autoencoder:
- Get the output to match the input as closely as possible
- data compression or dimensionality reduction
- data cleaning (denoising, despeckling, occlusion)  
- feature extraction
- anomaly / fraud detection
- pretraining deep or complex models   


In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [2]:
from preprocess.preprocess import MovieLens

# Instantiate the project's MovieLens helper; it is used further down to
# build the training loader via `dataset_encoder`.
ml = MovieLens()

In [13]:
# Build the training loader from the MovieLens helper.
# NOTE(review): 32 is presumably the batch size — confirm against
# MovieLens.dataset_encoder.
train_loader = ml.dataset_encoder(32)

In [14]:
# Pull the first item out of the loader, e.g. to inspect what one batch
# looks like before wiring it into a model.
x = next(iter(train_loader))

In [13]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a single 256-unit hidden layer per side.

    The encoder maps ``num_features`` inputs down to an ``embedding_dim``
    latent code; the decoder mirrors it and maps the code back to
    ``num_features`` outputs.
    """

    def __init__(self, num_features, embedding_dim):
        super().__init__()
        hidden_units = 256
        # Encoder is built before the decoder so parameter registration (and
        # random initialisation) order stays encoder-first.
        self.encoder = self._two_layer_mlp(num_features, hidden_units, embedding_dim)
        self.decoder = self._two_layer_mlp(embedding_dim, hidden_units, num_features)

    @staticmethod
    def _two_layer_mlp(in_dim, hidden_dim, out_dim):
        """Return Linear -> ReLU -> Linear as an ``nn.Sequential``."""
        return nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, out_dim),
        )

    def forward(self, x):
        """Return ``(latent_code, reconstruction)`` for the input ``x``."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction

In [None]:
# Define model, optimizer, and loss function
def train_model(num_features, embedding_dim, num_epochs, data_loader=None, lr=0.001):
    """Train an ``Autoencoder`` to reconstruct its input and return it.

    Args:
        num_features: Width of each input row fed to the autoencoder.
        embedding_dim: Size of the latent code.
        num_epochs: Number of full passes over the data.
        data_loader: Iterable yielding input batches (tensors). When omitted,
            the notebook-level global ``training_data`` is used, which is what
            the function relied on before this parameter existed.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``Autoencoder`` instance.

    Raises:
        ValueError: If an epoch yields no batches, since there would be no
            loss to report and nothing would have been trained.
    """
    if data_loader is None:
        # Backward-compatible fallback to the global the original code read.
        data_loader = training_data

    model = Autoencoder(num_features, embedding_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for user_ratings in data_loader:
            optimizer.zero_grad()
            # Forward pass: only the reconstruction is needed for the loss.
            _, decoded = model(user_ratings)
            loss = criterion(decoded, user_ratings)  # Reconstruct the user ratings
            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("data_loader yielded no batches; nothing to train on")

        # Report the mean loss over the epoch rather than only the last batch,
        # which is noisy and undefined when the loader is empty.
        print(f"Epoch: {epoch+1}/{num_epochs}, Loss: {running_loss / num_batches:.4f}")

    return model

In [None]:
# After training, extract user embeddings for recommendation.
# This is inference only, so run the forward passes under no_grad: no
# autograd graph is built per batch and the resulting tensors are already
# detached. `validation_data` can be swapped for test data as well.
with torch.no_grad():
    user_embeddings = [model(user_ratings)[0] for user_ratings in validation_data]



In [None]:
# Example of using user embeddings with another model (replace with your model)
class LogisticRegressionRecommender(nn.Module):
    """Single linear layer with a sigmoid over concatenated user inputs.

    The layer takes ``num_features + embedding_dim`` inputs (raw user
    features joined with a user embedding) and produces ``num_items``
    outputs, each squashed into (0, 1) by a sigmoid.
    """

    def __init__(self, num_features, embedding_dim, num_items):
        super().__init__()
        input_width = num_features + embedding_dim
        self.linear = nn.Linear(input_width, num_items)

    def forward(self, user_features, user_embedding):
        """Concatenate both inputs along dim 1 and return sigmoid scores."""
        joined = torch.cat([user_features, user_embedding], dim=1)
        logits = self.linear(joined)
        return torch.sigmoid(logits)

# ... (train and use the LogisticRegressionRecommender model)
