In [None]:
import numpy as np
from matplotlib import pyplot as plt
import cv2
import os
import csv
from tqdm import tqdm
import pandas as pd

import torch; torch.manual_seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torch.distributions

In [None]:
from torch.utils.data import TensorDataset, DataLoader, ConcatDataset

# Label encoding: 0 = front, 1 = backward, 2 = right, 3 = jelly.

# One folder of recorded episodes per behaviour, paired position-wise with `labels`.
directories = [
    os.path.join(".", folder)
    for folder in ("output_front_ppo", "output_bw_sac", "output_right_ppo", "output_jelly")
]
labels = [0, 1, 2, 3]  # Change this to your actual labels

datasets = []  # one single-episode TensorDataset per episode folder

for directory, label in zip(directories, labels):
    # Each sub-directory is treated as one episode; sorting keeps the order stable.
    subdirs = sorted(entry.path for entry in os.scandir(directory) if entry.is_dir())

    # Zero-filled buffer holding one (1000, 10) slab per episode of this directory.
    combined_arr = np.zeros((len(subdirs), 1000, 10))

    for i, subdir in enumerate(subdirs):
        # Both CSV files are read without a header row.
        action_df = pd.read_csv(os.path.join(subdir, "action.csv"), header=None)
        obs_df = pd.read_csv(os.path.join(subdir, "obs.csv"), header=None)

        # Action columns first, observation columns after; the reshape raises
        # if the episode does not contain exactly 1000 * 10 values.
        combined_data = pd.concat([action_df, obs_df], axis=1)
        combined_arr[i] = combined_data.values.reshape(1000, 10)

        # Leading axis of size 1 so the episode lines up with its single label.
        combined_tensor = torch.from_numpy(combined_arr[i]).unsqueeze(0)

        datasets.append(TensorDataset(combined_tensor, torch.tensor([label])))

# Single dataset spanning every episode of every directory.
dataset = ConcatDataset(datasets)

# VAE code -1

In [None]:
#test

class Encoder(nn.Module):
    """Fully connected encoder producing Gaussian posterior parameters.

    Three Linear+ReLU stages are followed by a final Linear layer that emits
    ``2 * latent_dim`` values per sample.
    """

    def __init__(self, input_shape, hidden_dim1, hidden_dim2, hidden_dim3, latent_dim):
        super().__init__()

        # Consecutive width pairs describe the three hidden Linear layers.
        widths = [input_shape, hidden_dim1, hidden_dim2, hidden_dim3]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Output head: mean and log-variance side by side.
        layers.append(nn.Linear(hidden_dim3, latent_dim * 2))

        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(mu, log_var, h)`` where ``h`` is the raw output split in two along dim 1."""
        h = self.encoder(x)
        mu, log_var = torch.chunk(h, 2, dim=1)
        return mu, log_var, h


class Decoder(nn.Module):
    """Fully connected decoder mapping a latent code back to ``output_shape`` values.

    The hidden widths are applied in the order ``hidden_dim3``, ``hidden_dim2``,
    ``hidden_dim1``; the final Linear layer has no activation.
    """

    def __init__(self, latent_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_shape):
        super().__init__()

        widths = (latent_dim, hidden_dim3, hidden_dim2, hidden_dim1)
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        layers.append(nn.Linear(hidden_dim1, output_shape))

        self.decoder = nn.Sequential(*layers)

    def forward(self, z):
        """Return the reconstruction computed from latent code ``z``."""
        return self.decoder(z)


class VAE(nn.Module):
    """Variational autoencoder built from the ``Encoder`` / ``Decoder`` pair."""

    def __init__(self, input_shape, hidden_dim1, hidden_dim2, hidden_dim3, latent_dim):
        super().__init__()

        # The decoder receives the same hidden widths and reconstructs `input_shape` values.
        self.encoder = Encoder(input_shape, hidden_dim1, hidden_dim2, hidden_dim3, latent_dim)
        self.decoder = Decoder(latent_dim, hidden_dim1, hidden_dim2, hidden_dim3, input_shape)

    def reparameterize(self, mu, log_var):
        """Draw ``mu + eps * exp(log_var / 2)`` with ``eps`` sampled from a standard normal."""
        sigma = (0.5 * log_var).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def forward(self, x):
        """Return ``(x_reconstructed, mu, log_var)`` for input ``x``."""
        # The encoder's third output (its raw activation) is not needed here.
        mu, log_var, _ = self.encoder(x)
        sampled = self.reparameterize(mu, log_var)
        return self.decoder(sampled), mu, log_var

    
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import random_split

# ---- Experiment setup: data splits, loss history, hyper-parameters, model ----

# Define the data loader
batch_size = 256  # adjust as necessary

# Split the full dataset into train / validation / test (70 / 15 / 15).
# The per-label training loop below rebuilds splits and loaders under the
# same names, so these act as initial values.
train_size = int(0.7 * len(dataset))  # 70% for training
valid_size = int(0.15 * len(dataset))  # 15% for validation
test_size = len(dataset) - train_size - valid_size  # remainder, so the sizes sum exactly

train_dataset, valid_dataset, test_dataset = random_split(dataset, [train_size, valid_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Lists to store losses for each epoch
avg_losses = []
avg_recon_losses = []
avg_kl_divs = []

# Hyperparameters
# (An earlier, wider configuration used hidden sizes 128 / 64 / 24.)
# The flattened size of one sample is read from the dataset itself instead of
# from `combined_tensor`, a loop variable left behind by the data-loading cell.
input_shape = dataset[0][0].numel()
hidden_dim1 = 24  # modify as needed
hidden_dim2 = 16  # modify as needed
hidden_dim3 = 12  # modify as needed
latent_dim = 2  # modify as needed
lr = 5e-5  # learning rate
n_epochs = 200  # modify as needed
beta = 0.2  # weight applied to the KL term inside train()

# Model, optimizer, and loss function
model = VAE(input_shape, hidden_dim1, hidden_dim2, hidden_dim3, latent_dim)
optimizer = optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.MSELoss()  # mean-reduced reconstruction loss

# Define the label order
label_order = [0, 1, 2, 3]  # Modify this to define the order of labels

def train(epoch, model, optimizer, loss_fn, train_loader, kl_weight=None):
    """Run one training epoch and return per-sample average losses.

    The optimised objective for each batch is
    ``loss_fn(reconstruction, input) + kl_weight * KL``, where ``KL`` is summed
    over the batch. The reported averages are per sample: batch-mean
    reconstruction losses are weighted by the batch size before averaging,
    so the reported value no longer depends on the batch size.

    Args:
        epoch: Epoch number; used only in the progress message.
        model: Module whose forward pass returns ``(reconstruction, mu, log_var)``.
        optimizer: Optimiser updating ``model``'s parameters.
        loss_fn: Reconstruction criterion called as ``loss_fn(reconstruction, input)``.
            It is assumed to return a batch mean (as the default ``nn.MSELoss()`` does).
        train_loader: Iterable of ``(batch_data, batch_labels)`` pairs; the labels are unused.
        kl_weight: Weight of the KL term. ``None`` (default) uses the
            module-level ``beta``.

    Returns:
        Tuple ``(avg_loss, avg_recon_loss, avg_kl_div)`` where
        ``avg_loss == avg_recon_loss + kl_weight * avg_kl_div``.
    """
    if kl_weight is None:
        kl_weight = beta  # fall back to the notebook-level hyper-parameter

    model.train()
    recon_sum = 0.0
    kl_sum = 0.0
    n_samples = 0

    for batch_data, _ in train_loader:
        optimizer.zero_grad()

        # Flatten each sample and cast to float32 before the forward pass.
        batch_data = batch_data.view(batch_data.size(0), -1).float()
        n_batch = batch_data.size(0)

        reconstructed_batch, mu, log_var = model(batch_data)

        # Loss: reconstruction loss + weighted KL divergence (summed over the batch).
        recon_loss = loss_fn(reconstructed_batch, batch_data)
        kl_divergence = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        loss = recon_loss + kl_weight * kl_divergence

        loss.backward()
        optimizer.step()

        # recon_loss is a batch mean, so weight it by the batch size; the KL
        # term is already a sum over the batch.
        recon_sum += recon_loss.item() * n_batch
        kl_sum += kl_divergence.item()
        n_samples += n_batch

    denom = max(n_samples, 1)  # avoid dividing by zero when the loader is empty
    avg_recon_loss = recon_sum / denom
    avg_kl_div = kl_sum / denom
    avg_loss = avg_recon_loss + kl_weight * avg_kl_div
    print(f'====> Epoch: {epoch} Average loss: {avg_loss}, Recon Loss: {avg_recon_loss}, KL Div: {avg_kl_div}')

    return avg_loss, avg_recon_loss, avg_kl_div


# Train the one shared model on each label's samples in turn, in `label_order`.
for label in label_order:
    print(f"Training for label {label}")

    # Keep only the samples carrying the current label.
    filtered_dataset = list(filter(lambda data: data[1] == label, dataset))

    # 70 / 15 / 15 split; the test share takes whatever remains after rounding.
    train_size = int(0.7 * len(filtered_dataset))
    valid_size = int(0.15 * len(filtered_dataset))
    test_size = len(filtered_dataset) - train_size - valid_size
    train_dataset, valid_dataset, test_dataset = random_split(
        filtered_dataset, [train_size, valid_size, test_size]
    )

    # One shuffling loader per split, all with the shared batch size.
    train_loader, valid_loader, test_loader = (
        DataLoader(split, batch_size=batch_size, shuffle=True)
        for split in (train_dataset, valid_dataset, test_dataset)
    )

    # Run the epochs and extend the shared loss histories.
    for epoch in range(1, n_epochs + 1):
        avg_loss, avg_recon_loss, avg_kl_div = train(epoch, model, optimizer, loss_fn, train_loader)
        avg_losses.append(avg_loss)
        avg_recon_losses.append(avg_recon_loss)
        avg_kl_divs.append(avg_kl_div)


    
import matplotlib.pyplot as plt

# Loss curves: one point per training epoch, in the order the epochs were run.
plt.figure(figsize=(7,5))
plt.plot(avg_losses, label='Average Loss')
plt.plot(avg_recon_losses, label='Reconstruction Loss')
plt.plot(avg_kl_divs, label='KL Divergence')
# Title and axis labels are set in one call on the current axes.
plt.gca().set(title='Losses', xlabel='Epochs', ylabel='Loss')
plt.legend()
plt.show()


In [None]:
# Encode a test split of every label with the trained encoder and scatter-plot
# the first two encoder outputs, coloured by label.
encoded_representations = []
labels_list = []

model.eval()
for label in label_order:
    # This loop only runs inference, so the progress message says "Encoding".
    print(f"Encoding test split for label {label}")

    # Filter the dataset for the current label
    filtered_dataset = [data for data in dataset if data[1] == label]

    # NOTE(review): random_split is called afresh here, so this test subset is a
    # new random draw and not necessarily the subset held out in the training cell.
    train_size = int(0.7 * len(filtered_dataset))
    valid_size = int(0.15 * len(filtered_dataset))
    test_size = len(filtered_dataset) - train_size - valid_size
    train_dataset, valid_dataset, test_dataset = random_split(filtered_dataset, [train_size, valid_size, test_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    with torch.no_grad():
        for batch in test_loader:
            # Each batch is (data, labels); flatten every sample and cast to float32.
            batch_data = batch[0]
            batch_data = batch_data.view(batch_data.size(0), -1)
            batch_data = batch_data.float()
            # h is the raw encoder output; its first half along dim 1 is mu.
            _, _, h = model.encoder(batch_data)

            encoded_representations.append(h)
            labels_list.extend([label] * len(batch_data))  # one label entry per sample

# Convert the encoded representations to a numpy array (moved to CPU first).
encoded_representations = torch.cat(encoded_representations, dim=0).cpu().numpy()
labels = np.array(labels_list)

# Create a list of colors for each label
colors = ['r', 'g', 'b', 'c']  # Add more colors as needed

# Create a scatter plot with different colors for each label
plt.figure(figsize=(8, 6))
for label in np.unique(labels):
    mask = labels == label
    plt.scatter(encoded_representations[mask, 0], encoded_representations[mask, 1], 
                alpha=0.5, s=3, label=f'Label {label}', color=colors[label])

plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.title('2D Visualization of Encoded Representations (h values)')
plt.legend()
plt.show()


# Latent space plot

In [None]:
def plot_latent(vae, data_loader, num_batches=100):
    """Scatter-plot sampled latent codes (first two dimensions) for a data loader.

    Args:
        vae: Model exposing ``encoder(x)``, whose first two outputs are
            ``mu`` and ``log_var``, and ``reparameterize(mu, log_var)``.
        data_loader: Iterable yielding either a data tensor or a
            ``(data, labels)`` sequence per batch.
        num_batches: Maximum number of batches to encode.

    Raises:
        ValueError: If no batch was encoded (empty loader or ``num_batches <= 0``).
    """
    vae.eval()  # Set the VAE model to evaluation mode
    all_z = []
    all_labels = []

    with torch.no_grad():
        for i, batch in enumerate(data_loader):
            if i >= num_batches:  # checked first so exactly `num_batches` batches are used
                break

            # Loaders over (data, label) datasets yield a sequence, not a bare tensor.
            if isinstance(batch, (list, tuple)):
                x = batch[0]
                y = batch[1] if len(batch) > 1 else None
            else:
                x, y = batch, None

            # Flatten each sample and cast to float32 before encoding.
            x = x.view(x.size(0), -1).float()

            # The encoder may return extra values after (mu, log_var); keep the first two.
            mu, log_var = vae.encoder(x)[:2]
            z = vae.reparameterize(mu, log_var)

            all_z.append(z.cpu())
            if torch.is_tensor(y):
                all_labels.append(y.reshape(-1).cpu())

    if not all_z:
        raise ValueError("plot_latent: no batches were available to plot")

    # Colour by label only when every encoded batch came with labels.
    point_labels = torch.cat(all_labels, dim=0).numpy() if len(all_labels) == len(all_z) else None
    all_z = torch.cat(all_z, dim=0).numpy()

    if point_labels is not None and len(point_labels) == len(all_z):
        points = plt.scatter(all_z[:, 0], all_z[:, 1], c=point_labels, cmap='tab10')
        plt.colorbar(points, label='Label')  # a colorbar needs an explicit mappable
    else:
        plt.scatter(all_z[:, 0], all_z[:, 1])
    plt.title('Latent Space Visualization')
    plt.xlabel('Latent Dimension 1')
    plt.ylabel('Latent Dimension 2')
    plt.show()


# Visualise the latent codes of the batches in the current `test_loader`.
plot_latent(model, test_loader)

In [None]:
def plot_latent(model, data_loader):
    """Scatter-plot sampled latent codes (first two dimensions) for every batch.

    Args:
        model: Model exposing ``encoder(x)``, whose first two outputs are
            ``mu`` and ``log_var``, and ``reparameterize(mu, log_var)``.
        data_loader: Iterable yielding either a data tensor or a
            ``(data, labels)`` sequence per batch.

    Raises:
        ValueError: If the loader yields no batches.
    """
    # Set the model to evaluation mode
    model.eval()
    latents = []
    label_chunks = []

    with torch.no_grad():
        for batch in data_loader:
            # Loaders over (data, label) datasets yield a sequence, not a bare tensor.
            if isinstance(batch, (list, tuple)):
                x = batch[0]
                y = batch[1] if len(batch) > 1 else None
            else:
                x, y = batch, None

            # Flatten each sample and cast to float32 before encoding.
            x = x.view(x.size(0), -1).float()

            # The encoder may return extra values after (mu, log_var); keep the first two.
            mu, log_var = model.encoder(x)[:2]
            z = model.reparameterize(mu, log_var)

            latents.append(z.detach().cpu().numpy())
            if torch.is_tensor(y):
                label_chunks.append(y.reshape(-1).cpu().numpy())

    if not latents:
        raise ValueError("plot_latent: data_loader yielded no batches")

    # Colour by label only when every batch came with labels.
    point_labels = np.concatenate(label_chunks, axis=0) if len(label_chunks) == len(latents) else None
    latents = np.concatenate(latents, axis=0)

    plt.figure(figsize=(7,5))
    if point_labels is not None and len(point_labels) == len(latents):
        points = plt.scatter(latents[:, 0], latents[:, 1], s=2, c=point_labels, cmap='tab10')
        plt.colorbar(points, label='Label')  # a colorbar needs an explicit mappable
    else:
        plt.scatter(latents[:, 0], latents[:, 1], s=2)
    plt.show()

# Call the function to plot latent representations of the batches in the
# current `test_loader`.
plot_latent(model, test_loader)


# Inspect the reconstructed output

In [None]:
from collections import defaultdict
import pandas as pd

def test_model(model, test_loader):
    model.eval()
    
    # 각 레이블에 대한 원본 및 재구성 데이터 프레임을 저장합니다.
    original_dfs = defaultdict(list)
    reconstructed_dfs = defaultdict(list)

    with torch.no_grad():
        for i, (batch_data, batch_labels) in enumerate(test_loader):
            batch_data = batch_data.view(batch_data.size(0), -1)
            batch_data = batch_data.float()
            reconstructed_batch, _, _ = model(batch_data)

            original_data = batch_data.detach().cpu().numpy()
            reconstructed_data = reconstructed_batch.detach().cpu().numpy()
            labels = batch_labels.detach().cpu().numpy()

            # 각 레이블에 대해 데이터 프레임을 생성하고 저장합니다.
            for label, orig, recon in zip(labels, original_data, reconstructed_data):
                original_dfs[label].append(pd.DataFrame(orig.reshape(1, -1)))
                reconstructed_dfs[label].append(pd.DataFrame(recon.reshape(1, -1)))

    # 각 레이블의 데이터 프레임을 연결합니다.
    for label in original_dfs.keys():
        original_dfs[label] = pd.concat(original_dfs[label])
        reconstructed_dfs[label] = pd.concat(reconstructed_dfs[label])

    return original_dfs, reconstructed_dfs

# Call the function after training.
original_dfs, reconstructed_dfs = test_model(model, test_loader)

# Fetch the original and reconstructed frames for one label.
# NOTE(review): the variables are named "label1" but the key used is 3 —
# confirm which label is intended.
label1_original_df = original_dfs[3]
label1_reconstructed_df = reconstructed_dfs[3]


In [None]:
# Display the frame of original test samples (one flattened sample per row).
label1_original_df

In [None]:
# Display the frame of reconstructed test samples (one flattened sample per row).
label1_reconstructed_df

In [None]:
# Take the first reconstructed sample, stored as one flattened row.
first_row = label1_reconstructed_df.iloc[0]

# Restore the (1000, 10) layout the episodes had when they were loaded.
reshaped_array = first_row.values.reshape(1000, 10)

# Keep the sample both as a DataFrame and as a tensor copy.
reshaped_df = pd.DataFrame(reshaped_array)
recon_combined_tensor = torch.tensor(reshaped_df.to_numpy())


In [None]:
# NOTE(review): `replay` is not defined in the visible part of this notebook —
# presumably it comes from another cell or module; confirm before running.
# At this point `reshaped_df` holds the reconstructed sample.
replay(reshaped_df)

In [None]:
# Take the first original sample, stored as one flattened row.
first_row = label1_original_df.iloc[0]

# Restore the (1000, 10) layout the episodes had when they were loaded.
reshaped_array = first_row.values.reshape(1000, 10)

# Keep the sample both as a DataFrame and as a tensor copy.
reshaped_df = pd.DataFrame(reshaped_array)
ori_combined_tensor = torch.tensor(reshaped_df.to_numpy())


In [None]:
# NOTE(review): `replay` is not defined in the visible part of this notebook —
# presumably it comes from another cell or module; confirm before running.
# At this point `reshaped_df` holds the original sample.
replay(reshaped_df)

In [None]:
# Element-wise difference between the reconstructed and the original sample.
recon_combined_tensor-ori_combined_tensor

In [None]:
# Collapse both samples to 1-D vectors.
flattened_ori = ori_combined_tensor.reshape(-1)
flattened_recon = recon_combined_tensor.reshape(-1)

# Euclidean (L2) distance between original and reconstruction.
euclidean_distance = (flattened_ori - flattened_recon).norm()


print(euclidean_distance)


In [None]:
ori_combined_np = label1_original_df.to_numpy()
recon_combined_np = label1_reconstructed_df.to_numpy()

# Column names
# NOTE(review): assumes the first two columns are actions and the remaining
# eight are observations, following the action/obs concatenation order — confirm.
column_names = ['action space : Torque applied on the first rotor', 
                'action space : Torque applied on the second rotor', 
                'obs0', 'obs1', 'obs2', 'obs3', 'obs4', 'obs5', 'obs6', 'obs7']

# Each stored row is one flattened (timesteps, features) sample. Un-flatten to
# one row per timestep so that column i really is feature i; indexing row i of
# the flattened array would select a whole sample instead.
n_features = len(column_names)
ori_by_feature = ori_combined_np.reshape(-1, n_features)
recon_by_feature = recon_combined_np.reshape(-1, n_features)

# Create a figure
plt.figure(figsize=(20, 20))

# Create one subplot per feature column
for i in range(n_features):
    plt.subplot(5, 2, i+1)  # 5 rows and 2 columns of subplots
    plt.scatter(ori_by_feature[:, i], recon_by_feature[:, i], alpha=0.5, s=5)
    plt.title(column_names[i])
    plt.xlabel('Original')
    plt.ylabel('Reconstructed')

# Display the plot
plt.tight_layout()
plt.show()


In [None]:
# Shape check: rows are test samples, columns are the flattened values of each sample.
ori_combined_np.shape

In [None]:
# Titles for the ten subplots.
column_names = ['action space : Torque applied on the first rotor', 
                'action space : Torque applied on the second rotor', 
                'obs0', 'obs1', 'obs2', 'obs3', 'obs4', 'obs5', 'obs6', 'obs7']

# One large figure holding a 5 x 2 grid of scatter plots.
plt.figure(figsize=(20, 20))

# NOTE(review): the rows of `ori_combined_np` are flattened samples, so columns
# 0-9 are only the first ten flattened values of each sample — confirm that
# this is the intended comparison.
for i in range(10):
    plt.subplot(5, 2, i+1)
    plt.scatter(ori_combined_np[:, i], recon_combined_np[:, i], alpha=0.2)
    plt.gca().set(title=column_names[i], xlabel='Original', ylabel='Reconstructed')

# Tidy the layout and render.
plt.tight_layout()
plt.show()


In [None]:
# Encode a test split of every label with the trained encoder and scatter-plot
# the first two encoder outputs, coloured by label.
encoded_representations = []
labels_list = []

model.eval()
for label in label_order:
    # This loop only runs inference, so the progress message says "Encoding".
    print(f"Encoding test split for label {label}")

    # Filter the dataset for the current label
    filtered_dataset = [data for data in dataset if data[1] == label]

    # NOTE(review): random_split is called afresh here, so this test subset is a
    # new random draw and not necessarily the subset held out in the training cell.
    train_size = int(0.7 * len(filtered_dataset))
    valid_size = int(0.15 * len(filtered_dataset))
    test_size = len(filtered_dataset) - train_size - valid_size
    train_dataset, valid_dataset, test_dataset = random_split(filtered_dataset, [train_size, valid_size, test_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    with torch.no_grad():
        for batch in test_loader:
            # Each batch is (data, labels); flatten every sample and cast to float32.
            batch_data = batch[0]
            batch_data = batch_data.view(batch_data.size(0), -1)
            batch_data = batch_data.float()
            # h is the raw encoder output; its first half along dim 1 is mu.
            _, _, h = model.encoder(batch_data)

            encoded_representations.append(h)
            labels_list.extend([label] * len(batch_data))  # one label entry per sample

# Convert the encoded representations to a numpy array (moved to CPU first).
encoded_representations = torch.cat(encoded_representations, dim=0).cpu().numpy()
labels = np.array(labels_list)

# Create a list of colors for each label
colors = ['r', 'g', 'b', 'c']  # Add more colors as needed

# Create a scatter plot with different colors for each label
plt.figure(figsize=(8, 6))
for label in np.unique(labels):
    mask = labels == label
    plt.scatter(encoded_representations[mask, 0], encoded_representations[mask, 1], 
                alpha=0.5, s=3, label=f'Label {label}', color=colors[label])

plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.title('2D Visualization of Encoded Representations (h values)')
plt.legend()
plt.show()
