In [11]:
import numpy as np
import torch

# Função para extrair o tempo das sequências
def extract_time(data):
    time = []
    max_seq_len = 0
    for i in range(len(data)):
        temp_time = len(data[i])
        time.append(temp_time)
        if temp_time > max_seq_len:
            max_seq_len = temp_time
    return time, max_seq_len

# Função para criar um rnn_cell (não utilizada diretamente, mas incluída para completar)
def rnn_cell(module_name, hidden_dim):
    if module_name == 'gru':
        rnn = torch.nn.GRU(hidden_dim, hidden_dim, batch_first=True)
    elif module_name == 'lstm':
        rnn = torch.nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
    else:
        rnn = torch.nn.RNN(hidden_dim, hidden_dim, batch_first=True)
    return rnn

# Gerador de dados aleatórios para o Z
def random_generator(batch_size, z_dim, T_mb, max_seq_len):
    Z_mb = []
    for i in range(batch_size):
        temp = np.zeros([max_seq_len, z_dim])
        temp_Z = np.random.uniform(0., 1, [T_mb[i], z_dim])
        temp[:T_mb[i], :] = temp_Z
        Z_mb.append(temp_Z)
    return Z_mb

# Gerador de lotes (batches) de dados para o treinamento
def batch_generator(ori_data, ori_time, batch_size):
    no = len(ori_data)
    idx = np.random.permutation(no)[:batch_size]
    X_mb = [ori_data[i] for i in idx]
    T_mb = [ori_time[i] for i in idx]
    
    return X_mb, T_mb


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
#from utils import extract_time, random_generator, batch_generator

class TimeGAN:
    def __init__(self, ori_data, parameters):
        # Basic Parameters
        self.ori_data = ori_data
        self.no, self.seq_len, self.dim = np.asarray(ori_data).shape
        self.ori_time, self.max_seq_len = extract_time(ori_data)
        self.max_val = np.max(ori_data)
        self.min_val = np.min(ori_data)
        
        # Network Parameters
        self.hidden_dim = parameters['hidden_dim']
        self.num_layers = parameters['num_layer']
        self.iterations = parameters['iterations']
        self.batch_size = parameters['batch_size']
        self.module_name = parameters['module']
        self.z_dim = self.dim
        self.gamma = 1
        
        # Initialize Networks
        self.embedder = self.Embedder(self.dim, self.hidden_dim, self.num_layers)
        self.recovery = self.Recovery(self.hidden_dim, self.dim, self.num_layers)
        self.generator = self.Generator(self.z_dim, self.hidden_dim, self.num_layers)
        self.supervisor = self.Supervisor(self.hidden_dim, self.num_layers)
        self.discriminator = self.Discriminator(self.hidden_dim, self.num_layers)

        # Optimizers
        self.e_optimizer = optim.Adam(list(self.embedder.parameters()) + list(self.recovery.parameters()))
        self.d_optimizer = optim.Adam(self.discriminator.parameters())
        self.g_optimizer = optim.Adam(list(self.generator.parameters()) + list(self.supervisor.parameters()))
        self.gs_optimizer = optim.Adam(list(self.generator.parameters()) + list(self.supervisor.parameters()))

    class Embedder(nn.Module):
        def __init__(self, input_dim, hidden_dim, num_layers):
            super(TimeGAN.Embedder, self).__init__()
            self.rnn = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_dim, hidden_dim)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            h, _ = self.rnn(x)
            h = self.fc(h)
            h = self.sigmoid(h)
            return h

    class Recovery(nn.Module):
        def __init__(self, hidden_dim, output_dim, num_layers):
            super(TimeGAN.Recovery, self).__init__()
            self.rnn = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_dim, output_dim)
            self.sigmoid = nn.Sigmoid()

        def forward(self, h):
            x_tilde, _ = self.rnn(h)
            x_tilde = self.fc(x_tilde)
            x_tilde = self.sigmoid(x_tilde)
            return x_tilde

    class Generator(nn.Module):
        def __init__(self, z_dim, hidden_dim, num_layers):
            super(TimeGAN.Generator, self).__init__()
            self.rnn = nn.LSTM(z_dim, hidden_dim, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_dim, hidden_dim)
            self.sigmoid = nn.Sigmoid()

        def forward(self, z):
            e_hat, _ = self.rnn(z)
            e_hat = self.fc(e_hat)
            e_hat = self.sigmoid(e_hat)
            return e_hat

    class Supervisor(nn.Module):
        def __init__(self, hidden_dim, num_layers):
            super(TimeGAN.Supervisor, self).__init__()
            self.rnn = nn.LSTM(hidden_dim, hidden_dim, num_layers - 1, batch_first=True)
            self.fc = nn.Linear(hidden_dim, hidden_dim)
            self.sigmoid = nn.Sigmoid()

        def forward(self, h):
            s, _ = self.rnn(h)
            s = self.fc(s)
            s = self.sigmoid(s)
            return s

    class Discriminator(nn.Module):
        def __init__(self, hidden_dim, num_layers):
            super(TimeGAN.Discriminator, self).__init__()
            self.rnn = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_dim, 1)

        def forward(self, h):
            y_hat, _ = self.rnn(h)
            y_hat = self.fc(y_hat)
            return y_hat

    def MinMaxScaler(self, data):
        min_val = np.min(np.min(data, axis=0), axis=0)
        max_val = np.max(np.max(data, axis=0), axis=0)
        norm_data = (data - min_val) / (max_val + 1e-7)
        return norm_data, min_val, max_val

    def loss(self, y_real, y_fake, h_real, h_fake, x_real, x_tilde, x_hat, gamma):
        d_loss_real = nn.BCEWithLogitsLoss()(y_real, torch.ones_like(y_real))
        d_loss_fake = nn.BCEWithLogitsLoss()(y_fake, torch.zeros_like(y_fake))
        d_loss_fake_e = nn.BCEWithLogitsLoss()(h_real, torch.zeros_like(h_real))
        d_loss = d_loss_real + d_loss_fake + gamma * d_loss_fake_e

        g_loss_u = nn.BCEWithLogitsLoss()(y_fake, torch.ones_like(y_fake))
        g_loss_u_e = nn.BCEWithLogitsLoss()(h_fake, torch.ones_like(h_fake))
        g_loss_s = nn.MSELoss()(h_real[:, 1:, :], h_fake[:, :-1, :])

        g_loss_v1 = torch.mean(torch.abs(torch.sqrt(torch.var(x_hat, dim=0) + 1e-6) - torch.sqrt(torch.var(x_real, dim=0) + 1e-6)))
        g_loss_v2 = torch.mean(torch.abs(torch.mean(x_hat, dim=0) - torch.mean(x_real, dim=0)))
        g_loss_v = g_loss_v1 + g_loss_v2

        g_loss = g_loss_u + gamma * g_loss_u_e + 100 * torch.sqrt(g_loss_s) + 100 * g_loss_v
        e_loss_t0 = 10 * torch.sqrt(nn.MSELoss()(x_real, x_tilde))
        e_loss = e_loss_t0 + 0.1 * g_loss_s

        return d_loss, g_loss, e_loss

    def train(self):
        # Normalization
        ori_data, min_val, max_val = self.MinMaxScaler(self.ori_data)
        ori_data = torch.FloatTensor(ori_data)

        # TimeGAN training
        print('Start Embedding Network Training')
        for itt in range(self.iterations):
            X_mb, T_mb = batch_generator(ori_data, self.ori_time, self.batch_size)
            X_mb = torch.FloatTensor(X_mb)
            T_mb = torch.LongTensor(T_mb)

            self.e_optimizer.zero_grad()
            H = self.embedder(X_mb)
            X_tilde = self.recovery(H)
            e_loss_t0 = nn.MSELoss()(X_mb, X_tilde)
            e_loss_t0.backward()
            self.e_optimizer.step()

            if itt % 1000 == 0:
                print(f'step: {itt}/{self.iterations}, e_loss: {np.round(np.sqrt(e_loss_t0.item()), 4)}')

        print('Finish Embedding Network Training')

        # Training with Supervised Loss Only
        print('Start Training with Supervised Loss Only')
        for itt in range(self.iterations):
            X_mb, T_mb = batch_generator(ori_data, self.ori_time, self.batch_size)
            Z_mb = random_generator(self.batch_size, self.z_dim, T_mb, self.max_seq_len)

            X_mb = torch.FloatTensor(X_mb)
            Z_mb = torch.FloatTensor(Z_mb)
            T_mb = torch.LongTensor(T_mb)

            self.gs_optimizer.zero_grad()
            E_hat = self.generator(Z_mb)
            H_hat = self.supervisor(E_hat)
            g_loss_s = nn.MSELoss()(H_hat[:, 1:, :], H_hat[:, :-1, :])
            g_loss_s.backward()
            self.gs_optimizer.step()

            if itt % 1000 == 0:
                print(f'step: {itt}/{self.iterations}, s_loss: {np.round(np.sqrt(g_loss_s.item()), 4)}')

        print('Finish Training with Supervised Loss Only')

        # Joint Training
        print('Start Joint Training')
        for itt in range(self.iterations):
            for kk in range(2):
                X_mb, T_mb = batch_generator(ori_data, self.ori_time, self.batch_size)
                Z_mb = random_generator(self.batch_size, self.z_dim, T_mb, self.max_seq_len)

                X_mb = torch.FloatTensor(X_mb)
                Z_mb = torch.FloatTensor(Z_mb)
                T_mb = torch.LongTensor(T_mb)

                self.g_optimizer.zero_grad()
                E_hat = self.generator(Z_mb)
                H_hat = self.supervisor(E_hat)
                X_hat = self.recovery(H_hat)

                H = self.embedder(X_mb)
                X_tilde = self.recovery(H)

                Y_fake = self.discriminator(H_hat)
                Y_real = self.discriminator(H)
                Y_fake_e = self.discriminator(E_hat)

                d_loss, g_loss, e_loss = self.loss(Y_real, Y_fake, H, H_hat, X_mb, X_tilde, X_hat, self.gamma)

                d_loss.backward(retain_graph=True)
                g_loss.backward(retain_graph=True)
                e_loss.backward()

                self.d_optimizer.step()
                self.g_optimizer.step()
                self.e_optimizer.step()
                

            if itt % 1000 == 0:
                print(f'step: {itt}/{self.iterations}, d_loss: {np.round(d_loss.item(), 4)}, g_loss: {np.round(g_loss.item(), 4)}, e_loss: {np.round(e_loss.item(), 4)}')

        print('Finish Joint Training')

    def generate(self, num_samples):
        Z_mb = random_generator(num_samples, self.z_dim, [self.max_seq_len]*num_samples, self.max_seq_len)
        Z_mb = torch.FloatTensor(Z_mb)
        E_hat = self.generator(Z_mb)
        H_hat = self.supervisor(E_hat)
        X_hat = self.recovery(H_hat)
        generated_data = X_hat.detach().numpy()
        
        generated_data = generated_data * (self.max_val - self.min_val + 1e-7) + self.min_val
        return generated_data

# Example Usage
parameters = {
    'hidden_dim': 24,
    'num_layer': 3,
    'iterations': 5000,
    'batch_size': 128,
    'module': 'gru'
}




In [13]:
import os
import torch
import numpy as np
from standartized_balanced import StandardizedBalancedDataset

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Função para carregar os dados
def get_data(dataset_name, sensors, normalize_data):    
    data_folder = f"/HDD/dados/amparo/meta4/M4-Framework-Experiments/experiments/experiment_executor/data/standartized_balanced/{dataset_name}/"
    dataset = StandardizedBalancedDataset(data_folder, sensors=sensors)
    X_train, y_train, X_test, y_test, X_val, y_val = dataset.get_all_data(normalize_data=normalize_data, resize_data=True)
    return X_train, y_train, X_test, y_test, X_val, y_val

# Função para extrair o tempo das sequências
def extract_time(data):
    time = []
    max_seq_len = 0
    for i in range(len(data)):
        temp_time = len(data[i])
        time.append(temp_time)
        if temp_time > max_seq_len:
            max_seq_len = temp_time
    return time, max_seq_len

# Função para gerar lotes de dados
def batch_generator(X, T, batch_size):
    no = len(X)
    idx = np.random.permutation(no)[:batch_size]
    X_mb = [X[i] for i in idx]
    T_mb = [T[i] for i in idx]
    
    return np.array(X_mb), np.array(T_mb)

# Carregar os dados
X_train, y_train, X_test, y_test, X_val, y_val = get_data("MotionSense", ['accel', 'gyro'], False)

# Obter o formato de entrada
input_shape = X_train[0].shape
print("X_train.shape", X_train.shape)
print("y_train.shape", y_train.shape)

# Separar os dados por classe
class_data = defaultdict(list)
for X, y in zip(X_train, y_train):
    class_data[y].append(X)

# Gerar dados sintéticos para cada classe
synthetic_data_by_class = {}
num_samples_per_class = 100  # Número de amostras sintéticas para gerar por classe

for class_label, X_class_data in class_data.items():
    X_class_data = np.array(X_class_data)
    ori_time, _ = extract_time(X_class_data)
    
    # Treinar um TimeGAN para esta classe
    timegan = TimeGAN(X_class_data, parameters)
    timegan.train()  # Aqui, você poderia ajustar `retain_graph=True` se necessário.
    
    # Gerar dados sintéticos
    synthetic_data = timegan.generate(num_samples=num_samples_per_class)
    synthetic_data_by_class[class_label] = synthetic_data
    
    print(f"Generated {num_samples_per_class} synthetic samples for class {class_label}")

# Verifique os dados sintéticos gerados para cada classe
for class_label, synthetic_data in synthetic_data_by_class.items():
    print(f"Class {class_label} synthetic data shape: {synthetic_data.shape}")


X_train.shape (3558, 60, 6)
y_train.shape (3558,)
Start Embedding Network Training
step: 0/5000, e_loss: 0.3781
step: 1000/5000, e_loss: 0.0997
step: 2000/5000, e_loss: 0.0972
step: 3000/5000, e_loss: 0.1022
step: 4000/5000, e_loss: 0.098
Finish Embedding Network Training
Start Training with Supervised Loss Only
step: 0/5000, s_loss: 0.0006
step: 1000/5000, s_loss: 0.0
step: 2000/5000, s_loss: 0.0
step: 3000/5000, s_loss: 0.0
step: 4000/5000, s_loss: 0.0
Finish Training with Supervised Loss Only
Start Joint Training
step: 0/5000, d_loss: 2.6841, g_loss: 58.7047, e_loss: 1.0383
step: 1000/5000, d_loss: 2.389, g_loss: 23.0655, e_loss: 1.8149
step: 2000/5000, d_loss: 2.3636, g_loss: 23.0529, e_loss: 1.7927
step: 3000/5000, d_loss: 2.543, g_loss: 23.0947, e_loss: 1.8267
step: 4000/5000, d_loss: 2.3637, g_loss: 23.2684, e_loss: 1.8221
Finish Joint Training
Generated 100 synthetic samples for class 4
Start Embedding Network Training
step: 0/5000, e_loss: 0.3483
step: 1000/5000, e_loss: 0.131

In [14]:
synthetic_data = timegan.generate(num_samples=100)

In [16]:
synthetic_data.shape

(100, 60, 6)

In [20]:
synthetic_data_by_class.shape

AttributeError: 'dict' object has no attribute 'shape'

In [21]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from collections import defaultdict
from standartized_balanced import StandardizedBalancedDataset

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Função para carregar os dados
def get_data(dataset_name, sensors, normalize_data):    
    data_folder = f"/HDD/dados/amparo/meta4/M4-Framework-Experiments/experiments/experiment_executor/data/standartized_balanced/{dataset_name}/"
    dataset = StandardizedBalancedDataset(data_folder, sensors=sensors)
    X_train, y_train, X_test, y_test, X_val, y_val = dataset.get_all_data(normalize_data=normalize_data, resize_data=True)
    return X_train, y_train, X_test, y_test, X_val, y_val

# Função para extrair o tempo das sequências
def extract_time(data):
    time = []
    max_seq_len = 0
    for i in range(len(data)):
        temp_time = len(data[i])
        time.append(temp_time)
        if temp_time > max_seq_len:
            max_seq_len = temp_time
    return time, max_seq_len

# Função para gerar lotes de dados
def batch_generator(X, T, batch_size):
    no = len(X)
    idx = np.random.permutation(no)[:batch_size]
    X_mb = [X[i] for i in idx]
    T_mb = [T[i] for i in idx]
    
    return np.array(X_mb), np.array(T_mb)

# Carregar os dados
X_train, y_train, X_test, y_test, X_val, y_val = get_data("MotionSense", ['accel', 'gyro'], False)

# Obter o formato de entrada
input_shape = X_train[0].shape
print("X_train.shape", X_train.shape)
print("y_train.shape", y_train.shape)

# Separar os dados por classe
class_data = defaultdict(list)
for X, y in zip(X_train, y_train):
    class_data[y].append(X)

# Gerar dados sintéticos para cada classe
synthetic_data_by_class = {}
num_samples_per_class = 100  # Número de amostras sintéticas para gerar por classe

for class_label, X_class_data in class_data.items():
    X_class_data = np.array(X_class_data)
    ori_time, _ = extract_time(X_class_data)
    
    # Treinar um TimeGAN para esta classe
    timegan = TimeGAN(X_class_data, parameters)
    timegan.train()  # Aqui, você poderia ajustar `retain_graph=True` se necessário.
    
    # Gerar dados sintéticos
    synthetic_data = timegan.generate(num_samples=num_samples_per_class)
    synthetic_data_by_class[class_label] = synthetic_data
    
    print(f"Generated {num_samples_per_class} synthetic samples for class {class_label}")



X_train.shape (3558, 60, 6)
y_train.shape (3558,)
Start Embedding Network Training
step: 0/5000, e_loss: 0.3991
step: 1000/5000, e_loss: 0.1012
step: 2000/5000, e_loss: 0.0972
step: 3000/5000, e_loss: 0.1007
step: 4000/5000, e_loss: 0.1017
Finish Embedding Network Training
Start Training with Supervised Loss Only
step: 0/5000, s_loss: 0.0004
step: 1000/5000, s_loss: 0.0
step: 2000/5000, s_loss: 0.0
step: 3000/5000, s_loss: 0.0
step: 4000/5000, s_loss: 0.0
Finish Training with Supervised Loss Only
Start Joint Training
step: 0/5000, d_loss: 2.691, g_loss: 58.9656, e_loss: 0.9683
step: 1000/5000, d_loss: 2.553, g_loss: 23.091, e_loss: 1.8271
step: 2000/5000, d_loss: 2.4912, g_loss: 22.9192, e_loss: 1.8178
step: 3000/5000, d_loss: 2.5318, g_loss: 23.3249, e_loss: 1.8342
step: 4000/5000, d_loss: 2.5568, g_loss: 23.2524, e_loss: 1.8374
Finish Joint Training
Generated 100 synthetic samples for class 4
Start Embedding Network Training
step: 0/5000, e_loss: 0.3289
step: 1000/5000, e_loss: 0.131

AttributeError: 'list' object has no attribute 'reshape'

In [52]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Your existing code to generate synthetic data
# Assuming synthetic_data_by_class and X_train, y_train are already defined

# Combine original and synthetic data
X_combined = []
y_combined = []
data_type_combined = []  # To indicate whether data is 'original' or 'synthetic'

# Color map for classes
base_colors = plt.cm.tab10.colors  # Base colors from matplotlib
color_map = {}

# Generate color map for classes
for i, class_label in enumerate(sorted(set(y_train))):
    base_color = base_colors[i % len(base_colors)]
    color_map[class_label] = f'rgba({int(base_color[0]*255)}, {int(base_color[1]*255)}, {int(base_color[2]*255)}, 0.6)'  # Light color

# Process synthetic data
for class_label, synthetic_data in synthetic_data_by_class.items():
    synthetic_data_flat = np.array([x.reshape(-1) for x in synthetic_data])
    X_combined.append(synthetic_data_flat)
    y_combined.extend([f'{class_label}G'] * synthetic_data.shape[0])  # Append 'G' for generated
    data_type_combined.extend(['synthetic'] * synthetic_data.shape[0])

# Process original data
X_train_flat = np.array([x.reshape(-1) for x in X_train])
y_train_flat = np.array(y_train)

X_combined.append(X_train_flat)
y_combined.extend([f'{label}O' for label in y_train_flat])  # Append 'O' for original
data_type_combined.extend(['original'] * X_train_flat.shape[0])

X_combined = np.vstack(X_combined)
data_type_combined = np.array(data_type_combined)

# Apply t-SNE
tsne = TSNE(n_components=2, random_state=0)
reduced_data = tsne.fit_transform(X_combined)

# Create DataFrame for Plotly
df = pd.DataFrame(reduced_data, columns=['Component 1', 'Component 2'])
df['Class'] = y_combined
df['Data Type'] = data_type_combined

# Create scatter plot traces
fig = go.Figure()

# Create scatter plot for all data
for class_label in sorted(set(y_combined)):
    class_data = df[df['Class'] == class_label]
    fig.add_trace(go.Scatter(
        x=class_data['Component 1'],
        y=class_data['Component 2'],
        mode='markers',
        name=f'{class_label}',
        marker=dict(color=color_map[class_label[:-1]], size=8),  # Use color_map based on class without suffix
        showlegend=True
    ))

# Customize layout
fig.update_layout(
    title='t-SNE Visualization of Original and Synthetic Data',
    xaxis_title='Component 1',
    yaxis_title='Component 2',
    legend_title='Class'
)

# Show the plot
fig.show()


KeyError: '0'

In [56]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Your existing code to generate synthetic data
# Assuming synthetic_data_by_class and X_train, y_train are already defined

# Combine original and synthetic data
X_combined = []
y_combined = []
data_type_combined = []  # To indicate whether data is 'original' or 'synthetic'

# Color map for classes
base_colors = plt.cm.tab10.colors  # Base colors from matplotlib
color_map = {}

# Generate color map for classes
for i, class_label in enumerate(sorted(set(y_train))):
    base_color = base_colors[i % len(base_colors)]
    color_map[class_label] = f'rgba({int(base_color[0]*255)}, {int(base_color[1]*255)}, {int(base_color[2]*255)}, 0.6)'  # Light color

# Process synthetic data
for class_label, synthetic_data in synthetic_data_by_class.items():
    synthetic_data_flat = np.array([x.reshape(-1) for x in synthetic_data])
    X_combined.append(synthetic_data_flat)
    y_combined.extend([f'{class_label}_G'] * synthetic_data.shape[0])  # Append '_G' for generated
    data_type_combined.extend(['synthetic'] * synthetic_data.shape[0])

# Process original data
X_train_flat = np.array([x.reshape(-1) for x in X_train])
y_train_flat = np.array(y_train)

X_combined.append(X_train_flat)
y_combined.extend([f'{label}_O' for label in y_train_flat])  # Append '_O' for original
data_type_combined.extend(['original'] * X_train_flat.shape[0])

X_combined = np.vstack(X_combined)
data_type_combined = np.array(data_type_combined)

# Apply t-SNE
tsne = TSNE(n_components=2, random_state=0)
reduced_data = tsne.fit_transform(X_combined)

# Create DataFrame for Plotly
df = pd.DataFrame(reduced_data, columns=['Component 1', 'Component 2'])
df['Class'] = y_combined
df['Data Type'] = data_type_combined

# Create scatter plot traces
fig = go.Figure()

# Create scatter plot for all data
for class_label in sorted(set(y_combined)):
    class_data = df[df['Class'] == class_label]
    # Use the base color for original and synthetic data separately
    #color = color_map[class_label.split('_')[0]]  # Use the color map for the base class
    fig.add_trace(go.Scatter(
        x=class_data['Component 1'],
        y=class_data['Component 2'],
        mode='markers',
        name=class_label,
        marker=dict( size=8),  # Use the color map based on the base class
        showlegend=True
    ))

# Customize layout
fig.update_layout(
    title='t-SNE Visualization of Original and Synthetic Data',
    xaxis_title='Component 1',
    yaxis_title='Component 2',
    legend_title='Class'
)

# Show the plot
fig.show()


In [66]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np

# Inicializar o modelo SVC
clf = SVC()

# Preparar os arrays para combinar os dados sintéticos e originais
X_combined = []
y_combined = []

# Processar os dados sintéticos
for class_label, synthetic_data in synthetic_data_by_class.items():
    synthetic_data_flat = np.array([x.reshape(-1) for x in synthetic_data])
    X_combined.append(synthetic_data_flat)
    y_combined.extend([class_label] * synthetic_data.shape[0])  # Repetir o rótulo para cada amostra gerada

# Processar os dados originais
X_train_flat = np.array([x.reshape(-1) for x in X_train])
y_train_flat = np.array(y_train)

X_combined.append(X_train_flat)
y_combined.extend(y_train_flat)  # Adicionar rótulos originais

# Combinar os dados em um único array
X_combined = np.vstack(X_combined)
y_combined = np.array(y_combined)

# Treinar o modelo com os dados combinados
clf.fit(X_combined, y_combined)

# Prever e avaliar a acurácia no conjunto de teste
y_pred_combined = clf.predict(X_test)  # Teste no conjunto de teste, não no conjunto combinado
accuracy_combined = accuracy_score(y_test, y_pred_combined)
print(f'Accuracy with combined data: {accuracy_combined:.2f}')


ValueError: Found array with dim 3. SVC expected <= 2.

In [42]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.manifold import TSNE

# Assuming synthetic_data_by_class is a dictionary where keys are class labels
# and values are numpy arrays of synthetic data for that class.
# Example: synthetic_data_by_class = {0: np.array(...), 1: np.array(...), ...}

# Prepare data for t-SNE
all_data = []
all_labels = []
for class_label, synthetic_data in synthetic_data_by_class.items():
    # Flatten each sample to a 1D array
    flattened_data = synthetic_data.reshape(synthetic_data.shape[0], -1)
    
    all_data.append(flattened_data)
    all_labels.extend([class_label] * synthetic_data.shape[0])

all_data = np.vstack(all_data)
all_labels = np.array(all_labels)
print(len(all_data))

# Apply t-SNE
tsne = TSNE(n_components=2, random_state=0)
reduced_data = tsne.fit_transform(all_data)

# Create a DataFrame for Plotly
df = pd.DataFrame(reduced_data, columns=['Component 1', 'Component 2'])
df['Class'] = all_labels

# Create scatter plot traces for each class
fig = go.Figure()

for class_label in df['Class'].unique():
    class_data = df[df['Class'] == class_label]
    fig.add_trace(go.Scatter(
        x=class_data['Component 1'],
        y=class_data['Component 2'],
        mode='markers',
        name=f'Class {class_label}',
       
    ))




# Show the plot
fig.show()


600


In [35]:
pip install plotly

Defaulting to user installation because normal site-packages is not writeable
Collecting plotly
  Downloading plotly-5.23.0-py3-none-any.whl.metadata (7.3 kB)
Downloading plotly-5.23.0-py3-none-any.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: plotly
Successfully installed plotly-5.23.0
Note: you may need to restart the kernel to use updated packages.


In [29]:
print(synthetic_data_by_class[0])

[[[8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  ...
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]]

 [[8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  ...
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]]

 [[8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  ...
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.423164]
  [8.423164 8.423164 8.423164 8.423164 8.423164 8.