In [27]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [29]:
# Column layout of the cleaned dataset: seven feature columns plus the encoded label
label_col = 'FreedomStatus_encoded'
features = ['A', 'B', 'C', 'D', 'E', 'F', 'G']

# Read the preprocessed real data from the working directory
real_df = pd.read_csv(filepath_or_buffer="freedom_cleaned.csv")

In [31]:
# Generation settings (latent_dim and num_classes also size the Generator's layers)
samples_per_class = 300  # synthetic rows drawn for each class label
num_classes = 3          # class labels run over range(num_classes), i.e. 0, 1, 2
latent_dim = 100         # width of the random noise vector fed to the generator

In [33]:
# Load trained Generator
class Generator(torch.nn.Module):
    """Label-conditioned generator: maps (noise, class label) to one feature row.

    The class label is embedded into a ``num_classes``-wide vector, concatenated
    with the noise vector, and passed through a small MLP whose final Tanh bounds
    every output feature to [-1, 1]. Layer sizes are taken from the notebook-level
    ``latent_dim``, ``num_classes`` and ``features``.

    The attribute names (``label_emb``, ``model``) and the layer order determine
    the state_dict keys, so they must stay in sync with the saved checkpoint.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Embedding is created before the MLP, mirroring the checkpoint's layout.
        self.label_emb = nn.Embedding(num_classes, num_classes)
        layers = [
            nn.Linear(latent_dim + num_classes, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, len(features)),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, z, labels):
        """Return generated rows for noise ``z`` conditioned on integer ``labels``.

        ``z`` and the label embedding are joined along dim 1, so both must share
        the same leading (batch) dimension.
        """
        label_vec = self.label_emb(labels)
        conditioned = torch.cat((z, label_vec), dim=1)
        return self.model(conditioned)
# Rebuild the architecture, then restore the trained weights onto the CPU.
generator = Generator()
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict needs, so a tampered checkpoint file cannot run arbitrary code.
# NOTE(review): needs a PyTorch release that supports `weights_only` - confirm the
# environment's version.
state_dict = torch.load(
    "generator.pth",
    map_location=torch.device('cpu'),
    weights_only=True,
)
generator.load_state_dict(state_dict)
# Put the module in eval mode; as the cell's last expression this also displays it.
generator.eval()

Generator(
  (label_emb): Embedding(3, 3)
  (model): Sequential(
    (0): Linear(in_features=103, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=7, bias=True)
    (5): Tanh()
  )
)

In [35]:
# Generate synthetic samples: `samples_per_class` rows for every class label.
#
# A dedicated, seeded RNG makes the noise (and therefore the augmented dataset)
# identical on every Restart & Run All, without touching torch's global RNG state.
noise_rng = torch.Generator().manual_seed(42)

# NOTE(review): the generator ends in Tanh, so every synthetic feature lies in
# [-1, 1]. MinMaxScaler is imported but never used in the visible cells - confirm
# that freedom_cleaned.csv is already on the same scale, otherwise the synthetic
# rows need an inverse transform before being merged with the real data.
synthetic_data = []
with torch.no_grad():  # inference only; no autograd graph is needed
    for class_label in range(num_classes):
        z = torch.randn(samples_per_class, latent_dim, generator=noise_rng)
        labels = torch.full((samples_per_class,), class_label, dtype=torch.long)
        gen_samples = generator(z, labels).numpy()
        class_df = pd.DataFrame(gen_samples, columns=features)
        class_df[label_col] = class_label  # tag every row with its conditioning label
        synthetic_data.append(class_df)

In [37]:
# Stack the per-class synthetic frames into a single table with a fresh index
synthetic_df = pd.concat(synthetic_data, ignore_index=True)

# Append the synthetic rows beneath the real rows, keeping only the feature
# columns and the label column (in that order) from the real data
audf = pd.concat(
    (real_df[[*features, label_col]], synthetic_df),
    ignore_index=True,
)

In [39]:
# Write the combined real + synthetic table to disk (row index is not written)
augmented_path = "freedom_augmented.csv"
audf.to_csv(path_or_buf=augmented_path, index=False)

# Confirm where the file went
print("Augmented dataset saved as", augmented_path)

Augmented dataset saved as freedom_augmented.csv
