In [6]:
import pandas as pd

df = pd.read_csv("/content/Follow-up_Records.csv")

print(df.head())

   patient_id  visit_date  age_years  weight_kg   bmi  systolic_bp_mmHg  \
0  P-2025-001  2024-02-15         52       83.7  28.3               138   
1  P-2025-001  2024-03-15         52       83.4  28.2               147   
2  P-2025-001  2024-04-15         52       83.1  28.1               140   
3  P-2025-001  2024-05-15         52       83.0  28.1               136   
4  P-2025-001  2024-06-15         52       82.6  27.9               133   

   diastolic_bp_mmHg  heart_rate_bpm  body_temp_C  fasting_glucose_mg_dL  ...  \
0                 86              80         36.8                    137  ...   
1                 89              80         37.0                    140  ...   
2                 84              76         36.8                    122  ...   
3                 88              77         36.8                    112  ...   
4                 88              78         36.8                    101  ...   

   diet_quality_score_0_100  sleep_hours  exercise_sessions_pe

In [7]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import numpy as np

num_cols = df.select_dtypes(include=['int64', 'float64']).columns
cat_cols = df.select_dtypes(include=['object']).columns

encoder = OneHotEncoder(sparse_output=False)
cat_encoded = encoder.fit_transform(df[cat_cols])

scaler = MinMaxScaler(feature_range=(-1, 1))
num_scaled = scaler.fit_transform(df[num_cols])

data_processed = np.hstack((num_scaled, cat_encoded))

In [8]:
import torch
import torch.nn as nn

data_dim = data_processed.shape[1]
latent_dim = 64


class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, data_dim),
            nn.Tanh()
        )
    def forward(self, z):
        return self.model(z)


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(data_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.model(x)

In [9]:
from torch.utils.data import DataLoader, TensorDataset


real_data = torch.tensor(data_processed, dtype=torch.float32)
dataset = TensorDataset(real_data)
loader = DataLoader(dataset, batch_size=16, shuffle=True)


generator = Generator()
discriminator = Discriminator()


lr = 0.0002
optim_G = torch.optim.Adam(generator.parameters(), lr=lr)
optim_D = torch.optim.Adam(discriminator.parameters(), lr=lr)


criterion = nn.BCELoss()

epochs = 2000
for epoch in range(epochs):
    for real_batch, in loader:
        batch_size = real_batch.size(0)


        real_labels = torch.ones((batch_size, 1))
        fake_labels = torch.zeros((batch_size, 1))


        z = torch.randn(batch_size, latent_dim)
        fake_data = generator(z)

        real_loss = criterion(discriminator(real_batch), real_labels)
        fake_loss = criterion(discriminator(fake_data.detach()), fake_labels)
        d_loss = (real_loss + fake_loss) / 2

        optim_D.zero_grad()
        d_loss.backward()
        optim_D.step()

        z = torch.randn(batch_size, latent_dim)
        fake_data = generator(z)
        g_loss = criterion(discriminator(fake_data), real_labels)

        optim_G.zero_grad()
        g_loss.backward()
        optim_G.step()

    if epoch % 200 == 0:
        print(f"Epoch [{epoch}/{epochs}]  D_loss: {d_loss.item():.4f}  G_loss: {g_loss.item():.4f}")

Epoch [0/2000]  D_loss: 0.6895  G_loss: 0.6677
Epoch [200/2000]  D_loss: 0.2441  G_loss: 1.8498
Epoch [400/2000]  D_loss: 0.5023  G_loss: 1.8544
Epoch [600/2000]  D_loss: 0.2057  G_loss: 2.9249
Epoch [800/2000]  D_loss: 0.4595  G_loss: 1.7949
Epoch [1000/2000]  D_loss: 0.1613  G_loss: 2.5280
Epoch [1200/2000]  D_loss: 0.2245  G_loss: 3.0120
Epoch [1400/2000]  D_loss: 0.1663  G_loss: 2.7962
Epoch [1600/2000]  D_loss: 0.1047  G_loss: 2.4430
Epoch [1800/2000]  D_loss: 0.1257  G_loss: 4.0260


In [10]:
z = torch.randn(10, latent_dim)
synthetic_data_scaled = generator(z).detach().numpy()


num_synthetic = scaler.inverse_transform(synthetic_data_scaled[:, :len(num_cols)])
cat_synthetic = encoder.inverse_transform(synthetic_data_scaled[:, len(num_cols):])


synthetic_df = pd.DataFrame(num_synthetic, columns=num_cols)
synthetic_df[cat_cols] = cat_synthetic

print(synthetic_df)

   age_years  weight_kg        bmi  systolic_bp_mmHg  diastolic_bp_mmHg  \
0  52.038803  81.896332  27.728447        134.106689          77.361267   
1  52.000641  82.311241  27.948013        135.906830          86.289070   
2  52.942543  81.702347  27.815239        131.209366          78.255096   
3  52.999668  81.594528  27.734449        126.937088          77.341309   
4  52.004414  81.728699  27.678802        133.371613          75.674683   
5  52.956451  81.472198  27.597799        127.648651          76.060081   
6  52.000000  83.109482  28.151699        140.511139          90.738853   
7  52.000015  82.623840  28.075672        138.769882          88.800644   
8  52.041256  81.628136  27.659225        131.879074          76.461884   
9  52.000042  82.630127  27.957689        137.106140          87.722031   

   heart_rate_bpm  body_temp_C  fasting_glucose_mg_dL  \
0       74.203690    36.734947              90.613472   
1       76.112778    36.742554              88.449432   
2  