In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Load the training table into a DataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
df_train = pd.read_csv('train.csv')

In [5]:
# The 'strength' column is the conditioning variable; it is used in its
# original units (no scaling is applied to it in this cell).
y = df_train['strength']
# Every remaining column is a feature the model learns to reconstruct.
X = df_train.loc[:, df_train.columns != 'strength']

# Rescale each feature column to [0, 1]. The fitted scaler is kept so that
# model outputs can be mapped back with `scaler.inverse_transform`.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Convert both arrays to float32 tensors in one pass.
X_tensor, y_tensor = (
    torch.tensor(values, dtype=torch.float32) for values in (X_scaled, y.values)
)

# Pair features with their condition and serve them in shuffled mini-batches.
dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, shuffle=True, batch_size=32)

In [6]:
class CVAE(nn.Module):
    """Conditional variational autoencoder made of small fully connected layers.

    The encoder receives the feature vector concatenated with the condition
    and yields the mean and log-variance of the latent distribution. The
    decoder receives a latent code concatenated with the condition and yields
    a reconstruction of the feature vector.

    Args:
        x_dim: Width of the feature vector.
        cond_dim: Width of the condition vector (default 1).
        latent_dim: Width of the latent code (default 3).
    """

    def __init__(self, x_dim, cond_dim=1, latent_dim=3):
        super().__init__()
        hidden = 32  # width of the single hidden layer on each side

        # Encoder trunk followed by two linear heads (mean and log-variance).
        self.encoder = nn.Sequential(nn.Linear(x_dim + cond_dim, hidden), nn.ReLU())
        self.fc_mu = nn.Linear(hidden, latent_dim)
        self.fc_logvar = nn.Linear(hidden, latent_dim)

        # Decoder maps (latent code, condition) back to feature space.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim + cond_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, x_dim),
        )

    def encode(self, x, c):
        """Return ``(mu, logvar)`` for features ``x`` under condition ``c``.

        ``x`` and ``c`` are concatenated along dim 1 before entering the encoder.
        """
        hidden = self.encoder(torch.cat((x, c), dim=1))
        return self.fc_mu(hidden), self.fc_logvar(hidden)

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * sigma`` with ``sigma = exp(logvar / 2)`` and standard-normal ``eps``."""
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z, c):
        """Reconstruct features from latent code ``z`` and condition ``c`` (joined along dim 1)."""
        return self.decoder(torch.cat((z, c), dim=1))

    def forward(self, x, c):
        """Encode, sample a latent code, decode; return ``(x_recon, mu, logvar)``."""
        mu, logvar = self.encode(x, c)
        latent = self.reparameterize(mu, logvar)
        return self.decode(latent, c), mu, logvar

In [7]:
def loss_function(x_recon, x, mu, logvar):
    """Return the CVAE training loss as a scalar tensor.

    The loss is the sum of two terms:
      * mean squared error between ``x_recon`` and ``x``;
      * the KL term ``-0.5 * (1 + logvar - mu^2 - exp(logvar))`` averaged
        over every element of ``mu`` / ``logvar``.
    """
    # 'mean' is mse_loss's default reduction.
    reconstruction = nn.functional.mse_loss(x_recon, x)
    kl_terms = 1 + logvar - mu ** 2 - torch.exp(logvar)
    divergence = kl_terms.mean() * -0.5
    return reconstruction + divergence

In [26]:
# Seed torch so weight initialisation, batch shuffling and the
# reparameterisation noise are repeatable after Restart Kernel -> Run All.
torch.manual_seed(0)

x_dim = X.shape[1]
model = CVAE(x_dim=x_dim, cond_dim=1, latent_dim=3)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

epochs = 250
window_size = 6    # number of most recent epoch losses inspected for early stopping
threshold = 0.002  # stop once the window's mean gap to its own maximum drops below this
last_values = []   # summed loss of the most recent epochs (at most `window_size` entries)

for epoch in range(epochs):
    total_loss = 0.0
    for x_batch, y_batch in loader:
        optimizer.zero_grad()
        # The dataset stores the condition as a 1-D tensor; add a trailing
        # dimension so it can be concatenated with the features along dim 1.
        x_recon, mu, logvar = model(x_batch, y_batch.unsqueeze(1))
        loss = loss_function(x_recon, x_batch, mu, logvar)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Record the finished epoch exactly once and keep only the newest window.
    last_values = (last_values + [total_loss])[-window_size:]

    # Only test for a plateau once the window holds real epoch losses;
    # placeholder entries would otherwise be able to trigger a spurious stop.
    if len(last_values) == window_size:
        peak = max(last_values)
        absolute_errors = [abs(value - peak) for value in last_values]
        mae = sum(absolute_errors) / len(absolute_errors)
        if mae < threshold:
            print(f"Early stopping at epoch {epoch} with MAE: {mae:.4f}")
            break

    print(f"Epoch {epoch}, Loss: {total_loss:.4f}")

Epoch 0, Loss: 419.2252
Epoch 1, Loss: 200.9539
Epoch 2, Loss: 97.1265
Epoch 3, Loss: 49.8083
Epoch 4, Loss: 24.5046
Epoch 5, Loss: 11.1290
Epoch 6, Loss: 4.4139
Epoch 7, Loss: 1.9119
Epoch 8, Loss: 1.3778
Epoch 9, Loss: 1.2774
Epoch 10, Loss: 1.2475
Epoch 11, Loss: 1.1953
Epoch 12, Loss: 1.1935
Epoch 13, Loss: 1.1540
Epoch 14, Loss: 1.1894
Epoch 15, Loss: 1.0558
Epoch 16, Loss: 1.0527
Epoch 17, Loss: 1.0619
Epoch 18, Loss: 1.0633
Epoch 19, Loss: 1.0661
Epoch 20, Loss: 1.0270
Epoch 21, Loss: 0.9880
Epoch 22, Loss: 0.9400
Epoch 23, Loss: 0.9816
Epoch 24, Loss: 0.9368
Epoch 25, Loss: 0.9390
Epoch 26, Loss: 0.9032
Epoch 27, Loss: 0.9126
Epoch 28, Loss: 0.8797
Epoch 29, Loss: 0.8859
Epoch 30, Loss: 0.8811
Epoch 31, Loss: 0.8362
Epoch 32, Loss: 0.8202
Epoch 33, Loss: 0.8227
Epoch 34, Loss: 0.8307
Epoch 35, Loss: 0.8074
Epoch 36, Loss: 0.7946
Epoch 37, Loss: 0.7875
Epoch 38, Loss: 0.8020
Epoch 39, Loss: 0.7733
Epoch 40, Loss: 0.7854
Epoch 41, Loss: 0.7605
Epoch 42, Loss: 0.7776
Epoch 43, Los

In [27]:
# Condition to generate for, shaped (1, cond_dim) and given in the same
# units as the 'strength' values the model was trained on.
desired_strength = torch.tensor([[19.77]], dtype=torch.float32)

# Draw one latent code per condition row from a standard normal. The latent
# width is read from the model instead of being hard-coded, so this cell
# keeps working if `latent_dim` is changed when the model is built.
latent_dim = model.fc_mu.out_features
z = torch.randn(desired_strength.shape[0], latent_dim)

model.eval()
with torch.no_grad():
    generated = model.decode(z, desired_strength)
    # The decoder output lives in the scaler's feature space; map it back to
    # the original column units.
    generated_original = scaler.inverse_transform(generated.numpy())

print(generated_original)

[[224.92482   70.61229   71.09925  185.87529    4.91794  985.2465
  803.50586   41.930157]]


In [28]:
# Show the training rows whose 'strength' equals the value used as the
# generation condition, for comparison with the generated sample.
df_train.loc[df_train['strength'].eq(19.77)]

Unnamed: 0,cement,slag,ash,water,superplastic,coarseagg,fineagg,age,strength
0,252.5,0.0,0.0,185.7,0.0,1111.6,784.3,28,19.77


In [25]:
# List the distinct values present in the 'ash' column.
df_train['ash'].unique()

array([  0. ,  86. , 116. , 118.3, 167. , 122. ,  71.5, 175. , 121.6,
        24.5, 136.6, 187. , 112. ,  94. , 185.3,  94.6, 125.2, 172.4,
        94.1, 132.1, 118.6, 106.9,  95.7, 150.4,  99.9, 174.7, 121.9,
       123.8,  87.5,  77. , 125.1, 143. , 174.2, 127.9, 100.4,  96.2,
        98. , 200.1,  95.6,  78.3,  96.7, 142. , 137.9, 113. , 100.5,
       125.4, 121.4, 124.1, 183.9,  78. , 132. ,  97.4, 138.7, 138. ,
       124.3, 185. , 141. , 195. , 132.6, 126. ,  93.9, 163.3, 173.5,
       107. , 161. ,  60. , 163.8,  82. ,  98.8, 200. , 142.7, 118.2,
        79. , 113.2, 106.2,  86.1, 128. , 158. ,  89.3, 123. , 130. ,
       159.9, 142.8, 124.8, 111. ,  78.4,  92. , 111.9, 120. , 128.6,
       109. , 100. , 143.6,  91. ,  89. ,  90. ])