In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import numpy as np


In [None]:
data = pd.read_csv('/content/lungcapacity.csv')

In [None]:
print(data.isna().sum())

LungCap(cc)       10
Age( years)       10
Height(inches)     7
Smoke              6
Gender             2
Caesarean          4
No of children     0
Weight (kg)        7
dtype: int64


In [None]:
data.head()

Unnamed: 0,LungCap(cc),Age( years),Height(inches),Smoke,Gender,Caesarean,No of children,Weight (kg)
0,6.475,6.0,62.1,,male,no,3,85.7
1,10.125,18.0,74.7,yes,female,no,0,98.75
2,9.55,16.0,69.7,no,female,yes,0,11.01
3,11.125,14.0,71.0,no,male,no,1,29.78
4,4.8,5.0,56.9,no,male,no,4,72.84


In [None]:
numeric_features = ['Age( years)', 'Height(inches)', 'Weight (kg)', 'No of children']
categorical_features = ['Smoke', 'Gender', 'Caesarean']

In [None]:
for col in numeric_features:
    data[col].fillna(data[col].mean(), inplace=True)
for col in categorical_features:
    data[col].fillna(data[col].mode()[0], inplace=True)
data['LungCap(cc)'].fillna(data['LungCap(cc)'].mean(), inplace=True)

In [None]:
X = data.drop('LungCap(cc)', axis=1)
y = data['LungCap(cc)']

In [None]:
print(y.isna().sum())

0


In [None]:
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

In [None]:
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder())])

In [None]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

In [None]:
X = preprocessor.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

In [None]:
latent_dim = 100
data_dim = X_train.shape[1]

In [None]:
def build_generator(latent_dim, data_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(data_dim, activation='tanh'))
    return model

In [None]:
def build_discriminator(data_dim):
    model = Sequential()
    model.add(Dense(512, input_dim=data_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model


In [None]:
generator = build_generator(latent_dim, data_dim)
discriminator = build_discriminator(data_dim)
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])

In [None]:
z = Input(shape=(latent_dim,))
generated_data = generator(z)
discriminator.trainable = False
validity = discriminator(generated_data)

In [None]:
gan = Model(z, validity)
gan.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

In [None]:
def train_gan(epochs, batch_size=128, save_interval=50):
    half_batch = int(batch_size / 2)
    for epoch in range(epochs):
        idx = np.random.randint(0, X_train.shape[0], half_batch)
        real_data = X_train[idx]
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        synthetic_data = generator.predict(noise)
        d_loss_real = discriminator.train_on_batch(real_data, np.ones((half_batch, 1)))
        d_loss_fake = discriminator.train_on_batch(synthetic_data, np.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.array([1] * batch_size)
        g_loss = gan.train_on_batch(noise, valid_y)
        if epoch % save_interval == 0:
            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100*d_loss[1]}] [G loss: {g_loss}]")

In [None]:
train_gan(epochs=10000, batch_size=32, save_interval=1000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
6000 [D loss: 0.012367434334009886, acc.: 100.0] [G loss: 7.1772356033325195]
7000 [D loss: 0.11029830004554242, acc.: 93.75] [G loss: 10.62417221069336]
8000 [D loss: 0.03366125375032425, acc.: 96.875] [G loss: 8.84074592590332]
9000 [D loss: 0.016047482145950198, acc.: 100.0] [G loss: 10.178112030029297]


In [None]:
def generate_synthetic_data(generator, num_samples, latent_dim):
    noise = np.random.normal(0, 1, (num_samples, latent_dim))
    synthetic_data = generator.predict(noise)
    return synthetic_data

In [None]:
synthetic_data = generate_synthetic_data(generator, num_samples=746, latent_dim=latent_dim)



In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
X_augmented = np.vstack((X_train, synthetic_data))
y_augmented = np.hstack((y_train, np.random.normal(y_train.mean(), y_train.std(), synthetic_data.shape[0])))
model = Sequential()
model.add(Dense(64, input_dim=X_augmented.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_augmented, y_augmented, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 1.6193758705300056
