In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers


In [2]:
data = pd.read_csv(r'data\master\telecom_customer_data.csv')
data.head()

Unnamed: 0,CustomerID,Tenure,Contract,MonthlyCharges,TotalCharges,InternetService,OnlineSecurity,TechSupport,StreamingTV,StreamingMovies,PaymentMethod,SeniorCitizen,PaperlessBilling,Churn
0,6,50,One year,35.65,4091.54,DSL,Yes,No,Yes,No,Mailed check,False,True,False
1,10,32,Two year,30.45,5257.94,Fiber optic,Yes,No,Yes,Yes,Bank transfer,False,False,True
2,3,11,Month-to-month,97.55,1749.5,Fiber optic,No,Yes,No,Yes,Bank transfer,True,False,False
3,10,17,Month-to-month,49.43,1287.6,DSL,No,No,No,Yes,Mailed check,True,True,True
4,10,70,Two year,35.6,7725.76,,No,No,No,Yes,Mailed check,False,False,False


In [3]:
contract_mapping = {'One year' : 0, 'Two year' : 1, 'Month-to-month' : 2}
internet_service_mapping = {'DSL' : 0, 'Fiber optic' : 1}
payment_mapping = {'Mailed check' : 0, 'Bank transfer' : 1, 'Credit card' : 2, 'Electronic check' : 3}
agree_mapping = {'Yes': 0, 'No' :1}

def labelling(data):
    global contract_mapping, internet_service_mapping, payment_mapping, agree_mapping

    data['Contract'] = data['Contract'].map(contract_mapping)
    data['InternetService'] = data['InternetService'].map(internet_service_mapping)
    data['PaymentMethod'] = data['PaymentMethod'].map(payment_mapping)
    data['OnlineSecurity'] = data['OnlineSecurity'].map(agree_mapping)
    data['TechSupport'] = data['TechSupport'].map(agree_mapping)
    data['StreamingTV'] = data['StreamingTV'].map(agree_mapping)
    data['StreamingMovies'] = data['StreamingMovies'].map(agree_mapping)

    return data

data = labelling(data)

In [4]:
data['InternetService'] = data['InternetService'].fillna(data['InternetService'].mean())

In [5]:
# Fill missing values
data.fillna(data.mean(), inplace=True)

# Normalize the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)



In [6]:
def build_generator(latent_dim, n_features):
    model = tf.keras.Sequential()
    model.add(layers.Dense(64, input_dim=latent_dim, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(n_features, activation='sigmoid'))
    return model

def build_discriminator(n_features):
    model = tf.keras.Sequential()
    model.add(layers.Dense(128, input_dim=n_features, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

latent_dim = 10  # Dimension of the latent space
n_features = scaled_data.shape[1]  # Number of features in the dataset

generator = build_generator(latent_dim, n_features)
discriminator = build_discriminator(n_features)

discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [7]:
def train_gan(generator, discriminator, real_data, epochs=10000, batch_size=32, latent_dim=10):
    for epoch in range(epochs):
        # 1. Select a random batch of real data
        idx = np.random.randint(0, real_data.shape[0], batch_size)
        real_samples = real_data[idx]

        # 2. Generate a batch of synthetic data
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        generated_samples = generator.predict(noise)

        # 3. Create labels for the discriminator
        real_labels = np.ones((batch_size, 1))
        fake_labels = np.zeros((batch_size, 1))

        # 4. Train the discriminator on real and fake data
        d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
        d_loss_fake = discriminator.train_on_batch(generated_samples, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # 5. Train the generator (via the combined model)
        # Update noise
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # Set the target labels to `real_labels` (ones) to fool the discriminator
        g_loss = gan.train_on_batch(noise, real_labels)

        # Print progress every 1000 epochs
        if epoch % 1000 == 0:
            print(f"Epoch: {epoch} | D Loss: {d_loss[0]} | D Accuracy: {100*d_loss[1]:.2f} | G Loss: {g_loss}")


discriminator.trainable = False
gan_input = layers.Input(shape=(latent_dim,))
generated_sample = generator(gan_input)
gan_output = discriminator(generated_sample)
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer='adam')


In [8]:
train_gan(generator, discriminator, scaled_data)


Epoch: 0 | D Loss: 0.72979336977005 | D Accuracy: 45.31 | G Loss: 0.5704218149185181
Epoch: 1000 | D Loss: 0.04013237729668617 | D Accuracy: 98.44 | G Loss: 5.40476131439209
Epoch: 2000 | D Loss: 0.04358857311308384 | D Accuracy: 98.44 | G Loss: 3.659147262573242
Epoch: 3000 | D Loss: 0.010913178441114724 | D Accuracy: 100.00 | G Loss: 5.513815402984619
Epoch: 4000 | D Loss: 0.004198218259261921 | D Accuracy: 100.00 | G Loss: 5.858841896057129
Epoch: 5000 | D Loss: 0.012750513385981321 | D Accuracy: 100.00 | G Loss: 5.694669723510742
Epoch: 6000 | D Loss: 0.03334486065432429 | D Accuracy: 98.44 | G Loss: 4.221322536468506
Epoch: 7000 | D Loss: 0.11497855000197887 | D Accuracy: 98.44 | G Loss: 4.463555335998535
Epoch: 8000 | D Loss: 0.15388181805610657 | D Accuracy: 96.88 | G Loss: 3.159803867340088
Epoch: 9000 | D Loss: 0.24447182565927505 | D Accuracy: 89.06 | G Loss: 2.860595226287842
Models saved successfully!


In [None]:

import os

# Directory to save models
model_dir = 'data/saved_models'
os.makedirs(model_dir, exist_ok=True)  # Create the directory if it doesn't exist

# Save the generator model
generator.save(os.path.join(model_dir, 'generator_model.h5'))

# Save the discriminator model
discriminator.save(os.path.join(model_dir, 'discriminator_model.h5'))

# Save the GAN model (if needed for continued training)
gan.save(os.path.join(model_dir, 'gan_model.h5'))

print("Models saved successfully!")

In [17]:
def generate_synthetic_data(generator, num_samples=1000):
    noise = np.random.normal(0, 1, size=(num_samples, latent_dim))
    synthetic_data = generator.predict(noise)
    return scaler.inverse_transform(synthetic_data)

synthetic_data = generate_synthetic_data(generator)




In [18]:
synthetic_df = pd.DataFrame(synthetic_data, columns=data.columns)

In [19]:
synthetic_df.head()

Unnamed: 0,CustomerID,Tenure,Contract,MonthlyCharges,TotalCharges,InternetService,OnlineSecurity,TechSupport,StreamingTV,StreamingMovies,PaymentMethod,SeniorCitizen,PaperlessBilling,Churn
0,1.296293,5.269558,2e-06,35.123791,8947.354492,0.012637,1.0,7.206409000000001e-17,1.0,3.158823e-23,2.692718,5.2995400000000005e-33,5.002298e-08,1.0
1,5.96935,34.853134,0.284555,77.972969,6194.143066,0.113853,1.0,0.9999928,0.000586,0.9999958,0.180915,1.444557e-10,0.9949633,0.999942
2,6.95802,30.932276,1.977706,66.319206,8590.914062,0.00316,1.0,0.01640543,0.000188,1.0,0.494409,1.130387e-13,2.209812e-08,0.987568
3,4.004249,13.681754,1.1e-05,68.183861,5439.601562,6.4e-05,1.0,0.9946718,0.994186,4.403467e-07,0.01342,1.419943e-14,9.743256e-07,1.0
4,2.44787,10.908654,7e-06,63.26947,4342.817383,0.000325,1.0,0.9904104,0.936145,4.331661e-13,0.392761,7.013965e-14,0.9258521,1.0


In [20]:
def delabel(df):
    # Reverse mappings
    contract_reverse_mapping = {v: k for k, v in contract_mapping.items()}
    internet_service_reverse_mapping = {v: k for k, v in internet_service_mapping.items()}
    payment_reverse_mapping = {v: k for k, v in payment_mapping.items()}
    agree_reverse_mapping = {v: k for k, v in agree_mapping.items()}
    
    # Apply the reverse mappings column by column, after rounding values
    df['Contract'] = df['Contract'].round().astype('int').map(contract_reverse_mapping)
    df['InternetService'] = df['InternetService'].round().astype('int').map(internet_service_reverse_mapping)
    df['PaymentMethod'] = df['PaymentMethod'].round().astype('int').map(payment_reverse_mapping)
    df['OnlineSecurity'] = df['OnlineSecurity'].round().astype('int').map(agree_reverse_mapping)
    df['TechSupport'] = df['TechSupport'].round().astype('int').map(agree_reverse_mapping)
    df['StreamingTV'] = df['StreamingTV'].round().astype('int').map(agree_reverse_mapping)
    df['StreamingMovies'] = df['StreamingMovies'].round().astype('int').map(agree_reverse_mapping)
    df['SeniorCitizen'] = df['SeniorCitizen'].round().astype('int')
    df['PaperlessBilling'] = df['PaperlessBilling'].round().astype('int')
    df['Churn'] = df['Churn'].round().astype('int')
    df['CustomerID'] = df['CustomerID'].round().astype('int')
    df['Tenure'] = df['Tenure'].round().astype('int')
    
    return df

# Apply the delabel function to the synthetic data
synthetic_df = delabel(synthetic_df)
synthetic_df.head()


Unnamed: 0,CustomerID,Tenure,Contract,MonthlyCharges,TotalCharges,InternetService,OnlineSecurity,TechSupport,StreamingTV,StreamingMovies,PaymentMethod,SeniorCitizen,PaperlessBilling,Churn
0,1,5,One year,35.123791,8947.354492,DSL,No,Yes,No,Yes,Electronic check,0,0,1
1,6,35,One year,77.972969,6194.143066,DSL,No,No,Yes,No,Mailed check,0,1,1
2,7,31,Month-to-month,66.319206,8590.914062,DSL,No,Yes,Yes,No,Mailed check,0,0,1
3,4,14,One year,68.183861,5439.601562,DSL,No,No,No,Yes,Mailed check,0,0,1
4,2,11,One year,63.26947,4342.817383,DSL,No,No,No,Yes,Mailed check,0,1,1


In [23]:
from tensorflow.keras.models import load_model

# Load the models
generator_loaded = load_model(os.path.join(model_dir, 'generator_model.h5'))
discriminator_loaded = load_model(os.path.join(model_dir, 'discriminator_model.h5'))
gan_loaded = load_model(os.path.join(model_dir, 'gan_model.h5'))

print("Models loaded successfully!")


Models loaded successfully!


In [25]:
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

# # Load the models
# generator_loaded = load_model('saved_models/generator_model.h5')
# discriminator_loaded = load_model('saved_models/discriminator_model.h5')
# gan_loaded = load_model('saved_models/gan_model.h5')

# Recompile discriminator if needed for further training
discriminator_loaded.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002), metrics=['accuracy'])

# Freeze the discriminator within the GAN model and recompile the GAN
discriminator_loaded.trainable = False
gan_loaded.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002))

print("Models recompiled and ready for training!")

Models recompiled and ready for training!


In [22]:
synthetic_df['Churn'].bool()

  synthetic_df['Churn'].bool()


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().