## Installing dittto

In [None]:
!pip install dittto

## Importing functions

In [1]:
from dittto import generate_model, generate_synthetic_data

## Checking Functions

In [2]:
help(generate_model)

Help on function generate_model in module dittto.autoencoder:

generate_model(input_shape: int, **kwargs)
    Generates an autoencoder model using the given input shape and optional parameters.
    
    Args:
        input_shape (int): The shape of the input data.
        **kwargs: Optional keyword arguments for configuring the model. Possible arguments are:
            encoder_dense_layers (list): A list of integers representing the number of units in each dense layer of the encoder. Default is [18, 20].
            bottle_neck (int): The number of units in the bottleneck layer. Default is half of the input shape.
            decoder_dense_layers (list): A list of integers representing the number of units in each dense layer of the decoder. Default is [20, 18].
            decoder_activation (str): The activation function to use in the decoder output layer. Default is 'sigmoid'.
            summary (bool): Whether to print the summary of the models. Default is False.
    
    Returns:

In [3]:
help(generate_synthetic_data)

Help on function generate_synthetic_data in module dittto.autoencoder:

generate_synthetic_data(model_name: str, original_df: pandas.core.frame.DataFrame, minority_class_column: str = 'class', minority_class_label: str = '0', decoder_activation: str = 'sigmoid', epochs: int = 100)
    Generates synthetic data using an autoencoder model.
    
    Args:
        model_name (str): Name of the autoencoder model to use. Valid options are 'single_encoder', 'balanced', and 'heavy_decoder'.
        original_df (pd.DataFrame): Original dataset to generate synthetic data from.
        minority_class_column (str, optional): Name of the column containing the minority class label. Defaults to 'class'.
        minority_class_label (str, optional): Label of the minority class. Defaults to '0'.
        decoder_activation (str, optional): Activation function for the decoder layers. Defaults to 'sigmoid'.
        epochs (int, optional): Number of epochs to train the autoencoder model. Defaults to 100.
  

In [47]:
from tensorflow import keras
import pandas as pd
df = pd.read_csv('data.csv')

In [48]:
df.columns

Index(['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11',
       'c12', 'c13', 'c14', 'c15', 'c16', 'c17', 'c18', 'c19', 'c20', 'c21',
       'c22', 'c23', 'c24', 'class'],
      dtype='object')

In [49]:
df.shape

(188, 25)

## Using the generate_synthetic_data() to generate synthetic data

In [50]:
model_name = 'single_encoder'
minority_class_column = 'class'
minority_class_label = '0'
decoder_activation = 'sigmoid'
epochs = 10
original_df = df.copy()

synthetic_df, generated_data, minority_df, majority_df = generate_synthetic_data(model_name=model_name, 
                                                        original_df=original_df, minority_class_column=minority_class_column, 
                                                        minority_class_label=minority_class_label, decoder_activation=decoder_activation, 
                                                        epochs=epochs)

In [51]:
synthetic_df.shape

(359, 25)

## Using the generate_model() to generate autoencoder model

In [52]:
minority_df = df.drop(columns=[minority_class_column])
autoencoder, encoder, decoder = generate_model(input_shape=minority_df.shape[1], summary=True)

Encoder Summary
Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (InputLayer)        [(None, 24)]              0         
                                                                 
 flatten_8 (Flatten)         (None, 24)                0         
                                                                 
 dense_44 (Dense)            (None, 18)                450       
                                                                 
 dense_45 (Dense)            (None, 20)                380       
                                                                 
 dense_46 (Dense)            (None, 12)                252       
                                                                 
Total params: 1082 (4.23 KB)
Trainable params: 1082 (4.23 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Decoder Sum

In [53]:
type(autoencoder)

keras.src.engine.functional.Functional

In [54]:
learning_rate = 0.001
loss = 'mse'
batch_size = 16
validation_split = 0.2
verbose = 0
epochs = 20

opt = keras.optimizers.Adam(learning_rate=learning_rate)
autoencoder.compile(opt, loss=loss)
history = autoencoder.fit(minority_df, minority_df, epochs=epochs, batch_size=batch_size, validation_split=validation_split, verbose=verbose)

synthetic_minority_df = autoencoder.predict(minority_df, verbose=verbose)

In [55]:
reshaped_data = synthetic_minority_df.reshape(len(minority_df), -1)
df_generated = pd.DataFrame(reshaped_data, columns = minority_df.columns)

In [56]:
df_generated[minority_class_column] = minority_class_label
df_generated.shape

(188, 25)