<a href="https://colab.research.google.com/github/RazzaTitian/ADF-X/blob/main/ADF_X.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Advanced Data Factory - Experimental

ADF-X is an ambitious, groundbreaking project designed to create a highly modular, robust, and complex data generation factory. The primary objective is to produce synthetic data that covers every variation the source data can exhibit, while keeping that data as accurate and comprehensive as possible. ADF-X is not just a data generator; it is a complete data ecosystem designed to be highly customizable, allowing users to specify the type of data they need, the number of iterations, and the expected amount of data.

# Random Sampling

In [None]:
import pandas as pd
import numpy as np

# Load the original room-1 dataset from disk into a DataFrame
OldData_Room1 = pd.read_csv('your_dataset.csv')

# Size of the simple random sample; tune this to your needs
num_samples = 1000

# Draw the sample with replacement, using a fixed seed so the draw is repeatable
SRS_Room1 = OldData_Room1.sample(
    random_state=1,
    replace=True,
    n=num_samples,
)

# SRS_Room1 now holds the rows randomly drawn from OldData_Room1



# SMOTE

In [None]:
from imblearn.over_sampling import SMOTE
import pandas as pd

# Build the SMOTE oversampler with the 'auto' sampling strategy
smote = SMOTE(sampling_strategy='auto')

# Separate the target column from the remaining feature columns
y = SRS_Room1['target_column']
X = SRS_Room1.drop(columns='target_column')

# Fit SMOTE on the sampled data and collect the resampled features/target
X_resampled, y_resampled = smote.fit_resample(X, y)

# Place the resampled features and target side by side in one DataFrame
resampled_parts = [X_resampled, y_resampled]
SMOTE_Room1 = pd.concat(resampled_parts, axis=1)

# Data Perturbation

In [None]:
import numpy as np
import pandas as pd

"""
    The perturb_data function defined below applies Gaussian noise to each
    numerical feature in the dataset to introduce variability.

    Parameters:
    - data: pd.DataFrame, the dataset to perturb.
    - noise_level: float, the standard deviation of the Gaussian noise as a fraction
                   of the data's standard deviation.

    Returns:
    - perturbed_data: pd.DataFrame, the dataset with added Gaussian noise.
    """

def perturb_data(data, noise_level=0.01, random_state=None):
    """
    Return a copy of the dataset with Gaussian noise added to every
    numerical column. The input DataFrame itself is not modified.

    Parameters:
    - data: pd.DataFrame, the dataset to perturb.
    - noise_level: float, the standard deviation of the Gaussian noise as a
                   fraction of each column's standard deviation. Must be
                   non-negative.
    - random_state: optional seed (or other value accepted by
                    np.random.default_rng) for reproducible noise. When None,
                    the global np.random state is used.

    Returns:
    - perturbed_data: pd.DataFrame, the dataset with added Gaussian noise.

    Raises:
    - ValueError: if noise_level is negative.
    """
    if noise_level < 0:
        raise ValueError("noise_level must be non-negative")

    # Work on a copy so the caller's DataFrame is left untouched.
    perturbed_data = data.copy()

    # Both np.random and a Generator expose normal(loc, scale, size).
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    # For each numerical column, apply Gaussian noise
    for column in perturbed_data.select_dtypes(include=[np.number]).columns:
        # Noise scale is the column's standard deviation times noise_level
        scale = perturbed_data[column].std() * noise_level

        # std() is NaN when a column has fewer than two values; adding noise
        # with a NaN scale would wipe the column out, and a zero scale adds
        # nothing, so leave such columns as they are.
        if not np.isfinite(scale) or scale == 0:
            continue

        # Generate Gaussian noise
        noise = rng.normal(0, scale, size=perturbed_data[column].shape)

        # Add the noise to the column
        perturbed_data[column] = perturbed_data[column] + noise

    return perturbed_data

# Example usage:
# Assuming 'SMOTE_Room1' is the DataFrame containing the SMOTEd data on room 1
# perturbed_df = perturb_data(SMOTE_Room1)

# GAN Section

In [None]:
"""
# Importing necessary libraries
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Flatten
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam

In [None]:
"""
# Data Preprocessing
# os is used by the logging-directory setup at the end of this cell
import os

# Assuming df is your DataFrame
df = pd.read_csv('your_dataset.csv')
# Keep the values of the DataFrame as the training input
X_train = df.values

# Create logging directory
log_dir = 'logs'
if not os.path.exists(log_dir):
    # exist_ok=True avoids an error if the directory is created by something
    # else between the check above and this call
    os.makedirs(log_dir, exist_ok=True)