In [None]:
!pip install ctgan


In [7]:
import pandas as pd
from ctgan import CTGAN

# Balance the binary target I_CLASS by generating synthetic minority-class rows
# with CTGAN, then save the balanced dataset to a new Excel file.

# Fixed seed so CTGAN training and sampling are repeatable from run to run
SEED = 42

# Load the imbalanced dataset
df = pd.read_excel('ImbalancedData.xlsx', engine='openpyxl')

# Separate the features from the target class
X = df.drop('I_CLASS', axis=1)
y = df['I_CLASS']


# Columns that CTGAN must treat as discrete rather than continuous
categorical_columns=['m2_Wilaya','CATEGORIE','t24_Profession','CODE','t18_Genre','t23_EtatCivil','I_CLASS']


# Split the rows by class: 1 is treated as the minority, 0 as the majority
df_minority = df[df['I_CLASS'] == 1]
df_majority = df[df['I_CLASS'] == 0]

# Number of synthetic rows needed to bring the minority up to the majority size
num_samples = len(df_majority) - len(df_minority)

# Fail fast, before the expensive training step, when there is nothing to learn
# from or nothing to generate.
if df_minority.empty:
    raise ValueError("No rows with I_CLASS == 1: CTGAN has nothing to learn from.")
if num_samples <= 0:
    raise ValueError(
        "Class 1 is not smaller than class 0 "
        f"({len(df_minority)} vs {len(df_majority)} rows): no synthetic rows are needed."
    )


# Train CTGAN on the minority-class features only. The target is constant (1)
# within this subset, so it carries no information for the generator; it is
# re-attached to the synthetic rows below.
minority_features = df_minority.drop(columns='I_CLASS')
discrete_features = [col for col in categorical_columns if col != 'I_CLASS']

ctgan = CTGAN(epochs=100)  # Training the network for 100 epochs
ctgan.set_random_state(SEED)
ctgan.fit(minority_features, discrete_columns=discrete_features)

synthetic_data = ctgan.sample(num_samples)
assert len(synthetic_data) == num_samples, "CTGAN returned an unexpected number of rows"


# Label every generated row as minority class
synthetic_data['I_CLASS'] = 1


# Align the synthetic rows to the original column order, then append them
df_synthetic = pd.DataFrame(synthetic_data, columns=df.columns)
df_balanced = pd.concat([df, df_synthetic], ignore_index=True)

# Check the class distribution after applying CTGAN
print("Distribution of the target variable after applying CTGAN:")
print(df_balanced['I_CLASS'].value_counts())

# Save the balanced dataframe
df_balanced.to_excel('BalancedData.xlsx', index=False, engine='openpyxl')


Distribution of the target variable after applying CTGAN:
I_CLASS
0    92771
1    92771
Name: count, dtype: int64
