

```
##  you need to have ctgan installed on your system. If you have not done it yet, please install ctgan now by executing the command pip install sdv in a terminal.
```



In [1]:
pip install sdv

Collecting sdv
  Downloading sdv-1.17.2-py3-none-any.whl.metadata (13 kB)
Collecting boto3<2.0.0,>=1.28 (from sdv)
  Downloading boto3-1.35.64-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<2.0.0,>=1.31 (from sdv)
  Downloading botocore-1.35.64-py3-none-any.whl.metadata (5.7 kB)
Collecting copulas>=0.12.0 (from sdv)
  Downloading copulas-0.12.0-py3-none-any.whl.metadata (9.1 kB)
Collecting ctgan>=0.10.2 (from sdv)
  Downloading ctgan-0.10.2-py3-none-any.whl.metadata (10 kB)
Collecting deepecho>=0.6.1 (from sdv)
  Downloading deepecho-0.6.1-py3-none-any.whl.metadata (10 kB)
Collecting rdt>=1.13.1 (from sdv)
  Downloading rdt-1.13.1-py3-none-any.whl.metadata (10 kB)
Collecting sdmetrics>=0.17.0 (from sdv)
  Downloading sdmetrics-0.17.0-py3-none-any.whl.metadata (8.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3<2.0.0,>=1.28->sdv)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3<2.0.0,>=1.28->sdv)
  Downloading s

In [28]:
import pandas as pd
from sdv.single_table import CTGANSynthesizer
from sdv.metadata import SingleTableMetadata
from google.colab import files

# Step 1: Load the dataset
file_path = '/content/drive/MyDrive/receivable_details (5).xlsx'  # Update the path as needed
data = pd.read_excel(file_path)

# Step 2: Add a unique numeric ID column if not already present
if 'id' not in data.columns:
    data['id'] = range(1, len(data) + 1)

# Step 3: Define metadata to ensure synthetic data resembles the original dataset
metadata = SingleTableMetadata()

# Manually define column metadata to reflect original structure
metadata.add_column('id', sdtype='id')
metadata.add_column('item_id', sdtype='categorical')
metadata.add_column('transaction_id', sdtype='categorical')
metadata.add_column('transaction_type', sdtype='categorical')
metadata.add_column('transaction_number', sdtype='categorical')
metadata.add_column('reference_number', sdtype='categorical')
metadata.add_column('transaction_date', sdtype='datetime')
metadata.add_column('status', sdtype='categorical')
metadata.add_column('status.1', sdtype='categorical')  # Add this line to match the data
metadata.add_column('item_name', sdtype='categorical')
metadata.add_column('account_id', sdtype='categorical')
metadata.add_column('product_id', sdtype='categorical')
metadata.add_column('product_name', sdtype='categorical')
metadata.add_column('description', sdtype='categorical')
metadata.add_column('quantity_ordered', sdtype='numerical')
metadata.add_column('quantity_invoiced', sdtype='numerical')
metadata.add_column('quantity_cancelled', sdtype='numerical')
metadata.add_column('bcy_item_price', sdtype='numerical')
metadata.add_column('fcy_item_price', sdtype='numerical')
metadata.add_column('bcy_total', sdtype='numerical')
metadata.add_column('customer_name', sdtype='categorical')
metadata.add_column('salesperson_name', sdtype='categorical')
metadata.add_column('customer_id', sdtype='categorical')
metadata.add_column('currency_code', sdtype='categorical')
metadata.add_column('currency_id', sdtype='categorical')
metadata.add_column('project_id', sdtype='categorical')
metadata.add_column('invoice.CF.End  User', sdtype='categorical')

# Step 4: Initialize the CTGAN model with the metadata
model = CTGANSynthesizer(metadata)

# Step 5: Train the model on the original data
model.fit(data)

# Step 6: Save the trained model for future use
model_file_path = "sdv-ctgan-receivable-details.pkl"
model.save(model_file_path)
print(f"Model saved as {model_file_path}")

# Step 7: Generate synthetic data
num_rows_to_generate = 1000  # Adjust the number of synthetic rows as needed
synthetic_data = model.sample(num_rows=num_rows_to_generate)

# Step 8: Save the synthetic data to a CSV file
synthetic_file_path = "synthetic_receivable_data.csv"
synthetic_data.to_csv(synthetic_file_path, index=False)

# Step 9: Provide a download link for the synthetic data
print(f"Synthetic data saved as {synthetic_file_path}. You can download it using the link below:")
files.download(synthetic_file_path)


     Column Name   sdtype datetime_format
transaction_date datetime            None
Without this specification, SDV may not be able to accurately parse the data. We recommend adding datetime formats using 'update_column'.


Model saved as sdv-ctgan-receivable-details.pkl
Synthetic data saved as synthetic_receivable_data.csv. You can download it using the link below:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>