In [2]:
# Install the SDV package; %pip installs into the environment of the running kernel.
# NOTE(review): the version is unpinned, so a later run may pull a different SDV
# release than the one that produced the outputs below - consider pinning it.
%pip install sdv


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Import necessary libraries

In [5]:
from sdv.datasets.local import load_csvs
from sdv.metadata import SingleTableMetadata
import pandas as pd

In [6]:

from sdv.single_table import CopulaGANSynthesizer

Load the dataset for synthetic data generation

In [7]:
# Read the source table from a CSV file in the current working directory.
# NOTE(review): the variable is named `guests_table`, but the file is
# stock_data.csv and the columns shown below are Open/High/Low/Close/Adj_Close/Volume.
# If it is renamed, every later cell that references it must be updated together.
guests_table = pd.read_csv("stock_data.csv")

Preview the dataset (pandas shows the first and last rows):

In [8]:
# Bare expression as the last line of the cell: renders the DataFrame as the cell output.
guests_table

Unnamed: 0,Open,High,Low,Close,Adj_Close,Volume
0,49.676899,51.693783,47.669952,49.845802,49.845802,44994500
1,50.178635,54.187561,49.925285,53.805050,53.805050,23005800
2,55.017166,56.373344,54.172661,54.346527,54.346527,18393200
3,55.260582,55.439419,51.450363,52.096165,52.096165,15361800
4,52.140873,53.651051,51.604362,52.657513,52.657513,9257400
...,...,...,...,...,...,...
3680,1207.479980,1216.300049,1200.500000,1205.920044,1205.920044,1017800
3681,1205.939941,1215.670044,1204.130005,1215.000000,1215.000000,950000
3682,1214.989990,1216.219971,1205.030029,1207.150024,1207.150024,907200
3683,1207.890015,1208.689941,1199.859985,1203.839966,1203.839966,860200


Generate metadata from the dataframe

In [10]:
# Create the SingleTableMetadata object that will describe the table's columns.
metadata = SingleTableMetadata()

In [11]:
# Populate `metadata` in place from the DataFrame; the return value is not used.
# The metadata printed further down shows every column detected as sdtype "numerical".
metadata.detect_from_dataframe(data=guests_table)

In [12]:
# Show the detected metadata so the inferred column types can be checked before fitting.
metadata

{
    "columns": {
        "Open": {
            "sdtype": "numerical"
        },
        "High": {
            "sdtype": "numerical"
        },
        "Low": {
            "sdtype": "numerical"
        },
        "Close": {
            "sdtype": "numerical"
        },
        "Adj_Close": {
            "sdtype": "numerical"
        },
        "Volume": {
            "sdtype": "numerical"
        }
    },
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1"
}

Initialize the CopulaGAN synthesizer with specified parameters

In [13]:
# Build the CopulaGAN synthesizer from the detected metadata.
# Only the settings listed here are chosen explicitly; any parameter not passed
# keeps the library's own value (the full set is shown by get_parameters()).
synthesizer = CopulaGANSynthesizer(
    metadata,
    # Output post-processing switches.
    enforce_min_max_values=True,
    enforce_rounding=False,
    # Training schedule.
    epochs=100,
    batch_size=100,
    cuda=False,  # presumably keeps training on the CPU - confirm against SDV docs
    # Optimizer settings for the generator and the discriminator.
    generator_lr=0.0002,
    generator_decay=1e-06,
    discriminator_lr=0.0002,
    discriminator_decay=1e-06,
)



Inspect the parameters of the synthesizer

In [22]:
# Display the synthesizer's parameters: the values passed to the constructor plus
# the ones that were left unspecified (e.g. embedding_dim, pac in the output below).
# NOTE(review): this cell's execution count (22) is out of sequence with its
# neighbours (13 and 15) - re-run with Restart Kernel -> Run All before sharing.
synthesizer.get_parameters()

{'enforce_min_max_values': True,
 'enforce_rounding': False,
 'locales': ['en_US'],
 'embedding_dim': 128,
 'generator_dim': (256, 256),
 'discriminator_dim': (256, 256),
 'generator_lr': 0.0002,
 'generator_decay': 1e-06,
 'discriminator_lr': 0.0002,
 'discriminator_decay': 1e-06,
 'batch_size': 100,
 'discriminator_steps': 1,
 'log_frequency': True,
 'verbose': False,
 'epochs': 100,
 'pac': 10,
 'cuda': False,
 'numerical_distributions': {},
 'default_distribution': 'beta'}

Fit the model to the dataset

In [15]:
# Train the synthesizer on the real table. fit() updates `synthesizer` in place and
# does not return a model object, so the previous `model_copula = ...` assignment
# only bound None; the fitted `synthesizer` itself is what later cells sample from.
synthesizer.fit(guests_table)


Generate synthetic data

In [16]:
# Draw 6000 synthetic rows from the fitted synthesizer.
# NOTE(review): no random seed is set anywhere in the visible cells, so the
# sampled values will presumably differ between runs - confirm and seed if needed.
new_data = synthesizer.sample(6000)

In [18]:
# Render the synthetic table as the cell output.
# NOTE(review): in the rows shown below the price columns are not mutually
# consistent (e.g. row 0 has High 588.8 below both Open 751.3 and Low 1020.5),
# so the Low <= Open/Close <= High relationship of the real data is not preserved.
# Consider adding constraints or validating before using this output.
new_data

Unnamed: 0,Open,High,Low,Close,Adj_Close,Volume
0,751.272597,588.817911,1020.525608,1176.079470,1268.329956,1461343
1,1220.932023,1142.248973,976.645249,564.969605,780.042373,1573974
2,513.883402,606.970192,921.403243,553.662845,70.549628,4584897
3,307.542602,291.515053,291.841689,336.567462,300.043205,7598497
4,1240.821746,1272.938204,557.251988,603.210810,1163.998831,1309691
...,...,...,...,...,...,...
5995,227.386309,565.268126,216.274740,450.975436,249.263588,14808867
5996,583.950668,1112.459258,113.971465,803.575205,567.018189,1285039
5997,251.101808,564.800129,540.224776,116.074842,61.399117,11606614
5998,252.495769,215.091491,51.394985,365.139969,555.341145,8305337


Save the synthetic data to a CSV file

In [19]:
# Write the synthetic rows to a CSV file in the current working directory;
# index=False leaves the DataFrame's row index out of the file.
new_data.to_csv('CopulaGAN_Data_for_synthesis.csv', index=False)