In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import networkx as nx
import datgan
import json

# Set the TF_GPU_ALLOCATOR environment variable
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

# Device selection switch (replaces commenting code in and out):
#   False (default) -> hide every GPU from TensorFlow
#   True            -> make all detected physical GPUs visible
USE_GPU = False

if USE_GPU:
    # Enable all GPUs (reset to default)
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.set_visible_devices(physical_devices, 'GPU')
else:
    # Disable all GPUs
    tf.config.set_visible_devices([], 'GPU')

# TF version
print(f"Tensorflow Version {tf.__version__}")

# DEFINE DATGAN VERSION

### Set the data path and folder name variables

In [None]:
# Root data directory. When the DATGAN_DATA_DIR environment variable is set
# (and non-empty) it overrides the default location below, so the notebook can
# run on another machine without editing this cell.
# os.path.join(..., '') guarantees a trailing separator, so the path can be
# prefixed to a folder name by plain string concatenation.
DatGan_path = os.path.join(
    os.environ.get('DATGAN_DATA_DIR')
    or '/home/s212574/snap/snapd-desktop-integration/83/Documents/Thesis/MSc_PopSyn/Sigga_Luis/Data/',
    '',
)
os.chdir(DatGan_path)

# folder name
folder_name = 'FolderName' # Locate folder where all the data is stored and results can be saved.

In [None]:
# Load the data

def _load_json(json_name):
    """Return the parsed content of the JSON file at ``json_name``."""
    with open(json_name, 'r', encoding='utf-8') as handle:
        return json.load(handle)


def _read_split(csv_name, dtypes):
    """Read one CSV data split and return it without its 'unnamed' columns.

    Args:
        csv_name: path of the CSV file to read.
        dtypes: column-name -> dtype mapping forwarded to ``pd.read_csv``.

    Returns:
        A new DataFrame in which every column whose name contains 'unnamed'
        (case-insensitive) has been dropped.
    """
    frame = pd.read_csv(csv_name, sep=',', dtype=dtypes)
    unnamed_cols = frame.columns[frame.columns.str.contains('unnamed', case=False)]
    # Return a new frame instead of mutating in place so re-running is safe.
    return frame.drop(columns=unnamed_cols)


# Work from the data folder. os.path.join is used (instead of string
# concatenation) so the result does not depend on a trailing separator.
os.chdir(os.path.join(DatGan_path, folder_name))
print(os.getcwd())

# Load the data dtypes dictionary
loaded_df_types = _load_json('df_dtypes.json')
print(loaded_df_types)

# Load the relation data and convert each loaded entry back to a tuple
loaded_relations = _load_json('relations.json')
relations = [tuple(rel) for rel in loaded_relations]

# Load the meta data
loaded_data_info = _load_json('data_info.json')
print(loaded_data_info)

# Load the graph as a directed graph
graph = nx.read_adjlist('graph.adjlist', create_using=nx.DiGraph)

# Load the train and test data
train_set = _read_split('trainData.csv', loaded_df_types)
test_set = _read_split('testData.csv', loaded_df_types)

# Check the shapes of your sets
print("Training set shape:", train_set.shape)
print("Testing set shape:", test_set.shape)

In [None]:
# Display the pandas summary of the training DataFrame to check that it loaded as expected
train_set.info()

# Training


In [None]:
### DatGan
from datgan import DATGAN

# Move into the 'DatGan' sub-folder of the data folder so the relative
# paths used below resolve inside it.
datgan_dir = os.path.join(DatGan_path, folder_name, 'DatGan')
os.chdir(datgan_dir)

batch_size = 1116

# Constructor settings gathered in one place, then unpacked into DATGAN.
datgan_settings = {
    'output': './output/',
    'batch_size': batch_size,
    'num_epochs': 1000,
}
datgan = DATGAN(**datgan_settings)

In [None]:
# Preprocessing step (this is not the training itself): hand the training
# frame and its metadata to DATGAN.preprocess.
# NOTE(review): presumably the preprocessed data is cached under
# './encoded_data/' — confirm against the DATGAN documentation.
datgan.preprocess(
    data=train_set,
    metadata=loaded_data_info,
    preprocessed_data_path='./encoded_data/',
)

In [None]:
# Fit DATGAN on the training data, using the loaded metadata and graph.
# NOTE(review): the result of fit() is bound to new_datgan, but sampling
# below is done from `datgan` — confirm whether fit() returns anything useful.
new_datgan = datgan.fit(
    train_set,
    metadata=loaded_data_info,
    dag=graph,
    preprocessed_data_path='./encoded_data',
)

In [None]:
# Sample from DATGAN: request as many synthetic rows as the test set has,
# then write them to CSV without the index column.
n_samples = len(test_set)
samples = datgan.sample(n_samples)
samples.to_csv('DatGan_synthetic.csv', index=False)