In [1]:
import pandas as pd
import os
import re
import torch
import shutil

# Restrict this process to GPU index 0, then report whether PyTorch can see CUDA.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})
print(f"CUDA available: {torch.cuda.is_available()}")

CUDA available: False


In [None]:
# Experiment selection: pick the dataset to process and the run settings.
dataset = "adultsample"
seed = 1234
training = "fixed"
Retraining = "Retraining"
split = "70_30"
path = f"/root/workspace/SENTI/Data/{dataset}"

# Per-dataset values forwarded to main.py as --initial and --step.
# Each row is (dataset name, initial, step).
config = {
    name: {"initial": first, "step": increment}
    for name, first, increment in (
        ("adultsample", 1000, 100),
        ("australian", 230, 23),
        ("contraceptive", 491, 49),
        ("credit", 218, 22),
        ("flare", 355, 35),
        ("imdb", 1510, 151),
        ("mammogram", 277, 27),
        ("thoracic", 157, 15),
    )
}

# Look up the settings for the selected dataset (KeyError if it is not listed).
initial, step = (config[dataset][key] for key in ("initial", "step"))
# Execute main.py through IPython's %run magic. The {name} placeholders are
# expanded by IPython from the notebook variables assigned above (path,
# dataset, seed, initial, step) before the arguments reach the script.
# NOTE(review): the meaning of each flag is defined by main.py, which is not
# visible here; the --mode options are summarised in the notes that follow.
%run /root/workspace/Dynamic_Data_Imputation/v2/zCode/main.py \
  --path {path} \
  --datasets {dataset} \
  --seeds {seed} \
  --cum_pcts 0.05 0.05 0.1 0.2\
  --initial {initial} \
  --step {step} \
  --mode all
 
# Available --mode values:
#   --mode inject   -> only inject nulls
#   --mode SENT-I   -> only run FAISS imputation (expects *_nonimputed.csv files to be present already)
#   --mode all      -> do both injection and imputation, in sequence

# #________________________Combining all evaluation files and Copying the files to another directory_______


def combine_csvs(input_dir, output_file, run_seed=None):
    """Concatenate the evaluation CSVs of one seed into a single CSV file.

    Every file in ``input_dir`` whose name ends with
    ``_<run_seed>_imputed_evaluation.csv`` is read and the rows are stacked
    (with a fresh integer index) in sorted file-name order, so the result is
    reproducible regardless of the order ``os.listdir`` returns.

    Args:
        input_dir: Directory that holds the per-run evaluation CSVs; the
            combined file is written into this same directory.
        output_file: File name (not a full path) of the combined CSV.
        run_seed: Seed used in the file-name suffix. When ``None`` (default)
            the notebook-level ``seed`` variable is used, which keeps the
            original two-argument call working unchanged.

    Raises:
        ValueError: If no file in ``input_dir`` matches the expected suffix.
    """
    if run_seed is None:
        # Fall back to the notebook-level seed defined in the configuration.
        run_seed = seed

    suffix = f"_{run_seed}_imputed_evaluation.csv"
    all_files = sorted(
        os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith(suffix)
    )
    if not all_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(f"No files ending with '{suffix}' found in {input_dir}")

    combined_df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)
    output_path = os.path.join(input_dir, output_file)
    combined_df.to_csv(output_path, index=False)
    print(f"Combined CSV saved to {output_path}")

#_________________________________Copying the files to other directories_________________________________

def copy_chunk_csvs(src_dir, dst_dir):
    """Copy every ``*_chunk<digits>.csv`` file from ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created if it does not exist. Files are copied with
    ``shutil.copy2`` and a "Copied: <name>" line is printed for each one.
    Entries whose names do not end in ``_chunk<digits>.csv`` are ignored.
    """
    os.makedirs(dst_dir, exist_ok=True)
    is_chunk_csv = re.compile(r'_chunk\d+\.csv$').search

    for entry in os.listdir(src_dir):
        if not is_chunk_csv(entry):
            continue
        shutil.copy2(os.path.join(src_dir, entry), os.path.join(dst_dir, entry))
        print(f"Copied: {entry}")

#________________________________________________________________________________________________________


# Directory scanned for evaluation/chunk CSVs and the name of the combined
# evaluation file written back into it.
input_dir = f"/root/workspace/SENTI/{dataset}"
output_file = f"SENT-I_evaluations_{dataset}_{seed}.csv"

# Destination folders for the chunk CSVs, one per training regime.
# The original referenced an undefined name `training_fixed`, which raised
# NameError; the configured variable is `training`.
# NOTE(review): confirm the on-disk folder is named after `training`
# ("fixed") and not literally "training_fixed".
destination_directory_1 = f"/root/workspace/IPM-main/Data/{dataset}/{split}/{training}"
destination_directory_2 = f"/root/workspace/IPM-main/Data/{dataset}/{split}/{Retraining}"

# Call the combine function
combine_csvs(input_dir, output_file)

# Copy to both destinations
copy_chunk_csvs(input_dir, destination_directory_1)
copy_chunk_csvs(input_dir, destination_directory_2)



  # after your %run finishes
import gc
import torch


def release_cuda_globals(namespace):
    """Delete every entry of ``namespace`` whose value reports a CUDA device.

    A value counts as CUDA-resident when it has a ``device`` attribute whose
    ``type`` attribute equals ``"cuda"``. Values with no ``device``, with
    ``device is None``, or with a ``device`` that has no ``type`` (for
    example a plain string) are left untouched instead of raising.

    Args:
        namespace: Mutable mapping of names to objects, e.g. ``globals()``.

    Returns:
        List of the names that were removed, in iteration order.
    """
    removed = []
    # Iterate over a snapshot of the keys because entries are deleted in the loop.
    for name in list(namespace):
        device = getattr(namespace[name], "device", None)
        if getattr(device, "type", None) == "cuda":
            del namespace[name]
            removed.append(name)
    return removed


release_cuda_globals(globals())

gc.collect()
# Only touch the CUDA allocator when a GPU is actually usable, so the cell
# also runs on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()  # if you used multiprocessing
