In [9]:
import argparse
from sys import exit

from torch import cuda

from copycat_framework.copycat import Copycat, Problem, Config
from copycat_framework.copycat.utils import set_seeds

'''Quero fazer o seguinte:
Treinar 1 oráculo com base no dataset MNIST e, após isso, realizar o ataque de extração para o copycat.
Depois quero treinar outro oráculo com o SVHN e então realizar o ataque de extração para o mesmo modelo copycat.
Assim o copycat terá conhecimento de 2 domínios diferentes.'''
''

### Options Class

In [11]:
class Options:
    """Settings used to train and report the Oracle, Copycat and Finetune models.

    Values passed explicitly to the constructor take precedence; any tunable
    left as ``None`` is looked up in the configuration file through
    :meth:`parse_value`. ``repr(options)`` gives a human-readable summary of
    the effective settings.
    """

    def __init__(self,
                 #required parameters:
                 problem_name, oracle_filename, copycat_filename, finetune_filename,
                 train_oracle,
                 train_copycat, label_copycat_dataset,
                 train_finetune, label_finetune_dataset,
                 only_print_reports,
                 #general configutation:
                 config_file=None,
                 validation_step=None,
                 save_snapshot=None,
                 #oracle:
                 oracle_arch=None, oracle_max_epochs=None, oracle_batch_size=None, oracle_lr=None,
                 oracle_gamma=None, oracle_dataset_root=None, oracle_resume_filename=None,
                 #copycat:
                 copycat_arch=None, copycat_max_epochs=None, copycat_batch_size=None, copycat_lr=None,
                 copycat_gamma=None, copycat_dataset_root=None, copycat_balance_dataset=None, copycat_resume_filename=None,
                 #finetune:
                 finetune_max_epochs=None, finetune_batch_size=None, finetune_lr=None,
                 finetune_gamma=None, finetune_dataset_root=None, finetune_balance_dataset=None, finetune_resume_filename=None,
                 #seed
                 seed=None):
        """Store every option as an attribute of the same name.

        The configuration file is read once here to fill ``self.problems``
        with the available problem names; ``self.config`` itself is created
        lazily on the first lookup.
        """
        #required:
        self.problem_name = problem_name
        self.oracle_filename = oracle_filename
        self.copycat_filename = copycat_filename
        self.finetune_filename = finetune_filename
        self.train_oracle = train_oracle
        self.train_copycat = train_copycat
        self.label_copycat_dataset = label_copycat_dataset
        self.train_finetune = train_finetune
        self.label_finetune_dataset = label_finetune_dataset
        self.only_print_reports = only_print_reports
        #general configuration:
        self.config = None
        self.config_file = config_file
        self.validation_step = validation_step
        self.save_snapshot = save_snapshot
        self.problems = self.get_problem_names()
        #oracle:
        self.oracle_arch = oracle_arch
        self.oracle_resume_filename = oracle_resume_filename
        self.oracle_max_epochs = oracle_max_epochs
        self.oracle_batch_size = oracle_batch_size
        self.oracle_lr = oracle_lr
        self.oracle_gamma = oracle_gamma
        self.oracle_dataset_root = oracle_dataset_root
        #copycat:
        self.copycat_arch = copycat_arch
        self.copycat_resume_filename = copycat_resume_filename
        self.copycat_max_epochs = copycat_max_epochs
        self.copycat_batch_size = copycat_batch_size
        self.copycat_lr = copycat_lr
        self.copycat_gamma = copycat_gamma
        self.copycat_dataset_root = copycat_dataset_root
        self.copycat_balance_dataset = copycat_balance_dataset
        #finetune:
        self.finetune_resume_filename = finetune_resume_filename
        self.finetune_max_epochs = finetune_max_epochs
        self.finetune_batch_size = finetune_batch_size
        self.finetune_lr = finetune_lr
        self.finetune_gamma = finetune_gamma
        self.finetune_dataset_root = finetune_dataset_root
        self.finetune_balance_dataset = finetune_balance_dataset
        #seed
        self.seed = seed

    def __load_config(self):
        """Create the ``Config`` object on first use and cache it in ``self.config``."""
        if self.config is None:
            self.config = Config(self.config_file)
        return self.config

    def parse_value(self, attr_name):
        """Return the effective value of the option called ``attr_name``.

        The attribute stored on this object wins when it is not ``None``.
        Otherwise the value is read from the configuration section selected
        by the attribute prefix (``oracle``, ``copycat``, ``finetune``, or the
        general options for anything else), using the name without its prefix
        as key.

        Returns ``None`` when the attribute does not exist or when the
        configuration has no usable value for it.
        """
        config = self.__load_config()
        if attr_name.startswith('oracle'):
            opts = config.get_oracle_options(self.problem_name)
            config_key = attr_name.replace('oracle_', '')
        elif attr_name.startswith('copycat'):
            opts = config.get_copycat_options(self.problem_name)
            config_key = attr_name.replace('copycat_', '')
        elif attr_name.startswith('finetune'):
            opts = config.get_finetune_options(self.problem_name)
            config_key = attr_name.replace('finetune_', '')
        else:
            opts = config.get_general_options()
            config_key = attr_name

        try:
            local_value = getattr(self, attr_name)
        except AttributeError:
            # Unknown option name: keep the best-effort contract of returning None.
            return None
        if local_value is not None:
            # An explicit value must not be lost just because the configuration
            # section is missing or malformed.
            return local_value
        try:
            return opts[config_key] if config_key in opts else None
        except (TypeError, KeyError):
            # `opts` is not a usable mapping (e.g. None): no configured value.
            return None

    def get_db_name(self, model, db_name):
        """Look up a dataset of ``model`` in the configuration.

        Returns ``(name, entry)`` where ``name`` is the configured value of
        ``db_name`` and ``entry`` is what the model's ``data['datasets']``
        table stores under that name (``None`` if absent). Returns
        ``(None, None)`` when ``db_name`` is not configured.
        """
        config = self.__load_config()
        name = config.get_value(self.problem_name, db_name, model=model)
        if name is None:
            return None, None
        datasets = config.get_value(self.problem_name, 'data', model=model)['datasets']
        return name, (datasets[name] if name in datasets else None)

    def get_problem_names(self):
        """Return the problem names declared in the configuration file."""
        return Config(self.config_file).get_problem_names()

    @staticmethod
    def _format_dataset(label, db):
        """Format one ``(name, entry)`` pair from :meth:`get_db_name`; '' if there is no name."""
        name, entry = db
        if name is None:
            return ''
        line = f"     {label}: {name}"
        if entry is not None:
            line += f" ('{entry}')"
        return line + '\n'

    def _format_hyperparams(self, model):
        """Format the max-epochs / batch-size / lr / gamma lines of ``model``."""
        max_epochs = self.parse_value(f'{model}_max_epochs')
        batch_size = self.parse_value(f'{model}_batch_size')
        lr = self.parse_value(f'{model}_lr')
        gamma = self.parse_value(f'{model}_gamma')
        return (f"     Maximum training epochs: {max_epochs}\n"
                f"     Batch size: {batch_size}\n"
                f"     Learning Rate: {lr}\n"
                f"     Gamma: {gamma}\n")

    def _format_dataset_root(self, model):
        """Format the dataset-root line of ``model``; '' when no root is set."""
        db_root = self.parse_value(f'{model}_dataset_root')
        # Skip both '' and None so the summary never shows "Dataset root: 'None'".
        return f"     Dataset root: '{db_root}'\n" if db_root else ''

    def __get_repr_oracle(self):
        """Build the Oracle section of the summary."""
        fmt = "  Oracle:\n"
        if self.train_oracle:
            fmt += f"     Model filename: '{self.oracle_filename}'\n"
            fmt += f"     Model arch: {self.oracle_arch}\n"
        if self.oracle_resume_filename is not None:
            fmt += f"     Resume filename: '{self.oracle_resume_filename}'\n"
        if self.train_oracle and not self.only_print_reports:
            fmt += self._format_hyperparams('oracle')
            fmt += self._format_dataset('Dataset', self.get_db_name(model='oracle', db_name='db_train'))
            fmt += self._format_dataset('Test dataset', self.get_db_name(model='oracle', db_name='db_test'))
        else:
            fmt += "     It will NOT be trained.\n"
        return fmt + self._format_dataset_root('oracle')

    def __get_repr_copycat(self):
        """Build the Copycat section of the summary."""
        fmt = "  Copycat:\n"
        if self.train_copycat:
            fmt += f"     Model filename: '{self.copycat_filename}'\n"
            fmt += f"     Model arch: {self.copycat_arch}\n"
        if self.copycat_resume_filename is not None:
            fmt += f"     Resume filename: '{self.copycat_resume_filename}'\n"
        if self.train_copycat and not self.only_print_reports:
            fmt += self._format_hyperparams('copycat')
            fmt += self._format_dataset('Dataset', self.get_db_name(model='copycat', db_name='db_train'))
            fmt += self._format_dataset('Test dataset', self.get_db_name(model='copycat', db_name='db_test'))
            fmt += f"     The training dataset will {'' if self.parse_value('copycat_balance_dataset') else 'NOT '}be balanced.\n"
            fmt += f"     The training dataset will {'' if self.label_copycat_dataset else 'NOT '}be labeled by the Oracle Model.\n"
        else:
            fmt += "     It will NOT be trained.\n"
        return fmt + self._format_dataset_root('copycat')

    def __get_repr_finetune(self):
        """Build the Copycat Finetuning section of the summary."""
        fmt = "  Copycat Finetuning:\n"
        if self.train_finetune:
            fmt += f"     Model filename: '{self.finetune_filename}'\n"
        if self.finetune_resume_filename is not None:
            fmt += f"     Resume filename: '{self.finetune_resume_filename}'\n"
        if self.train_finetune and not self.only_print_reports:
            fmt += self._format_hyperparams('finetune')
            fmt += self._format_dataset('Dataset', self.get_db_name(model='finetune', db_name='db_train'))
            fmt += f"     The dataset will {'' if self.parse_value('finetune_balance_dataset') else 'NOT '}be balanced.\n"
            fmt += f"     The training dataset will {'' if self.label_finetune_dataset else 'NOT '}be labeled by the Oracle Model.\n"
        else:
            fmt += "     It will NOT be trained.\n"
        return fmt + self._format_dataset_root('finetune')

    def __repr__(self) -> str:
        """Return the multi-line summary of all effective options."""
        fmt = "Options:\n"
        fmt += f"  Problem: {self.problem_name}\n"
        ## ORACLE
        fmt += self.__get_repr_oracle()
        ## COPYCAT:
        fmt += self.__get_repr_copycat()
        ## COPYCAT FINETUNE:
        fmt += self.__get_repr_finetune()
        ## REPORTS:
        fmt += '\n'
        # Decide on the resolved value (explicit or configured), i.e. the same
        # value that is printed, so a validation step of 0 set in the
        # configuration file is honoured too.
        validation_step = self.parse_value('validation_step')
        if validation_step != 0 and not self.only_print_reports:
            fmt += f"  Validation Steps: {validation_step}\n"
            fmt += f"  A snapshot of the model will {'' if self.parse_value('save_snapshot') else 'NOT '}be saved for each validation step.\n"

        if self.only_print_reports:
            fmt += "\nNOTE: As 'only-print-reports' was selected, the models will only be loaded (or created with random parameters) and tested.\n"
            fmt += "NOTE: THE MODELS WILL NOT BE TRAINED!!!"

        # get_device_name() raises when no CUDA device is available; printing
        # the options must not crash on a CPU-only machine.
        device_name = cuda.get_device_name() if cuda.is_available() else 'cpu'
        fmt += f"\nDevice to use: '{device_name}'\n"

        if self.seed is not None:
            fmt += f"\nThe following seed will be used for Torch, Numpy and Random: {self.seed}\n"
        return fmt

def parse_boolean(value):
    """Convert a textual (or already boolean) flag into a ``bool``.

    Args:
        value: a ``bool`` (returned unchanged) or anything whose ``str()``
            form is one of the accepted words, compared case-insensitively
            and ignoring surrounding whitespace.

    Returns:
        ``True`` or ``False``.

    Raises:
        ValueError: if the value is not one of the accepted words. This is
            also what non-string inputs such as ``None`` produce, so callers
            like ``argparse`` only ever have to handle ``ValueError``.
    """
    if isinstance(value, bool):
        return value
    false_words = {'false', 'f', '0', 'no', 'n', 'not', 'dont'}
    # 'ofcouse' is kept for backward compatibility with the original spelling.
    true_words = {'true', 't', '1', 'yes', 'y', 'yeah', 'yeap', 'ofcouse', 'ofcourse'}
    text = str(value).strip().lower()
    if text in false_words:
        return False
    if text in true_words:
        return True
    raise ValueError(f'{value} is not a valid boolean value')

def get_problem_names(args=None):
    """Return the problem names declared in the selected configuration file.

    Only ``--config-file`` is read from the command line; every other
    argument is ignored here and left for the main parser.

    Args:
        args: optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    # add_help=False: otherwise this auxiliary parser would answer -h/--help
    # itself (showing only --config-file) and exit before the real parser
    # in parse_params() is even built.
    p_aux = argparse.ArgumentParser(add_help=False)
    p_aux.add_argument('--config-file')
    aux_arg, _ = p_aux.parse_known_args(args)
    return Config(aux_arg.config_file).get_problem_names()

def parse_params(args=None):
    """Parse the command line and return the corresponding ``Options``.

    Args:
        args: optional list of argument strings, e.g. ``['-p', 'MNIST']``.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.
            Passing a list is useful from a notebook or a test, where
            ``sys.argv`` does not hold the script's arguments.

    Returns:
        An ``Options`` instance. Arguments not given on the command line are
        passed through as ``None`` (or their argparse default).
    """
    problem_names = get_problem_names(args)
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--problem',  required=True, type=str, help='Problem name', choices=problem_names)

    parser.add_argument('--config-file', help="Use this option to use a different configuration file and override the default options set in 'config.toml'")
    parser.add_argument('--only-print-reports', action='store_true', help='Use this option to only load the models and print their reports.')
    parser.add_argument('--seed', type=int, help='Use this option to provide a new seed to Pytorch/Numpy/Random')

    parser.add_argument('--validation-step', type=int, help="Change validation step. Set 0 (zero) to disable validation during training.")
    parser.add_argument('--save-snapshot', type=parse_boolean, nargs='?', const=True, help="Save snapshots at each validation step")
    #Oracle
    parser.add_argument('--oracle', type=str, default='Oracle.pth', help="Filename to save the Oracle Model")
    parser.add_argument('--oracle-resume', type=str, help="Filename to resume the Oracle Model")
    parser.add_argument('--oracle-arch', type=str, default='vgg16', help="Oracle architecture (default: vgg16)")
    parser.add_argument('--dont-train-oracle', action='store_true', help="You can use this option to: Resume the Oracle's Model, or test the problem on an Oracle's Model with random weights")
    parser.add_argument('--oracle-max-epochs', type=int, help="Change maximum epochs to train Oracle's Model")
    parser.add_argument('--oracle-batch-size', type=int, help="Batch size to train Oracle's Model")
    parser.add_argument('--oracle-lr', type=float, help="Learning rate to train Oracle's Model")
    parser.add_argument('--oracle-gamma', type=float, help="Gamma to train Oracle's Model. It is the value to decrease the learning rate (lr*gamma)")
    parser.add_argument('--oracle-dataset-root', type=str, default='', help="Root folder of dataset files (image list and images listes in it)")
    #Copycat:
    parser.add_argument('--copycat', type=str, default='Copycat.pth', help="Filename to save the Copycat Model")
    parser.add_argument('--copycat-resume', type=str, help="Filename to resume the Copycat Model")
    parser.add_argument('--copycat-arch', type=str, default='vgg16', help="Copycat (and Finetune) architecture (default: vgg16)")
    parser.add_argument('--dont-train-copycat', action='store_true', help="You can use this option to test a Copycat's Model with random weights")
    parser.add_argument('--copycat-max-epochs', type=int, help="Change maximum epochs to train Copycat's Model")
    parser.add_argument('--copycat-batch-size', type=int, help="Batch size to train Copycat's Model")
    parser.add_argument('--copycat-lr', type=float, help="Learning rate to train Copycat's Model")
    parser.add_argument('--copycat-gamma', type=float, help="Gamma to train Copycat's Model. It is the value to decrease the learning rate (lr*gamma)")
    parser.add_argument('--copycat-dataset-root', type=str, default='', help="Root folder of Copycat problem dataset")
    parser.add_argument('--copycat-balance-dataset', type=parse_boolean, help="Replicate or drop images to balance the number of images per class")
    parser.add_argument('--dont-label-copycat-dataset', action='store_true', help='Use this option to avoid labeling the Copycat training dataset (NPD)')
    #Finetune:
    parser.add_argument('--finetune', type=str, default='Copycat-Finetune.pth', help="Filename to save the Copycat Finetune Model")
    parser.add_argument('--finetune-resume', type=str, help="Filename to resume the Copycat Finetune Model")
    parser.add_argument('--dont-train-finetune', action='store_true', help="You can use this option to avoid finetuning Copycat model")
    parser.add_argument('--finetune-max-epochs', type=int, help="Change maximum epochs to finetune Copycat's Model")
    parser.add_argument('--finetune-batch-size', type=int, help="Batch size to finetune Copycat's Model")
    parser.add_argument('--finetune-lr', type=float, help="Learning rate to finetune Copycat's Model")
    parser.add_argument('--finetune-gamma', type=float, help="Gamma to finetune Copycat's Model. It is the value to decrease the learning rate (lr*gamma)")
    parser.add_argument('--finetune-dataset-root', type=str, default='', help="Root folder of Copycat problem dataset")
    parser.add_argument('--finetune-balance-dataset', type=parse_boolean, help="Replicate or drop images to balance the number of images per class")
    parser.add_argument('--dont-label-finetune-dataset', action='store_true', help='Use this option to avoid labeling the Copycat finetune dataset (NPD)')

    parsed = parser.parse_args(args)

    # The command line exposes negative switches (--dont-...), while Options
    # takes the positive form, hence the `not` on those flags.
    return Options(problem_name=parsed.problem,
                   only_print_reports=parsed.only_print_reports,
                   config_file=parsed.config_file, validation_step=parsed.validation_step, save_snapshot=parsed.save_snapshot,
                   #oracle
                   oracle_filename=parsed.oracle,
                   oracle_resume_filename=parsed.oracle_resume,
                   oracle_arch=parsed.oracle_arch,
                   train_oracle=not parsed.dont_train_oracle,
                   oracle_max_epochs=parsed.oracle_max_epochs,
                   oracle_batch_size=parsed.oracle_batch_size,
                   oracle_lr=parsed.oracle_lr,
                   oracle_gamma=parsed.oracle_gamma,
                   oracle_dataset_root=parsed.oracle_dataset_root,
                   #copycat
                   copycat_filename=parsed.copycat,
                   copycat_resume_filename=parsed.copycat_resume,
                   copycat_arch=parsed.copycat_arch,
                   train_copycat=not parsed.dont_train_copycat,
                   copycat_max_epochs=parsed.copycat_max_epochs,
                   copycat_batch_size=parsed.copycat_batch_size,
                   copycat_lr=parsed.copycat_lr,
                   copycat_gamma=parsed.copycat_gamma,
                   copycat_dataset_root=parsed.copycat_dataset_root,
                   copycat_balance_dataset=parsed.copycat_balance_dataset,
                   label_copycat_dataset=not parsed.dont_label_copycat_dataset,
                   #finetune
                   finetune_filename=parsed.finetune,
                   finetune_resume_filename=parsed.finetune_resume,
                   train_finetune=not parsed.dont_train_finetune,
                   finetune_max_epochs=parsed.finetune_max_epochs,
                   finetune_batch_size=parsed.finetune_batch_size,
                   finetune_lr=parsed.finetune_lr,
                   finetune_gamma=parsed.finetune_gamma,
                   finetune_dataset_root=parsed.finetune_dataset_root,
                   finetune_balance_dataset=parsed.finetune_balance_dataset,
                   label_finetune_dataset=not parsed.dont_label_finetune_dataset,
                   #seed
                   seed=parsed.seed)

### Using Framework

In [None]:
# Training budget: number of epochs for the Oracle and for the Copycat, in that order.
oracle_epochs, copycat_epochs = 10, 5

In [24]:
# Training budget for the MNIST experiment.
oracle_epochs = 10
copycat_epochs = 5
# Keep the integer seed itself rather than the return value of set_seeds():
# the output recorded for this cell shows no "seed will be used" line, which
# suggests set_seeds() returns None, so Options and Problem were never told
# the seed -- TODO confirm against copycat_framework.
seed = 7
set_seeds(seed)
if __name__ == "__main__":
    # Train a resnet18 Oracle on MNIST, then extract it into a resnet18 Copycat.
    # Finetuning is disabled for this run.
    options_mnist = Options(problem_name='MNIST',
                   only_print_reports=False,
                   config_file=None, validation_step=2, save_snapshot=True,
                   #oracle
                   oracle_filename='mnist.pth',
                   oracle_resume_filename=None,
                   oracle_arch='resnet18',
                   train_oracle=True,
                   oracle_max_epochs=oracle_epochs,
                   oracle_batch_size=32,
                   oracle_lr=0.0001,
                   oracle_gamma=0.3,
                   oracle_dataset_root='',
                   #copycat
                   copycat_filename='copycat_mnist.pth',
                   copycat_resume_filename=None,
                   copycat_arch='resnet18',
                   train_copycat=True,
                   copycat_max_epochs=copycat_epochs,
                   copycat_batch_size=32,
                   copycat_lr=0.0001,
                   copycat_gamma=0.3,
                   copycat_dataset_root='',
                   copycat_balance_dataset=None,
                   label_copycat_dataset=True,
                   #finetune
                   finetune_filename='',
                   finetune_resume_filename=None,
                   train_finetune=False,
                   finetune_max_epochs=None,
                   finetune_batch_size=None,
                   finetune_lr=None,
                   finetune_gamma=None,
                   finetune_dataset_root='',
                   finetune_balance_dataset=None,
                   label_finetune_dataset=False,
                   #seed
                   seed=seed)
    # Show the effective settings before the long-running training starts.
    print(options_mnist)
    #input('\nCheck the parameters and press ENTER to continue...\n')
    problem = Problem(problem=options_mnist.problem_name,
                      #oracle
                      oracle_arch=options_mnist.oracle_arch,
                      oracle_filename=options_mnist.oracle_filename,
                      oracle_resume_filename=options_mnist.oracle_resume_filename,
                      oracle_dataset_root=options_mnist.oracle_dataset_root,
                      #copycat
                      copycat_arch=options_mnist.copycat_arch,
                      copycat_filename=options_mnist.copycat_filename,
                      copycat_resume_filename=options_mnist.copycat_resume_filename,
                      copycat_dataset_root=options_mnist.copycat_dataset_root,
                      #finetune
                      finetune_filename=options_mnist.finetune_filename,
                      finetune_resume_filename=options_mnist.finetune_resume_filename,
                      finetune_dataset_root=options_mnist.finetune_dataset_root,
                      #configuration file
                      config_fn=options_mnist.config_file,
                      #seed
                      seed=options_mnist.seed)

    problem.run(validation_step=options_mnist.validation_step,
                save_snapshot=options_mnist.save_snapshot,
                #Oracle options:
                train_oracle=options_mnist.train_oracle,
                oracle_max_epochs=options_mnist.oracle_max_epochs,
                oracle_batch_size=options_mnist.oracle_batch_size,
                oracle_lr=options_mnist.oracle_lr,
                oracle_gamma=options_mnist.oracle_gamma,
                #Copycat options:
                train_copycat=options_mnist.train_copycat,
                label_copycat_dataset=options_mnist.label_copycat_dataset,
                copycat_max_epochs=options_mnist.copycat_max_epochs,
                copycat_batch_size=options_mnist.copycat_batch_size,
                copycat_lr=options_mnist.copycat_lr,
                copycat_gamma=options_mnist.copycat_gamma,
                copycat_balance_dataset=options_mnist.copycat_balance_dataset,
                #Copycat Finetune options:
                finetune_copycat=options_mnist.train_finetune,
                label_finetune_dataset=options_mnist.label_finetune_dataset,
                finetune_max_epochs=options_mnist.finetune_max_epochs,
                finetune_batch_size=options_mnist.finetune_batch_size,
                finetune_lr=options_mnist.finetune_lr,
                finetune_gamma=options_mnist.finetune_gamma,
                finetune_balance_dataset=options_mnist.finetune_balance_dataset
                )

Options:
  Problem: MNIST
  Oracle:
     Model filename: 'mnist.pth'
     Model arch: resnet18
     Maximum training epochs: 10
     Batch size: 32
     Learning Rate: 0.0001
     Gamma: 0.3
     Dataset: od1 ('/home/jeiks/Christian/data/mnist_train.txt')
     Test dataset: test1 ('/home/jeiks/Christian/data/mnist_test.txt')
  Copycat:
     Model filename: 'copycat_mnist.pth'
     Model arch: resnet18
     Maximum training epochs: 5
     Batch size: 32
     Learning Rate: 0.0001
     Gamma: 0.3
     Dataset: npd_5k1 ('/home/jeiks/Christian/data/npd.txt')
     Test dataset: test1 ('/home/jeiks/Christian/data/mnist_test.txt')
     The training dataset will be balanced.
     The training dataset will be labeled by the Oracle Model.
  Copycat Finetuning:
     It will NOT be trained.

  Validation Steps: 2
  A snapshot of the model will be saved for each validation step.

Device to use: 'NVIDIA GeForce RTX 4070'

==> Training Oracle (Mon Feb 17 22:19:40 2025):
(Oracle) Starting a new model 

[ Epoch: 01/10 Loss: 0.076470 Accuracy: 93.72% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.10it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 217.09it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.985400 Macro Avg: 0.985289


[ Epoch: 02/10 Loss: 0.047293 Accuracy: 98.33% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.36it/s]
[ Epoch: 03/10 Loss: 0.040451 Accuracy: 98.75% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.93it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 228.43it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.991000 Macro Avg: 0.990940


[ Epoch: 04/10 Loss: 0.035500 Accuracy: 99.09% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.87it/s]
[ Epoch: 05/10 Loss: 0.025433 Accuracy: 99.18% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.66it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 226.77it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.992900 Macro Avg: 0.992849


[ Epoch: 06/10 Loss: 0.021101 Accuracy: 99.31% lr: 3.00e-05 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.76it/s]
[ Epoch: 07/10 Loss: 0.026347 Accuracy: 99.41% lr: 3.00e-05 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.74it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 224.68it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.993000 Macro Avg: 0.992952


[ Epoch: 08/10 Loss: 0.012179 Accuracy: 99.40% lr: 3.00e-05 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.52it/s]
[ Epoch: 09/10 Loss: 0.020928 Accuracy: 99.42% lr: 9.00e-06 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.56it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 225.46it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.993600 Macro Avg: 0.993558


[ Epoch: 10/10 Loss: 0.015272 Accuracy: 99.36% lr: 2.70e-06 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [00:22<00:00, 82.54it/s]


Saving model in: "mnist.pth"


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 225.03it/s]


Oracle reports:
Training - Dataset ImageList:
    Number of datapoints: 60000
     * Class 0:   5923 samples
     * Class 1:   6742 samples
     * Class 2:   5958 samples
     * Class 3:   6131 samples
     * Class 4:   5842 samples
     * Class 5:   5421 samples
     * Class 6:   5918 samples
     * Class 7:   6265 samples
     * Class 8:   5851 samples
     * Class 9:   5949 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                             Normalize(mean=(0.1307, 0.1307, 0.1307), std=(0.2818, 0.2818, 0.2818))
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 10000
     * Class 0:    980 samples
     * Class 1:   1135 samples
     * Class 2:   1032 samples
     * Class 3:   1010 samples
     * Class 4:    982 samples
     * Class 5:    892 samples
     * Class 6:    



(Copycat) Starting a new model with random parameters...


[ Labeling dataset "npd_5k1" ]: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [17:30<00:00, 38.11it/s]


Scheduler Milestones: [2, 3]


[ Epoch: 01/5 Loss: 0.741111 Accuracy: 62.90% lr: 1.00e-04 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:09<00:00, 81.85it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 211.45it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.625100 Macro Avg: 0.573053


[ Epoch: 02/5 Loss: 0.510248 Accuracy: 77.64% lr: 1.00e-04 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:09<00:00, 81.75it/s]
[ Epoch: 03/5 Loss: 0.509123 Accuracy: 82.66% lr: 3.00e-05 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:11<00:00, 81.42it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 210.52it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.772900 Macro Avg: 0.761489


[ Epoch: 04/5 Loss: 0.426576 Accuracy: 84.06% lr: 9.00e-06 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:10<00:00, 81.61it/s]
[ Epoch: 05/5 Loss: 0.441778 Accuracy: 84.48% lr: 9.00e-06 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:11<00:00, 81.53it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 210.64it/s]


~~ F1 Score on test1 dataset:
~~ Micro Avg: 0.801600 Macro Avg: 0.790947
Saving Copycat model in: "copycat_mnist.pth"


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 209.71it/s]


Copycat reports:
Training - Dataset ImageList (Balanced):
    Number of datapoints: 1281160
     * Samples per class: 128116
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 10000
     * Class 0:    980 samples
     * Class 1:   1135 samples
     * Class 2:   1032 samples
     * Class 3:   1010 samples
     * Class 4:    982 samples
     * Class 5:    892 samples
     * Class 6:    958 samples
     * Class 7:   1028 samples
     * Class 8:    974 samples
     * Class 9:   1009 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None

Metrics:
Co



(Finetune) Loading model from "copycat_mnist.pth"...


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 203.04it/s]

Accuracy (Macro F1-Score):
  Oracle...: 0.993553
  Copycat..: 0.790947
  Finetune.: 0.790947
Attack Performance
  Copycat on Oracle...........: 79.61%
  Finetuned Copycat on Oracle.: 79.61%
Finished (Mon Feb 17 23:22:22 2025)






In [26]:
# Re-print the reports for the models held by the current `problem` object
# (the captured output below shows Oracle and Copycat dataset reports).
# NOTE(review): `problem` comes from an earlier cell; run that cell first.
problem.print_reports()

Oracle:


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 202.25it/s]


Oracle reports:
Training - Dataset ImageList:
    Number of datapoints: 60000
     * Class 0:   5923 samples
     * Class 1:   6742 samples
     * Class 2:   5958 samples
     * Class 3:   6131 samples
     * Class 4:   5842 samples
     * Class 5:   5421 samples
     * Class 6:   5918 samples
     * Class 7:   6265 samples
     * Class 8:   5851 samples
     * Class 9:   5949 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                             Normalize(mean=(0.1307, 0.1307, 0.1307), std=(0.2818, 0.2818, 0.2818))
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 10000
     * Class 0:    980 samples
     * Class 1:   1135 samples
     * Class 2:   1032 samples
     * Class 3:   1010 samples
     * Class 4:    982 samples
     * Class 5:    892 samples
     * Class 6:    

[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 209.90it/s]


Copycat reports:
Training - Dataset ImageList (Balanced):
    Number of datapoints: 1281160
     * Samples per class: 128116
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 10000
     * Class 0:    980 samples
     * Class 1:   1135 samples
     * Class 2:   1032 samples
     * Class 3:   1010 samples
     * Class 4:    982 samples
     * Class 5:    892 samples
     * Class 6:    958 samples
     * Class 7:   1028 samples
     * Class 8:    974 samples
     * Class 9:   1009 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None

Metrics:
Co

[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 210.46it/s]

Copycat reports:
Training - Dataset ImageList:
    Number of datapoints: 10000
     * Class 0:    980 samples
     * Class 1:   1135 samples
     * Class 2:   1032 samples
     * Class 3:   1010 samples
     * Class 4:    982 samples
     * Class 5:    892 samples
     * Class 6:    958 samples
     * Class 7:   1028 samples
     * Class 8:    974 samples
     * Class 9:   1009 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 10000
     * Class 0:    980 samples
     * Class 1:   1135 samples
     * Class 2:   1032 samples
     * Class 3:   1010 samples
     * Class 4:    982 samples
     * Class 5:    892 samples
     * Class 6:    958 samples
     * Class 7:   1028 samples
     * Class 8:    974 samples
     * Class 9:   1009 sa




In [28]:
# Second extraction stage: train an SVHN oracle, then continue training the
# copycat that was previously extracted from the MNIST oracle (resumed from
# 'weights_primeiro/copycat_mnist.pth') using labels given by the SVHN oracle.
#
# `oracle_epochs`, `copycat_epochs` and `seed` are expected to be defined in an
# earlier configuration cell.
options_mnist_svhn = Options(problem_name='SVHN',
                   only_print_reports=None,
                   config_file=None, validation_step=2, save_snapshot=1,
                   # oracle: trained from scratch on SVHN
                   oracle_filename='svhn.pth',
                   oracle_resume_filename=None,
                   oracle_arch='resnet18',
                   train_oracle=True,
                   oracle_max_epochs=oracle_epochs,
                   oracle_batch_size=32,
                   oracle_lr=0.0001,
                   oracle_gamma=0.3,
                   oracle_dataset_root='',
                   # copycat: resumes from the MNIST-stage copycat weights
                   copycat_filename='copycat_mnist_svhn.pth',
                   copycat_resume_filename='weights_primeiro/copycat_mnist.pth',
                   copycat_arch='resnet18',
                   train_copycat=True,
                   copycat_max_epochs=copycat_epochs,
                   copycat_batch_size=32,
                   copycat_lr=0.0001,
                   copycat_gamma=0.3,
                   copycat_dataset_root='',
                   copycat_balance_dataset=None,
                   label_copycat_dataset=True,
                   # finetune: disabled for this stage
                   # NOTE(review): with finetuning disabled, the "Finetune" line of the
                   # final metrics appears to be computed on a model loaded from the
                   # copycat resume checkpoint (see the run log), so it does not reflect
                   # the SVHN-trained copycat -- confirm against the framework.
                   finetune_filename='',
                   finetune_resume_filename=None,
                   train_finetune=False,
                   finetune_max_epochs=None,
                   finetune_batch_size=None,
                   finetune_lr=None,
                   finetune_gamma=None,
                   finetune_dataset_root='',
                   finetune_balance_dataset=None,
                   label_finetune_dataset=False,
                   # seed
                   seed=seed)
print(options_mnist_svhn)

# Build the problem description (architectures, checkpoint files, dataset roots).
problem = Problem(problem=options_mnist_svhn.problem_name,
                  # oracle
                  oracle_arch=options_mnist_svhn.oracle_arch,
                  oracle_filename=options_mnist_svhn.oracle_filename,
                  oracle_resume_filename=options_mnist_svhn.oracle_resume_filename,
                  oracle_dataset_root=options_mnist_svhn.oracle_dataset_root,
                  # copycat
                  copycat_arch=options_mnist_svhn.copycat_arch,
                  copycat_filename=options_mnist_svhn.copycat_filename,
                  copycat_resume_filename=options_mnist_svhn.copycat_resume_filename,
                  copycat_dataset_root=options_mnist_svhn.copycat_dataset_root,
                  # finetune
                  finetune_filename=options_mnist_svhn.finetune_filename,
                  finetune_resume_filename=options_mnist_svhn.finetune_resume_filename,
                  finetune_dataset_root=options_mnist_svhn.finetune_dataset_root,
                  # configuration file
                  config_fn=options_mnist_svhn.config_file,
                  # seed
                  seed=options_mnist_svhn.seed)

# Run the pipeline with THIS cell's options. The previous version read every
# argument from `options_mnist` (the MNIST-stage object), so the values declared
# above were ignored and the cell depended on leftover kernel state.
problem.run(validation_step=options_mnist_svhn.validation_step,
                save_snapshot=options_mnist_svhn.save_snapshot,
                # Oracle options:
                train_oracle=options_mnist_svhn.train_oracle,
                oracle_max_epochs=options_mnist_svhn.oracle_max_epochs,
                oracle_batch_size=options_mnist_svhn.oracle_batch_size,
                oracle_lr=options_mnist_svhn.oracle_lr,
                oracle_gamma=options_mnist_svhn.oracle_gamma,
                # Copycat options:
                train_copycat=options_mnist_svhn.train_copycat,
                label_copycat_dataset=options_mnist_svhn.label_copycat_dataset,
                copycat_max_epochs=options_mnist_svhn.copycat_max_epochs,
                copycat_batch_size=options_mnist_svhn.copycat_batch_size,
                copycat_lr=options_mnist_svhn.copycat_lr,
                copycat_gamma=options_mnist_svhn.copycat_gamma,
                copycat_balance_dataset=options_mnist_svhn.copycat_balance_dataset,
                # Copycat Finetune options:
                finetune_copycat=options_mnist_svhn.train_finetune,
                label_finetune_dataset=options_mnist_svhn.label_finetune_dataset,
                finetune_max_epochs=options_mnist_svhn.finetune_max_epochs,
                finetune_batch_size=options_mnist_svhn.finetune_batch_size,
                finetune_lr=options_mnist_svhn.finetune_lr,
                finetune_gamma=options_mnist_svhn.finetune_gamma,
                finetune_balance_dataset=options_mnist_svhn.finetune_balance_dataset
                 )

teste a


==> Training Oracle (Tue Feb 18 17:49:49 2025):




(Oracle) Starting a new model with random parameters...
Scheduler Milestones: [5, 8, 9]


[ Epoch: 01/10 Loss: 0.605778 Accuracy: 60.75% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.26it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 208.96it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.860057 Macro Avg: 0.847766


[ Epoch: 02/10 Loss: 0.415703 Accuracy: 86.85% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.66it/s]
[ Epoch: 03/10 Loss: 0.361758 Accuracy: 90.10% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:27<00:00, 81.82it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 212.71it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.924862 Macro Avg: 0.918063


[ Epoch: 04/10 Loss: 0.274034 Accuracy: 91.78% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:27<00:00, 81.82it/s]
[ Epoch: 05/10 Loss: 0.205755 Accuracy: 92.83% lr: 1.00e-04 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.68it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 215.26it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.937346 Macro Avg: 0.931924


[ Epoch: 06/10 Loss: 0.213594 Accuracy: 93.59% lr: 3.00e-05 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.59it/s]
[ Epoch: 07/10 Loss: 0.213063 Accuracy: 93.84% lr: 3.00e-05 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.71it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 211.37it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.940919 Macro Avg: 0.935359


[ Epoch: 08/10 Loss: 0.238366 Accuracy: 93.95% lr: 3.00e-05 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.71it/s]
[ Epoch: 09/10 Loss: 0.164375 Accuracy: 94.15% lr: 9.00e-06 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.57it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 208.43it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.941341 Macro Avg: 0.935805


[ Epoch: 10/10 Loss: 0.167478 Accuracy: 94.28% lr: 2.70e-06 ]: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2290/2290 [00:28<00:00, 81.48it/s]


Saving model in: "svhn.pth"


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 204.96it/s]


Oracle reports:
Training - Dataset ImageList:
    Number of datapoints: 73257
     * Class 0:   4948 samples
     * Class 1:  13861 samples
     * Class 2:  10585 samples
     * Class 3:   8497 samples
     * Class 4:   7458 samples
     * Class 5:   6882 samples
     * Class 6:   5727 samples
     * Class 7:   5595 samples
     * Class 8:   5045 samples
     * Class 9:   4659 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                             Normalize(mean=(0.4381, 0.4442, 0.4732), std=(0.117, 0.12, 0.1025))
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 26032
     * Class 0:   1744 samples
     * Class 1:   5099 samples
     * Class 2:   4149 samples
     * Class 3:   2882 samples
     * Class 4:   2523 samples
     * Class 5:   2384 samples
     * Class 6:   1977



(Copycat) Loading model from "weights_primeiro/copycat_mnist.pth"...


[ Labeling dataset "npd_5k" ]: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [16:41<00:00, 39.97it/s]


Scheduler Milestones: [2, 3]


[ Epoch: 01/5 Loss: 0.924825 Accuracy: 57.84% lr: 1.00e-04 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:09<00:00, 81.76it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 219.67it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.894246 Macro Avg: 0.890336


[ Epoch: 02/5 Loss: 0.694760 Accuracy: 71.68% lr: 1.00e-04 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:09<00:00, 81.77it/s]
[ Epoch: 03/5 Loss: 0.564716 Accuracy: 78.55% lr: 3.00e-05 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:12<00:00, 81.37it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 219.57it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.924324 Macro Avg: 0.918190


[ Epoch: 04/5 Loss: 0.538885 Accuracy: 80.80% lr: 9.00e-06 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:11<00:00, 81.48it/s]
[ Epoch: 05/5 Loss: 0.501517 Accuracy: 81.49% lr: 9.00e-06 ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 40037/40037 [08:10<00:00, 81.57it/s]
[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 219.05it/s]


~~ F1 Score on test dataset:
~~ Micro Avg: 0.924093 Macro Avg: 0.917961
Saving Copycat model in: "copycat_mnist_svhn.pth"


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 220.28it/s]


Copycat reports:
Training - Dataset ImageList (Balanced):
    Number of datapoints: 1281160
     * Samples per class: 128116
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None
Testing - Dataset ImageList:
    Number of datapoints: 26032
     * Class 0:   1744 samples
     * Class 1:   5099 samples
     * Class 2:   4149 samples
     * Class 3:   2882 samples
     * Class 4:   2523 samples
     * Class 5:   2384 samples
     * Class 6:   1977 samples
     * Class 7:   2019 samples
     * Class 8:   1660 samples
     * Class 9:   1595 samples
    Transforms (if any): Compose(
                             Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
                             ToTensor()
                         )
    Target Transforms (if any): None

Metrics:
Co



(Finetune) Loading model from "weights_primeiro/copycat_mnist.pth"...


[ Testing ]: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:03<00:00, 221.06it/s]

Accuracy (Macro F1-Score):
  Oracle...: 0.936004
  Copycat..: 0.917961
  Finetune.: 0.090484
Attack Performance
  Copycat on Oracle...........: 98.07%
  Finetuned Copycat on Oracle.: 9.67%
Finished (Tue Feb 18 18:53:02 2025)




