# data

> Vision dataset helpers: download torchvision datasets as NumPy arrays and read per-index `.npz` train/test files.


In [None]:
#| default_exp vision.data

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import numpy as np
import os
import sys
import random
import torch
import torchvision
import torchvision.transforms as transforms
from utils.dataset_utils import check, separate_data, split_data, save_file
from fedai.data import *
from fedai.utils import *
from fastcore.utils import *

In [None]:
#| export
# Seed the global RNGs of Python's `random`, NumPy and PyTorch with the same
# fixed value when this module is imported (presumably so that data
# partitioning is repeatable across runs -- confirm with callers).
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

In [None]:
#| export
class FvisionBlock(FDblock):  # type: ignore # noqa: F405
    """Data block for torchvision image datasets.

    Extends ``FDblock`` with a table of normalization statistics that
    ``load_data`` feeds to ``transforms.Normalize``.

    Args:
        cfg: Configuration object forwarded unchanged to ``FDblock.__init__``.
        partitioner: Partitioner name forwarded unchanged to
            ``FDblock.__init__``.
    """

    def __init__(self, cfg, partitioner: str):
        super().__init__(cfg, partitioner)

        # Dataset name -> (mean, std) tuples, one entry per image channel
        # (three values for the CIFAR variants, one for the MNIST variants).
        # The keys are looked up with ``cfg.data.name`` in ``load_data``.
        self.transform_norm_mapping = {
            "CIFAR10": ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            "CIFAR100": ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            "MNIST": ((0.5,), (0.5,)),
            "FashionMNIST": ((0.5,), (0.5,)),
            "EMNIST": ((0.5,), (0.5,)),
        }

    

In [None]:
#| export
@patch
def load_data(self: FvisionBlock):
    """Download a torchvision dataset and return it as NumPy arrays.

    The dataset class is resolved from ``torchvision.datasets`` using
    ``self.cfg.data.name``. Both splits are transformed with ``ToTensor`` and
    ``Normalize`` (statistics from ``self.transform_norm_mapping``), loaded in
    a single batch each, and concatenated train-first along the first axis.

    Returns:
        ``None`` when ``check(...)`` returns a truthy value (presumably meaning
        a matching partition already exists on disk -- confirm against
        ``utils.dataset_utils.check``). Otherwise a tuple
        ``(dataset_image, dataset_label)`` of NumPy arrays holding the train
        samples followed by the test samples.

    Raises:
        KeyError: If ``self.cfg.data.name`` has no entry in
            ``self.transform_norm_mapping``.
    """
    data_cfg = self.cfg.data

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(data_cfg.dir_path, exist_ok=True)

    ds_class = get_class(torchvision.datasets, data_cfg.name)  # noqa: F405

    if check(self.config_path, self.train_path, self.test_path, self.cfg.num_clients,
             data_cfg.niid, data_cfg.balance, data_cfg.partition):
        return

    norm_stats = self.transform_norm_mapping[data_cfg.name]
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(norm_stats[0], norm_stats[1])])

    # Join instead of concatenating strings so that a ``dir_path`` without a
    # trailing separator still yields "<dir_path>/rawdata".
    raw_root = os.path.join(data_cfg.dir_path, "rawdata")

    def _load_split(train):
        # Build one split and pull every transformed sample out in one batch.
        dataset = ds_class(root=raw_root, train=train, download=True, transform=transform)
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=len(dataset.data), shuffle=False)
        images, labels = next(iter(loader))
        return images.cpu().detach().numpy(), labels.cpu().detach().numpy()

    train_images, train_labels = _load_split(True)
    test_images, test_labels = _load_split(False)

    dataset_image = np.concatenate([train_images, test_images])
    dataset_label = np.concatenate([train_labels, test_labels])

    num_classes = len(np.unique(dataset_label))
    print(f'Number of classes: {num_classes}')

    return dataset_image, dataset_label

In [None]:
# NOTE(review): absolute path on one specific machine. This cell has no export
# directive, so it looks like notebook scratch state -- confirm before reuse.
path = '/home/ahmed/Ahmed-home/1- Projects/Research/publications/2024/letter 1/code/PFLlib/dataset/Cifar10/'

In [None]:
def read_data(path, idx, is_train=True):
    """Load the ``'data'`` entry of ``<path>/<train|test>/<idx>.npz``.

    Args:
        path: Directory containing the ``train`` and ``test`` sub-directories.
        idx: Value used as the file stem (``str(idx) + '.npz'``); presumably a
            client index -- confirm with callers.
        is_train: Read from ``train`` when true, from ``test`` otherwise.

    Returns:
        The result of calling ``.tolist()`` on the archive's ``'data'`` array.

    Raises:
        FileNotFoundError: If the ``.npz`` file does not exist.
        KeyError: If the archive has no ``'data'`` entry.
    """
    split_dir = 'train' if is_train else 'test'
    data_file = os.path.join(path, split_dir, str(idx) + '.npz')

    # NOTE(security): allow_pickle=True unpickles arbitrary objects; only use
    # this on archives produced by trusted code.
    with open(data_file, 'rb') as f:
        return np.load(f, allow_pickle=True)['data'].tolist()

In [None]:
# NOTE(review): absolute path on one specific machine. This cell has no export
# directive, so it looks like notebook scratch state -- confirm before reuse.
DATA_DIR = '/home/ahmed/Ahmed-home/1- Projects/Research/publications/2024/letter 1/code/mira/data/'

In [None]:
#| hide
# Run nbdev's export step from inside the notebook; the cell is marked hidden
# by its directive, so it is tooling rather than library code.
import nbdev
nbdev.nbdev_export()