In [None]:


# !pip install --upgrade ipykernel

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)



# Location of the car-price CSV, given relative to the working directory.
path_to_data = r"car_price_dataset.csv"

# Load the whole table into a DataFrame; later cells overwrite columns of `data`.
data = pd.read_csv(path_to_data)



In [None]:
from sklearn.preprocessing import StandardScaler
print(data.dtypes)

# Positional indices of the numeric columns: every int64 column first, then
# every float64 column. The resulting order can therefore differ from the
# column order of the DataFrame itself.
column_dtypes = data.dtypes.to_numpy()
x = np.nonzero(column_dtypes == np.dtype('int64'))
y = np.nonzero(column_dtypes == np.dtype('float64'))

numeric_cols_idx = np.concatenate([x[0], y[0]])
print(numeric_cols_idx)

numeric_cols = data.columns[numeric_cols_idx]
print(numeric_cols)

# Fit the standardiser on all numeric columns.
# NOTE(review): the fit happens before any train/test split, so the scaling
# statistics include rows that later end up in the test set.
scaler = StandardScaler()
scaler.fit(data[numeric_cols])

# print(data.columns)


In [None]:
# Overwrite the numeric columns with their standardised values.
# NOTE: this mutates `data`, so running the cell twice scales the values twice.
standardised_values = scaler.transform(data[numeric_cols])
data[numeric_cols] = standardised_values
print(data)

# Plain NumPy view of the whole (mixed-type) table, printed for inspection.
np_data = data.to_numpy()
print(np_data)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns stored with object dtype are treated as categorical features.
is_object_column = data.dtypes.to_numpy() == np.dtype('object')
categoric_cols_idx = np.flatnonzero(is_object_column)
categoric_cols = data.columns[categoric_cols_idx]
print(categoric_cols, categoric_cols_idx)

# One independent label encoder per categorical column.
encoders = [LabelEncoder() for _ in categoric_cols]

In [None]:
# Replace every categorical column with the integer codes of its own encoder.
for encoder, column in zip(encoders, categoric_cols):
    data[column] = encoder.fit_transform(data[column])

print(data[categoric_cols])

# Vocabulary size of each categorical column: largest integer code plus one.
vocabulary_sizes = data[categoric_cols].max() + 1
BRAND_EMBED, MODEL_EMBED, FUEL_EMBED, TRANSMISSION_EMBED = vocabulary_sizes.to_numpy()
print(BRAND_EMBED, MODEL_EMBED, FUEL_EMBED, TRANSMISSION_EMBED)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Base width from which Net derives its hidden layer sizes (INPUT_SIZE//2, INPUT_SIZE//4).
INPUT_SIZE = 256
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Positional indices of the categorical (object-dtype) columns of `data`.
CAT_COLS_IDX = categoric_cols_idx
# Width of the embedding vector learned for each categorical column.
EMBEDDING_SIZE = 4

# Default number of epochs for train() and flwr_train().
EPOCHS = 100

class dataset(Dataset):
    """Tensor-backed dataset yielding (numeric features, categorical codes, price).

    Args:
        data: DataFrame holding the feature columns and the target column.
        numeric_cols: names of the numeric columns; may include the target.
        categoric_cols: names of the integer-encoded categorical columns.
        target_col: name of the regression target (defaults to 'Price').
    """

    def __init__(self, data, numeric_cols, categoric_cols, target_col='Price'):
        # Pick the features by name, not by position: `numeric_cols` is built as
        # "int64 columns, then float64 columns", so the target is not guaranteed
        # to be its last entry. Slicing with [:-1] could keep the target as an
        # input feature and drop a genuine feature instead.
        feature_cols = [col for col in numeric_cols if col != target_col]
        self.numeric_data = torch.tensor(data[feature_cols].to_numpy(), dtype = torch.float32)
        # Embedding lookups need integer indices, so pin the dtype explicitly.
        self.categoric_data = torch.tensor(data[list(categoric_cols)].to_numpy(), dtype = torch.long)
        self.price = torch.tensor(data[target_col].to_numpy(), dtype = torch.float32)

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.price)

    def __getitem__(self, idx):
        """Return the (numeric, categorical, price) tensors for row `idx`."""
        return self.numeric_data[idx], self.categoric_data[idx], self.price[idx]

def collate_fn(batch):
    """Stack a list of (numeric, categorical, price) samples into batch tensors.

    Returns a 3-tuple (numeric batch, categorical batch, prices), each with a
    new leading batch dimension.
    """
    # Gather the samples field by field, then stack every field on its own.
    per_field = [[sample[field] for sample in batch] for field in range(3)]
    return tuple(torch.stack(values) for values in per_field)


class Net(nn.Module):
    """MLP regressor over concatenated categorical embeddings and numeric features.

    Each categorical column gets its own embedding table of width
    EMBEDDING_SIZE. The embeddings are concatenated with the numeric features
    and passed through two ReLU hidden layers to a single output.

    Args:
        num_numeric_features: number of numeric input features per sample
            (defaults to 5, the value previously hard-coded).
    """

    def __init__(self, num_numeric_features = 5):
        super(Net, self).__init__()
        embed_list = [BRAND_EMBED, MODEL_EMBED, FUEL_EMBED, TRANSMISSION_EMBED]

        # Derive the input width from the configuration instead of a literal,
        # so changing EMBEDDING_SIZE or the column set cannot desynchronise it.
        in_features = len(embed_list) * EMBEDDING_SIZE + num_numeric_features

        # Submodules are registered in the original order so that the
        # state_dict key order (used when exchanging parameters) is unchanged.
        self.ln1 = nn.Linear(in_features, INPUT_SIZE//2)
        self.ln2 = nn.Linear(INPUT_SIZE//2, INPUT_SIZE//4)
        self.out = nn.Linear(INPUT_SIZE//4, 1)
        self.relu = nn.ReLU()

        # The sizes come from NumPy, so cast them to plain ints.
        self.embeddings = nn.ModuleList([nn.Embedding(int(size), EMBEDDING_SIZE) for size in embed_list])

    def forward(self, num, cat) -> torch.Tensor:
        """Predict one value per row.

        Args:
            num: numeric feature batch, concatenated along dim 1 with the embeddings.
            cat: integer code batch; column i is looked up in embedding table i.

        Returns:
            The output of the final linear layer (one column).
        """
        # Follow the module's own device rather than a global, so the model
        # also works if it lives on a different device than DEVICE.
        device = self.out.weight.device
        num = num.to(device)
        cat = cat.to(device)

        embeds = [embedding(cat[:, i]) for i, embedding in enumerate(self.embeddings)]
        embed = torch.cat(embeds, dim=1)

        x = torch.cat((embed, num), dim=1)
        hidden = self.relu(self.ln1(x))
        hidden = self.relu(self.ln2(hidden))
        return self.out(hidden)

import matplotlib.pyplot as plt
def _run_training(net, trainloader, epochs):
    """Shared optimisation loop used by train() and flwr_train().

    Runs Adam (lr=0.01) on the MSE loss for `epochs` epochs and returns the
    list of per-epoch mean losses (mean over samples).
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr = 0.01)
    net.train()  # make sure the model is in training mode
    losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        samples_seen = 0
        for num_batch, cat_batch, price_batch in trainloader:
            num_batch = num_batch.to(DEVICE)
            cat_batch = cat_batch.to(DEVICE)
            price_batch = price_batch.to(DEVICE)

            optimizer.zero_grad()
            y_out = net(num_batch, cat_batch).squeeze(1)
            loss = criterion(y_out, price_batch)
            loss.backward()
            optimizer.step()

            # `loss` is already the mean over the batch. Weight it by the batch
            # size so that dividing by the sample count yields the true
            # per-sample mean (and not a value ~batch_size times too small).
            batch_size = price_batch.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size
        epoch_loss = running_loss / max(samples_seen, 1)
        losses.append(epoch_loss)
        if epoch % 10 == 0:
            print(f"EPOCH: {epoch}, LOSS: {epoch_loss}")
    return losses


def train(net: Net, trainloader, epochs = EPOCHS):
    """Train `net` on `trainloader` and plot the per-epoch mean loss.

    Args:
        net: model to optimise in place.
        trainloader: yields (numeric batch, categorical batch, price batch).
        epochs: number of passes over the loader.
    """
    losses = _run_training(net, trainloader, epochs)
    plt.plot(losses)
    plt.show()


def flwr_train(net: Net, trainloader, epochs = EPOCHS):
    """Train `net` exactly like train(), but without plotting.

    Args:
        net: model to optimise in place.
        trainloader: yields (numeric batch, categorical batch, price batch).
        epochs: number of passes over the loader.
    """
    _run_training(net, trainloader, epochs)

def test(net: Net, testloader):
    """Return the mean squared error of `net` over every sample in `testloader`.

    Args:
        net: model to evaluate; its train/eval mode is restored afterwards.
        testloader: yields (numeric batch, categorical batch, price batch).

    Returns:
        The per-sample MSE as a float (0.0 if the loader yields no samples).
    """
    # Sum the squared errors and divide by the sample count at the end; summing
    # per-batch means and dividing by the dataset size would under-report the
    # loss by roughly the batch size.
    criterion = nn.MSELoss(reduction="sum")
    was_training = net.training
    net.eval()  # evaluation must not run in training mode
    squared_error = 0.0
    samples_seen = 0
    try:
        with torch.no_grad():
            for num_batch, cat_batch, price_batch in testloader:
                num_batch = num_batch.to(DEVICE)
                cat_batch = cat_batch.to(DEVICE)
                price_batch = price_batch.to(DEVICE)
                y_out = net(num_batch, cat_batch).squeeze(1)
                squared_error += criterion(y_out, price_batch).item()
                samples_seen += price_batch.size(0)
    finally:
        # Leave the model in whatever mode the caller had it in.
        net.train(was_training)
    return squared_error / max(samples_seen, 1)

In [None]:
from sklearn.model_selection import train_test_split
BATCH_SIZE = 32
# Fixed seed so the train/test split, and every metric computed from it, is
# reproducible across kernel restarts.
SPLIT_SEED = 42

train_data, test_data = train_test_split(data, test_size = 0.1, random_state = SPLIT_SEED)
assert len(train_data) + len(test_data) == len(data)

# `full_dataset` wraps every row; `train_dataset` / `test_dataset` wrap the
# 90% / 10% split.
full_dataset = dataset(data, numeric_cols, categoric_cols)
train_dataset = dataset(train_data, numeric_cols, categoric_cols)
test_dataset = dataset(test_data, numeric_cols, categoric_cols)

trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
testloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)



In [None]:
# x = next(iter(trainloader))

# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(net.parameters())
# for i in range(100):
#     num, cat, price = x[0][0],x[1][0], x[2][0]
#     out = net(num, cat).squeeze(0)
#     optimizer.zero_grad()
#     loss = criterion(out, price)
#     if(i%10==0):
#         print("out: ", out, "price: ", price, "loss: ",loss)
#     loss.backward()
#     optimizer.step()
# Build the model, place it on the selected device and show its layers.
net = Net().to(DEVICE)
print(net)

In [None]:

# Pull one batch from the training loader: numeric features, categorical codes, prices.
num_batch, cat_batch, price_batch = next(iter(trainloader))

# print(num_batch, num_batch.shape )
# print(cat_batch, cat_batch.shape )
# print(price_batch, price_batch.shape )

# brand_embed = nn.Embedding(BRAND_EMBED, 10)
# model_embed = nn.Embedding(MODEL_EMBED, 10)
# fuel_embed = nn.Embedding(FUEL_EMBED, 10)
# transmission_embed = nn.Embedding(TRANSMISSION_EMBED, 10)

# embeddings = nn.ModuleList( [brand_embed, model_embed, fuel_embed, transmission_embed] )

# embed = torch.tensor([])

# for i in range(len(CAT_COLS_IDX)):
#     embed = torch.concat((embed, embeddings[i](cat_batch[:, i]),))
# print(embed, embed.shape)


In [None]:
# Centralised baseline: train the model for 30 epochs; train() also plots the loss curve.
train(net, trainloader, epochs = 30)


In [None]:
# Report the loss of the centrally trained model on the held-out test loader.
print("TEST LOSS: ", test(net, testloader))

In [None]:
# %%capture
# !pip install flwr[simulation]

In [None]:
# %%capture
# pip install flwr-datasets

In [None]:
from datasets import load_dataset
from flwr_datasets.partitioner import IidPartitioner
from datasets import Dataset as HFdataset
from torch.utils.data import Subset
import sklearn
# Number of simulated federated clients (and therefore of data partitions).
NUM_CLIENTS = 5


def _shuffled_chunks(num_samples, num_client):
    """Shuffle range(num_samples) and cut it into `num_client` near-equal chunks.

    Returns (permutation, chunks), where `chunks` is a list of index lists
    whose sizes differ by at most one and which together cover every index.
    """
    if num_client <= 0:
        raise ValueError("num_client must be a positive integer")
    permutation = np.random.permutation(num_samples)
    # array_split spreads the remainder over the first chunks instead of
    # dropping the trailing `num_samples % num_client` samples.
    chunks = [chunk.tolist() for chunk in np.array_split(permutation, num_client)]
    return permutation, chunks


class Federated_Dataset():
    """Random, non-overlapping split of a dataset into one partition per client.

    Attributes:
        data_per_client: minimum number of samples any client receives.
        indices: the random permutation the partitions were cut from.
        dataset: the wrapped dataset.
        partitioned_dataset: list of `Subset` objects, one per client.
    """

    def __init__(self, dataset, num_client = NUM_CLIENTS):
        self.data_per_client = len(dataset) // num_client
        self.indices, chunks = _shuffled_chunks(len(dataset), num_client)
        self.dataset = dataset
        self.partitioned_dataset = [Subset(dataset, chunk) for chunk in chunks]

    def load_partition(self, partition_id: int):
        """Return the `Subset` assigned to client `partition_id`."""
        return self.partitioned_dataset[partition_id]


def partition_dataset(dataset, num_client = NUM_CLIENTS):
    """Split `dataset` into `num_client` random, non-overlapping `Subset`s.

    Prints the size of every partition and returns the list of partitions.
    """
    _, chunks = _shuffled_chunks(len(dataset), num_client)
    partitioned_dataset = [Subset(dataset, chunk) for chunk in chunks]
    for partition in partitioned_dataset:
        print(len(partition))
    return partitioned_dataset

def load_datasets(fds: Federated_Dataset, partition_id: int, seed: int = 0):
    """Build the train/test loaders for one client's partition.

    Args:
        fds: partitioned dataset to read from.
        partition_id: which client's partition to load.
        seed: base seed for the split; the effective seed is `seed + partition_id`.

    Returns:
        (train loader, test loader) over a 90% / 10% split of the partition.
    """
    # Imported here so the function does not depend on another cell having
    # loaded the `sklearn.model_selection` submodule.
    from sklearn.model_selection import train_test_split

    partition = fds.load_partition(partition_id)
    # A fixed per-partition seed makes every call return the same split.
    # This function is called each time a client is built; with an unseeded
    # split, a client could be evaluated on samples it had trained on.
    train_partition, test_partition = train_test_split(
        partition, test_size = 0.1, random_state = seed + partition_id
    )
    fds_trainloader = DataLoader(train_partition, batch_size = BATCH_SIZE, shuffle = True)
    fds_testloader = DataLoader(test_partition, batch_size = BATCH_SIZE, shuffle = False)
    return fds_trainloader, fds_testloader








In [None]:
# Split the full dataset into NUM_CLIENTS random partitions, one per simulated client.
fds = Federated_Dataset(full_dataset, num_client = NUM_CLIENTS)
# Smoke test: fetch the partition of client 0.
part_0= fds.load_partition(partition_id = 0)

# Smoke test: build the train/test loaders of client 0.
part0_trainloader, part0_testloader =  load_datasets(fds, 0)
# print(next(iter(part0_trainloader)))

In [None]:
from collections import OrderedDict
def set_parameters(net, parameters):
    """Overwrite the state of `net` with `parameters`.

    `parameters` is a sequence of arrays given in the same order as the keys
    of `net.state_dict()`; the state is loaded with strict key checking.
    """
    new_state = OrderedDict()
    for key, array in zip(net.state_dict().keys(), parameters):
        new_state[key] = torch.Tensor(array)
    net.load_state_dict(new_state, strict=True)


def get_parameters(net):
    """Return every entry of `net.state_dict()` as a NumPy array, in key order."""
    arrays = []
    for tensor in net.state_dict().values():
        arrays.append(tensor.cpu().numpy())
    return arrays

In [None]:
from flwr.client import NumPyClient, Client, ClientApp
from flwr.common import Context
from flwr.server import ServerApp, ServerConfig, ServerAppComponents
from flwr.server.strategy import FedAvg
from flwr.client import NumPyClient, Client, ClientApp
from flwr.common import Context
from flwr.server import ServerApp, ServerConfig, ServerAppComponents
from flwr.server.strategy import FedAvg
from collections import OrderedDict
import torch

class FlowerClient(NumPyClient):
    """Flower client that trains and evaluates one model on one data partition.

    Args:
        partition_id: identifier of the partition, used in log messages.
        net: the local model.
        trainloader: loader over the partition's training samples.
        testloader: loader over the partition's evaluation samples.
    """

    def __init__(self, partition_id, net,  trainloader, testloader):
        self.net = net
        self.trainloader = trainloader
        self.testloader = testloader
        self.partition_id = partition_id

    def set_params(self, parameters):
        """Load `parameters` (arrays in state_dict key order) into the local model.

        An empty list is ignored, leaving the local model as it is.
        """
        if len(parameters) == 0:
            return
        keys = self.net.state_dict().keys()
        state_dict = OrderedDict((k, torch.tensor(v)) for k, v in zip(keys, parameters))
        # strict=True so a truncated or mismatched parameter list fails loudly
        # instead of silently leaving some weights untouched.
        self.net.load_state_dict(state_dict, strict=True)

    def get_parameters(self, config):
        """Return the local model's state as a list of NumPy arrays.

        This is the hook name `NumPyClient` expects.
        """
        print(f"[Client {self.partition_id}] get_parameters")
        return [val.cpu().numpy() for _, val in self.net.state_dict().items()]

    def get_params(self, config):
        """Backward-compatible alias of get_parameters()."""
        return self.get_parameters(config)

    def fit(self, params, config):
        """Load the global parameters, train locally, and return the new parameters.

        The number of local epochs is read from `config["local_epochs"]` when
        present and defaults to 30.

        Returns:
            (updated parameters, number of training samples, empty metrics dict).
        """
        print(f"[Client {self.partition_id}] ----Training----")
        self.set_params(params)
        local_epochs = int(config.get("local_epochs", 30))
        flwr_train(self.net, self.trainloader, epochs=local_epochs)
        return self.get_parameters(config), len(self.trainloader.dataset), {}

    def evaluate(self, params, config):
        """Load the global parameters and evaluate them on the local test loader.

        Returns:
            (loss, number of evaluation samples, {'loss': loss}).
        """
        self.set_params(params)
        loss = float(test(self.net, self.testloader))
        print(f"[Client {self.partition_id}] evaluate, Test_loss:{loss}")
        return loss, len(self.testloader.dataset), {'loss': loss}

def client_fn(context: Context):
    """Build the Flower client for the partition named in the node config.

    Reads 'partition-id' from `context.node_config`, creates a fresh model on
    DEVICE, loads that partition's loaders from `fds`, and returns the
    resulting FlowerClient converted with `to_client()`.
    """
    model = Net().to(DEVICE)
    node_partition = context.node_config['partition-id']
    print("NODE CONFIG: ", node_partition)
    loaders = load_datasets(fds, node_partition)
    flower_client = FlowerClient(node_partition, model, loaders[0], loaders[1])
    return flower_client.to_client()


# Client-side application handed to the simulation.
client = ClientApp(client_fn=client_fn)


In [None]:
strategy = FedAvg(
    fraction_fit=1.0,  # Sample 100% of available clients for training
    fraction_evaluate=1,  # Sample 100% of available clients for evaluation
    min_fit_clients=NUM_CLIENTS,  # Never sample fewer than NUM_CLIENTS clients for training
    min_evaluate_clients=NUM_CLIENTS,  # Never sample fewer than NUM_CLIENTS clients for evaluation
    min_available_clients=NUM_CLIENTS,  # Wait until all NUM_CLIENTS clients are available
)

In [None]:
def server_fn(context:Context):
    """Return the server components: the shared `strategy` and a 3-round config."""
    return ServerAppComponents(
        strategy = strategy,
        config = ServerConfig(num_rounds = 3),
    )


# Server-side application handed to the simulation.
server = ServerApp(server_fn = server_fn)

In [None]:
# Resources reserved for each simulated client: always one CPU, plus one whole
# GPU when running on CUDA (no GPU otherwise).
# See the Flower simulation documentation for details on `backend_config`.
gpus_per_client = 1.0 if DEVICE == "cuda" else 0.0
backend_config = {"client_resources": {"num_cpus": 1, "num_gpus": gpus_per_client}}

In [None]:
from flwr.simulation import run_simulation

# Run the federated simulation with one supernode per client, using the
# server/client apps and the per-client resource limits defined in earlier cells.
run_simulation(
    server_app=server,
    client_app=client,
    num_supernodes=NUM_CLIENTS,
    backend_config=backend_config,
)