In [1]:
import torch
import torch.nn as nn
import torch.distributed as dist 
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F
import numpy as np 
import os 
import random 
import time 
import logging 
from sklearn.metrics import precision_score, recall_score, f1_score,accuracy_score
from datetime import timedelta
import pandas as pd

In [2]:
# Seed the Python, NumPy and PyTorch RNGs with the same value so that model
# initialisation starts from the same random state on every run.
SEED = 43
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# INFO-level messages go to this file as bare text (the format string
# contains only the message, no timestamp or level prefix).
file_name = "/SFVL/motor_trial1.log"
logging.basicConfig(
    filename=file_name,
    format='%(message)s',
    level=logging.INFO,
)

In [3]:
# ------------- Init / Communication Utilities -------------

def init(rank, world_size, backend='gloo', master_addr='client1',
         master_port='29500', ifname='eth0', timeout_seconds=60):
    """Join the torch.distributed process group used by this notebook.

    The rendezvous settings are written into the process environment before
    ``dist.init_process_group`` is called. All of them default to the values
    this notebook was originally written with, so ``init(rank, world_size)``
    behaves exactly as before.

    Args:
        rank: Rank of this process inside the group.
        world_size: Total number of processes in the group.
        backend: torch.distributed backend name.
        master_addr: Value written to ``MASTER_ADDR``.
        master_port: Value written to ``MASTER_PORT`` (converted to ``str``).
        ifname: Value written to ``GLOO_SOCKET_IFNAME``.
        timeout_seconds: Timeout, in seconds, passed to ``init_process_group``.

    Returns:
        The result of ``dist.is_initialized()`` after initialisation.
    """
    os.environ['GLOO_SOCKET_IFNAME'] = ifname
    os.environ['MASTER_ADDR'] = master_addr
    # Environment values must be strings; accept an int port for convenience.
    os.environ['MASTER_PORT'] = str(master_port)

    dist.init_process_group(backend=backend,
                            rank=rank,
                            world_size=world_size,
                            timeout=timedelta(seconds=timeout_seconds))

    print(f'Rank {rank} initialized and ready.')
    return dist.is_initialized()

def recv(arr, src):
    """Receive a tensor from rank ``src`` into ``arr`` (filled in place)."""
    dist.recv(arr, src)

def snd(arr, dst):
    """Send ``arr`` to rank ``dst``.

    The tensor is made contiguous first, so slices/views (e.g. column slices
    of a gradient) can be passed directly.
    """
    payload = arr.contiguous()
    dist.send(payload, dst)

def terminate(rank):
    """Destroy the process group and report which rank shut down.

    Args:
        rank: Rank of this process; used only in the printed message.
    """
    dist.destroy_process_group()
    message = f'Rank {rank} successfully terminated.'
    print(message)

def send_model(model, dst):
    """Send every tensor in ``model.state_dict()`` to rank ``dst``.

    Tensors are sent one at a time in state-dict order, and a line is printed
    after each send.
    """
    state = model.state_dict()
    for name in state:
        dist.send(state[name].data, dst=dst)
        print(f"Sent {name}")

def recv_model(model, src):
    """Receive every tensor of ``model.state_dict()`` from rank ``src``.

    Each state-dict tensor is used directly as the receive buffer, one at a
    time in state-dict order, and a line is printed after each receive.
    """
    state = model.state_dict()
    for name in state:
        dist.recv(state[name].data, src=src)
        print(f"Received {name}")

In [4]:
# ------------------ Dataset ------------------

class DatasetServer(Dataset):
    """Row-wise dataset over a CSV file read with pandas.

    The CSV is loaded once at construction. If it contains the
    ``'Unnamed: 0'`` column (what pandas produces when a frame was saved with
    its index), that column is removed up front; files without it are accepted
    as-is instead of failing with ``KeyError`` on the first item access.

    Args:
        transform: Optional callable applied to each row (a ``pandas.Series``)
            before it is returned.
        path: Location of the CSV file.
    """

    def __init__(self, transform=None, path='/SFVL/motor_target_train.csv'):
        super().__init__()
        frame = pd.read_csv(path)
        # Drop the saved-index column once here rather than on every
        # __getitem__ call; errors='ignore' tolerates files that lack it.
        self.data = frame.drop(columns=['Unnamed: 0'], errors='ignore')
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return row ``index``, passed through ``transform`` when one is set."""
        y = self.data.iloc[index]
        if self.transform:
            y = self.transform(y)
        return y

In [5]:
class ToTensor:
    """Callable transform that converts an object's ``.values`` to a float32 tensor."""

    def __call__(self, input):
        raw_values = input.values
        return torch.tensor(raw_values, dtype=torch.float32)


In [6]:
# ------------------ Model ------------------
class MotorNN(nn.Module):
    """Fully connected network: 16 -> 22 -> 11 -> 8 -> 1.

    A ReLU follows every linear layer except the last one. The layers live in
    ``self.net`` (an ``nn.Sequential``) in that order.
    """

    def __init__(self):
        super().__init__()
        widths = (16, 22, 11, 8, 1)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.pop()  # no activation after the output layer
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.net(x)

In [7]:
# ------------------ Training Loop ------------------

def run(num_epoch,model):
    """Train ``model`` on activations received from ranks 0 and 1.

    For every batch of targets read from the training CSV, this function:
      1. receives one 8-column activation tensor from rank 0 and one from rank 1,
      2. concatenates them column-wise and runs ``model`` on the result,
      3. computes an L1 loss against the targets and takes an Adam step,
      4. sends the gradient with respect to each rank's activations back to
         that rank.

    After the last epoch the loss of the final batch is written to the log.

    Args:
        num_epoch: Number of passes over the training targets.
        model: Module mapping the concatenated activations to predictions.
    """
    batch_size = 5
    smashed_dim = 8  # columns contributed by each of ranks 0 and 1
    transform = ToTensor()
    dataset = DatasetServer(transform=transform, path='/SFVL/motor_target_train.csv')
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)

    model.train()
    criterion = nn.L1Loss()
    optim = torch.optim.Adam(model.parameters(), lr=0.001)

    loss = None  # stays None when the dataloader yields no batches
    for epoch in range(num_epoch):
        for idx, target_batch in enumerate(dataloader):
            optim.zero_grad()

            # Size the receive buffers from the actual batch: the final batch
            # is shorter than batch_size whenever the dataset length is not a
            # multiple of it. NOTE(review): assumes each client sends exactly
            # one activation row per target row in the batch - confirm
            # against the client notebooks.
            rows = target_batch.size(0)
            smashed_data_c1 = torch.zeros((rows, smashed_dim), dtype=torch.float32)
            smashed_data_c2 = torch.zeros_like(smashed_data_c1)
            recv(smashed_data_c1,src=0)
            recv(smashed_data_c2,src=1)

            # The received activations are leaf tensors here; enabling grad on
            # the concatenation lets backward() produce the gradient that is
            # sent back to the clients.
            server_input = torch.cat((smashed_data_c1,smashed_data_c2),dim=1)
            server_input.requires_grad_()
            logits = model(server_input)
            print(f'logits shape : {logits.size()}')
            print(f'target shape : {target_batch.size()} {target_batch[0]}')
            loss = criterion(logits, target_batch)

            loss.backward()
            optim.step()

            # Split the input gradient back into the two clients' column blocks.
            gradient = server_input.grad
            grad_c1 = gradient[:,:smashed_dim]
            grad_c2 = gradient[:,smashed_dim:]
            snd(grad_c1,dst=0)
            snd(grad_c2,dst=1)
            print(f"Epoch {epoch+1}, Batch {idx+1} processed loss : {loss.view(-1)}")

    # Log the last batch's loss once training is finished (skipped when no
    # batch was ever processed, which previously raised NameError).
    if loss is not None:
        logging.info(f'training loss : {loss}')


In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import torch
import pandas as pd
import logging

def evaluation(model):
    """Evaluate ``model`` on the test targets and report regression metrics.

    The test targets are read from ``/SFVL/motor_target_test.csv``. One
    activation tensor covering the whole test set is received from rank 0 and
    one from rank 1; they are concatenated column-wise and passed through
    ``model`` in a single forward pass. MSE, RMSE, MAE and R² are printed and
    written to the log.

    Args:
        model: Module mapping the concatenated activations to predictions.
    """
    model.eval()

    # Load test labels (drop 'Unnamed: 0' if present)
    df_target = pd.read_csv('/SFVL/motor_target_test.csv')
    if 'Unnamed: 0' in df_target.columns:
        df_target = df_target.drop(columns=['Unnamed: 0'])

    target = torch.tensor(df_target.values, dtype=torch.float32)

    len_dataset = target.size(0)
    print("True targets sample:\n", target[:5])

    # Receive smashed data from clients
    smashed_data_c1 = torch.zeros((len_dataset, 8), dtype=torch.float32)
    smashed_data_c2 = torch.zeros_like(smashed_data_c1)
    recv(smashed_data_c1, src=0)
    recv(smashed_data_c2, src=1)

    # Forward pass. No gradients are needed for evaluation, so skip building
    # the autograd graph for the whole test set.
    combined_input = torch.cat((smashed_data_c1, smashed_data_c2), dim=1)
    with torch.no_grad():
        preds = model(combined_input)

    print("Predictions sample:\n", preds[:5])

    # Convert to NumPy
    preds_np = preds.detach().cpu().numpy()
    true_np = target.cpu().numpy()

    # Compute regression metrics
    mse = mean_squared_error(true_np, preds_np)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(true_np, preds_np)
    r2 = r2_score(true_np, preds_np)

    # Print results
    print("\n--- Regression Evaluation Metrics ---")
    print(f"MSE  : {mse:.4f}")
    print(f"RMSE : {rmse:.4f}")
    print(f"MAE  : {mae:.4f}")
    print(f"R²   : {r2:.4f}")
    print("-------------------------------------\n")

    # Log results
    logging.info(f"MSE  : {mse:.4f}")
    logging.info(f"RMSE : {rmse:.4f}")
    logging.info(f"MAE  : {mae:.4f}")
    logging.info(f"R²   : {r2:.4f}")


In [9]:
# This notebook runs as rank 2 of a 3-process group; run() and evaluation()
# exchange tensors with ranks 0 and 1.
rank, world_size = 2, 3

# Join the process group, then build the model that run()/evaluation() use.
init(rank, world_size)
model = MotorNN()

Rank 2 initialized and ready.


In [10]:
# Train for a fixed number of epochs, then score the model on the test set.
num_epoch = 10
run(num_epoch, model)
evaluation(model)

logits shape : torch.Size([5, 1])
target shape : torch.Size([5, 1]) tensor([-0.2556])
Epoch 1, Batch 1 processed loss : tensor([0.4308], grad_fn=<ViewBackward0>)
logits shape : torch.Size([5, 1])
target shape : torch.Size([5, 1]) tensor([-0.2499])
Epoch 1, Batch 2 processed loss : tensor([0.6025], grad_fn=<ViewBackward0>)
logits shape : torch.Size([5, 1])
target shape : torch.Size([5, 1]) tensor([-0.0549])
Epoch 1, Batch 3 processed loss : tensor([0.2805], grad_fn=<ViewBackward0>)
logits shape : torch.Size([5, 1])
target shape : torch.Size([5, 1]) tensor([1.0854])
Epoch 1, Batch 4 processed loss : tensor([0.7152], grad_fn=<ViewBackward0>)
logits shape : torch.Size([5, 1])
target shape : torch.Size([5, 1]) tensor([0.4700])
Epoch 1, Batch 5 processed loss : tensor([0.4902], grad_fn=<ViewBackward0>)
logits shape : torch.Size([5, 1])
target shape : torch.Size([5, 1]) tensor([-2.4879])
Epoch 1, Batch 6 processed loss : tensor([0.6985], grad_fn=<ViewBackward0>)
logits shape : torch.Size([5, 

In [11]:
# Tear down the process group now that training and evaluation are done.
terminate(rank)

Rank 2 successfully terminated.
