In [1]:
import torch
import torch.nn as nn
import torch.distributed as dist 
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F
import numpy as np 
import os 
import random 
import time 
import logging 
from datetime import timedelta
import pandas as pd

In [2]:
# Seed the global RNGs of Python's `random`, NumPy and PyTorch with the same
# fixed value (42).
random.seed(42)
np.random.seed(42)
# Last expression of the cell: its return value (a torch Generator) is what
# the notebook displays as this cell's output.
torch.manual_seed(42)

<torch._C.Generator at 0x7786645b3d50>

In [3]:
# ------------- Init / Communication Utilities -------------

def init(rank, world_size, backend='gloo', master_addr='client1',
         master_port='29500', ifname='eth0', timeout_seconds=60):
    """Join the default torch.distributed process group.

    Args:
        rank: Rank of this process within the group.
        world_size: Total number of processes expected in the group.
        backend: torch.distributed backend name.
        master_addr: Value exported as ``MASTER_ADDR`` (rendezvous host).
        master_port: Value exported as ``MASTER_PORT``; ints are accepted.
        ifname: Value exported as ``GLOO_SOCKET_IFNAME``.
        timeout_seconds: Timeout handed to ``init_process_group``.

    Returns:
        The result of ``dist.is_initialized()`` after the call.
    """
    # Re-running the notebook cell must not try to create the default group a
    # second time (torch.distributed raises in that case).
    if dist.is_initialized():
        print(f'Rank {rank} is already initialized; skipping re-initialization.')
        return True

    # The rendezvous settings are exported as environment variables before the
    # group is created; os.environ only accepts strings.
    os.environ['GLOO_SOCKET_IFNAME'] = str(ifname)
    os.environ['MASTER_ADDR'] = str(master_addr)
    os.environ['MASTER_PORT'] = str(master_port)

    dist.init_process_group(
        backend=backend,
        world_size=world_size,
        rank=rank,
        timeout=timedelta(seconds=timeout_seconds),
    )

    print(f'Rank {rank} initialized and ready.')
    return dist.is_initialized()

def recv(arr, src):
    """Receive a tensor from rank ``src`` into the pre-allocated buffer ``arr``."""
    dist.recv(arr, src=src)

def snd(arr, dst):
    """Send the tensor ``arr`` to rank ``dst`` via ``dist.send``."""
    dist.send(arr, dst=dst)

def terminate(rank):
    """Destroy the process group, then report that ``rank`` has shut down."""
    dist.destroy_process_group()
    farewell = f'Rank {rank} successfully terminated.'
    print(farewell)

def send_model(model, dst):
    """Send each ``state_dict`` tensor of ``model`` to rank ``dst``.

    One ``dist.send`` is issued per entry, in ``state_dict`` order, and the
    entry's key is printed after its send returns.
    """
    state = model.state_dict()
    for name in state:
        dist.send(state[name].data, dst=dst)
        print(f"Sent {name}")

def recv_model(model, src):
    """Receive each ``state_dict`` tensor of ``model`` from rank ``src``.

    One ``dist.recv`` is issued per entry, in ``state_dict`` order, writing
    into that entry's ``.data``; the entry's key is printed afterwards.
    """
    state = model.state_dict()
    for name in state:
        dist.recv(state[name].data, src=src)
        print(f"Received {name}")

In [4]:
# ------------------ Dataset ------------------

class DatasetC2(Dataset):
    """Row-wise dataset backed by a CSV file loaded with pandas.

    Each item is a pair ``(idx, features)``: ``idx`` is the row's value in the
    ``idx`` column, and ``features`` is the row with the bookkeeping columns
    removed, passed through ``transform`` when one is given.

    Args:
        transform: Optional callable applied to the feature row (a pandas
            Series) before it is returned.
        path: Location of the CSV file to read.
    """

    # Columns that identify a row rather than describe it. 'Unnamed: 0' only
    # exists when the CSV was written with its index, so it is optional.
    _NON_FEATURE_COLUMNS = ['idx', 'Unnamed: 0']

    def __init__(self, transform=None, path='/SFVL/motor_c1_train.npy'):
        # NOTE(review): the default path ends in .npy but is parsed with
        # pd.read_csv; run() passes a .csv path explicitly. Confirm whether
        # the default should point at the .csv file instead.
        super().__init__()
        self.data = pd.read_csv(path)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return ``(idx, features)`` for the row at position ``index``."""
        row = self.data.iloc[index]
        idx = row['idx']
        # errors='ignore' keeps this working for CSVs saved without the
        # pandas index column instead of raising KeyError.
        features = row.drop(self._NON_FEATURE_COLUMNS, errors='ignore')
        if self.transform:
            features = self.transform(features)
        return idx, features

In [5]:
# ------------------ Transform ------------------

class ToTensor:
    """Callable transform that converts an object's ``.values`` to a float32 tensor."""

    def __call__(self, input):
        raw_values = input.values
        return torch.tensor(raw_values, dtype=torch.float32)

In [6]:
# ------------------ Model ------------------

class Model(nn.Module):
    """Client-side network: a single linear layer (6 -> 8) followed by ReLU."""

    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(6, 8)

    def forward(self, input):
        """Apply the linear layer to ``input`` and rectify the result."""
        projected = self.input_layer(input)
        return F.relu(projected)

In [7]:
# ------------------ Training Loop ------------------

def run(num_epoch, model, batch_size=5, lr=0.001, server_rank=2,
        path='/SFVL/motor_c1_train.csv'):
    """Train the client-side model against a remote server rank.

    For every batch the model's output (the "smashed data") is sent to
    ``server_rank``, a gradient tensor of the same shape is received back from
    that rank, and it is used to backpropagate through the client model.

    Args:
        num_epoch: Number of passes over the training data.
        model: Client-side ``nn.Module`` to train in place.
        batch_size: Rows per batch handed to the DataLoader.
        lr: Learning rate for the Adam optimizer.
        server_rank: Rank that receives activations and returns gradients.
        path: CSV file holding the training data.
    """
    dataset = DatasetC2(transform=ToTensor(), path=path)
    # shuffle=False keeps the row order fixed across epochs.
    # NOTE(review): presumably the server relies on this order to line up
    # its side of the data -- confirm against the server notebook.
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)

    model.train()
    optim = torch.optim.Adam(params=model.parameters(), lr=lr)

    for epoch in range(num_epoch):
        # The per-row ids from the dataset are not used on the client side.
        for batch_idx, (_sample_ids, input_batch) in enumerate(dataloader):
            # Forward pass on client (bottom) model
            smashed_data = model(input_batch)
            print(smashed_data.size())

            # Only the values go over the wire; send a detached view so the
            # communication call never touches the autograd graph.
            snd(smashed_data.detach(), dst=server_rank)

            # Receive the gradient w.r.t. the smashed data from the server
            gradient = torch.zeros_like(smashed_data, dtype=torch.float32)
            recv(gradient, src=server_rank)

            # Backpropagate the received gradient through the client model
            optim.zero_grad()
            smashed_data.backward(gradient)
            optim.step()
            print(f"Epoch {epoch+1}, Batch {batch_idx+1} processed")


In [8]:
def evaluation(model, path='/SFVL/motor_c1_test.csv', dst=2):
    """Run the client model on the test CSV and send its output to rank ``dst``.

    Args:
        model: Client-side ``nn.Module``; switched to eval mode.
        path: CSV file holding the test data.
        dst: Rank that receives the model output.
    """
    model.eval()

    # Load test data from CSV and drop the bookkeeping columns. errors='ignore'
    # removes whichever of them exist, so a file lacking one of the two neither
    # raises KeyError nor leaves a stray non-feature column in the input.
    df = pd.read_csv(path)
    df = df.drop(columns=['idx', 'Unnamed: 0'], errors='ignore')

    # Convert to tensor
    input_tensor = torch.tensor(df.values, dtype=torch.float32)
    print(input_tensor[:5])  # First 5 rows for verification

    # Forward pass; no gradients are needed for evaluation, so skip building
    # the autograd graph.
    with torch.no_grad():
        smashed_data = model(input_tensor)

    # Send smashed data to server
    snd(smashed_data, dst=dst)

    print(smashed_data.size())
    print('Smashed data sent successfully.')


In [9]:
# This notebook is rank 0 of a 3-process group; run() and evaluation() send
# their output to rank 2.
rank = 0 
world_size = 3
model = Model()
# Last expression of the cell: init() returns dist.is_initialized(), which the
# notebook displays as the cell output.
init(rank=rank,world_size=world_size)

Rank 0 initialized and ready.


True

In [10]:
# Train the client model for 10 epochs, then run it on the test set and send
# the resulting activations to the server rank.
num_epoch = 10
run(num_epoch=num_epoch,model=model)
evaluation(model=model)

torch.Size([5, 8])
Epoch 1, Batch 1 processed
torch.Size([5, 8])
Epoch 1, Batch 2 processed
torch.Size([5, 8])
Epoch 1, Batch 3 processed
torch.Size([5, 8])
Epoch 1, Batch 4 processed
torch.Size([5, 8])
Epoch 1, Batch 5 processed
torch.Size([5, 8])
Epoch 1, Batch 6 processed
torch.Size([5, 8])
Epoch 1, Batch 7 processed
torch.Size([5, 8])
Epoch 1, Batch 8 processed
torch.Size([5, 8])
Epoch 1, Batch 9 processed
torch.Size([5, 8])
Epoch 1, Batch 10 processed
torch.Size([5, 8])
Epoch 1, Batch 11 processed
torch.Size([5, 8])
Epoch 1, Batch 12 processed
torch.Size([5, 8])
Epoch 1, Batch 13 processed
torch.Size([5, 8])
Epoch 1, Batch 14 processed
torch.Size([5, 8])
Epoch 1, Batch 15 processed
torch.Size([5, 8])
Epoch 1, Batch 16 processed
torch.Size([5, 8])
Epoch 1, Batch 17 processed
torch.Size([5, 8])
Epoch 1, Batch 18 processed
torch.Size([5, 8])
Epoch 1, Batch 19 processed
torch.Size([5, 8])
Epoch 1, Batch 20 processed
torch.Size([5, 8])
Epoch 1, Batch 21 processed
torch.Size([5, 8])
Epo

In [11]:
# Destroy the process group now that training and evaluation are done.
terminate(rank=rank)

Rank 0 successfully terminated.
