<a href="https://colab.research.google.com/github/Papa-Panda/Paper_reading/blob/main/Parameter_server.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# parameter server
# https://www.bilibili.com/read/cv16270512/?jump_opus=1
# https://www.bilibili.com/video/BV1YA4y197G8/?spm_id_from=333.337.search-card.all.click
# https://chatgpt.com/c/671cfd50-2910-800e-85e2-2502105d1391

In [2]:
import threading
import time
import numpy as np

class ParameterServer:
    """Thread-safe, single-node parameter store updated by gradient descent.

    Every read and write of ``parameters`` happens while holding ``lock`` so
    that several worker threads can pull and push concurrently.
    """

    def __init__(self, num_params, learning_rate=0.1):
        """Create the server with ``num_params`` randomly initialised parameters.

        Args:
            num_params: Length of the shared parameter vector.
            learning_rate: Step size applied to every pushed gradient.
                Defaults to 0.1.
        """
        # Initialize shared parameters with random values
        self.parameters = np.random.rand(num_params)
        self.learning_rate = learning_rate
        self.lock = threading.Lock()

    def get_parameters(self):
        """Return a copy of the current parameters.

        A copy is returned so the caller can keep using the snapshot while
        other threads continue to update the server's array in place.
        """
        with self.lock:
            return self.parameters.copy()

    def update_parameters(self, gradient):
        """Apply one gradient-descent step and print the new parameters.

        Args:
            gradient: Array-like with the same shape as ``parameters``.

        Raises:
            ValueError: If ``gradient`` does not match the parameter shape.
        """
        # Accept lists/tuples as well as ndarrays; plain `float * list` would raise.
        gradient = np.asarray(gradient, dtype=float)
        with self.lock:
            # Reject scalars / length-1 arrays that would otherwise broadcast silently.
            if gradient.shape != self.parameters.shape:
                raise ValueError(
                    f"gradient shape {gradient.shape} does not match "
                    f"parameter shape {self.parameters.shape}"
                )
            self.parameters -= self.learning_rate * gradient  # Simple gradient descent step
            print(f"Updated Parameters: {self.parameters}")

def worker(ps, worker_id, num_epochs=3, delay=1.0):
    """Simulate a worker computing gradients and sending updates to the parameter server.

    Args:
        ps: Object exposing ``get_parameters()`` and ``update_parameters(gradient)``.
        worker_id: Identifier used only to label the printed log lines.
        num_epochs: Number of pull/compute/push rounds to run. Defaults to 3.
        delay: Seconds to sleep after each round to mimic computation time.
            Defaults to 1.0; a value of 0 or less skips the sleep.
    """
    for _ in range(num_epochs):
        # Get the current parameters from the server
        params = ps.get_parameters()
        print(f"Worker {worker_id} received parameters: {params}")

        # Simulate computing gradients (just using random values here)
        gradient = np.random.rand(len(params))
        print(f"Worker {worker_id} computed gradient: {gradient}")

        # Send the gradient to the parameter server
        ps.update_parameters(gradient)

        # Simulate some computation delay (time.sleep rejects negative values)
        if delay > 0:
            time.sleep(delay)

if __name__ == "__main__":
    # Demo configuration
    num_params = 5   # length of the shared parameter vector
    num_workers = 3  # how many worker threads run concurrently

    # One shared server that every worker pulls from and pushes to
    ps = ParameterServer(num_params)

    # Spawn one thread per worker id, starting each as soon as it is built
    threads = []
    for worker_id in range(num_workers):
        thread = threading.Thread(target=worker, args=(ps, worker_id))
        thread.start()
        threads.append(thread)

    # Block until every worker has completed all of its epochs
    for thread in threads:
        thread.join()

    print("Final Parameters:", ps.get_parameters())

Worker 0 received parameters: [0.06988384 0.70016882 0.59833023 0.82530764 0.55903081]
Worker 0 computed gradient: [0.48764011 0.93571694 0.90231022 0.42631718 0.20131776]
Worker 1 received parameters: [0.06988384 0.70016882 0.59833023 0.82530764 0.55903081]
Worker 1 computed gradient: [0.43749546 0.02488108 0.87034084 0.22645054 0.90254958]
Updated Parameters: [0.02613429 0.69768071 0.51129614 0.80266258 0.46877585]
Updated Parameters: [-0.02262972  0.60410902  0.42106512  0.76003086  0.44864407]
Worker 2 received parameters: [-0.02262972  0.60410902  0.42106512  0.76003086  0.44864407]
Worker 2 computed gradient: [0.11863408 0.36782503 0.76050471 0.12073516 0.31055374]
Updated Parameters: [-0.03449313  0.56732652  0.34501465  0.74795735  0.4175887 ]
Worker 0 received parameters: [-0.03449313  0.56732652  0.34501465  0.74795735  0.4175887 ]Worker 1 received parameters: [-0.03449313  0.56732652  0.34501465  0.74795735  0.4175887 ]

Worker 0 computed gradient: [0.53112494 0.93208044 0.4

In [5]:
import threading
import time
import numpy as np
from queue import Queue

class ParameterServerNode:
    """A server node holding a partition of the parameters."""

    def __init__(self, param_slice, learning_rate=0.1):
        """Store one partition of the parameters.

        Args:
            param_slice: Array-like holding this node's parameters. A
                floating-point ndarray is stored as-is (not copied), so
                in-place updates remain visible through the caller's
                reference; any other input is converted to a float array.
            learning_rate: Step size applied to every pushed gradient.
                Defaults to 0.1.
        """
        params = np.asarray(param_slice)
        # An in-place `-=` with a float gradient raises a casting error on
        # integer arrays, so promote anything that is not float/complex.
        if not np.issubdtype(params.dtype, np.inexact):
            params = params.astype(float)
        self.parameters = params
        self.learning_rate = learning_rate
        self.lock = threading.Lock()

    def get_parameters(self):
        """Return a copy of the parameters for workers."""
        with self.lock:
            return self.parameters.copy()

    def update_parameters(self, gradient):
        """Update parameters in place with one gradient-descent step.

        Args:
            gradient: Array-like that broadcasts against ``parameters``.
        """
        with self.lock:
            self.parameters -= self.learning_rate * np.asarray(gradient)  # Gradient descent step

class ParameterServer:
    """Main parameter server that distributes parameter partitions."""

    def __init__(self, num_params, num_servers):
        """Create ``num_servers`` nodes that together hold ``num_params`` parameters.

        Partitions are as even as possible: when ``num_params`` is not a
        multiple of ``num_servers`` the first ``num_params % num_servers``
        nodes each hold one extra parameter, so no parameter is dropped.

        Args:
            num_params: Total number of parameters across all nodes.
            num_servers: Number of server nodes (partitions).

        Raises:
            ValueError: If ``num_servers`` is not positive or ``num_params``
                is negative.
        """
        if num_servers <= 0:
            raise ValueError(f"num_servers must be positive, got {num_servers}")
        if num_params < 0:
            raise ValueError(f"num_params must be non-negative, got {num_params}")

        self.servers = []
        # Partition parameters across multiple server nodes; the remainder is
        # spread over the leading nodes instead of being silently discarded.
        base_size, remainder = divmod(num_params, num_servers)
        for i in range(num_servers):
            partition_size = base_size + (1 if i < remainder else 0)
            param_slice = np.random.rand(partition_size)
            self.servers.append(ParameterServerNode(param_slice))

    def get_partition(self, server_id):
        """Get parameters from a specific server node."""
        return self.servers[server_id].get_parameters()

    def update_partition(self, server_id, gradient):
        """Update parameters of a specific server node."""
        self.servers[server_id].update_parameters(gradient)

class WorkerNode(threading.Thread):
    """Worker node that computes gradients and updates the server."""

    def __init__(self, worker_id, ps, num_epochs, server_id, delay=1.0):
        """Configure the worker thread; call ``start()`` to run it.

        Args:
            worker_id: Identifier used only to label the printed log lines.
            ps: Object exposing ``get_partition(server_id)`` and
                ``update_partition(server_id, gradient)``.
            num_epochs: Number of pull/compute/push rounds to run.
            server_id: Index of the server partition this worker talks to.
            delay: Seconds to sleep after each round to mimic real workload.
                Defaults to 1.0; a value of 0 or less skips the sleep.
        """
        super().__init__()
        self.worker_id = worker_id
        self.ps = ps
        self.num_epochs = num_epochs
        self.server_id = server_id
        self.delay = delay

    def run(self):
        """Pull parameters, compute a random gradient and push it, once per epoch."""
        for _ in range(self.num_epochs):
            # Pull parameters from assigned server node
            params = self.ps.get_partition(self.server_id)
            print(f"Worker {self.worker_id} pulled params: {params}")

            # Simulate gradient computation (random gradient)
            gradient = np.random.rand(len(params))
            print(f"Worker {self.worker_id} computed gradient: {gradient}")

            # Push gradient update to server node
            self.ps.update_partition(self.server_id, gradient)
            print(f"Worker {self.worker_id} pushed gradient.")

            # Simulate some delay to mimic real-world workload
            # (time.sleep rejects negative values, hence the guard)
            if self.delay > 0:
                time.sleep(self.delay)

def main():
    """Run the partitioned parameter-server demo end to end.

    Builds a two-node server holding ten parameters, starts three worker
    threads assigned to nodes round-robin, waits for each to finish three
    epochs, then prints the final parameters held by every node.
    """
    # Demo configuration: total parameters / partitions, then workers / epochs each
    num_params, num_servers = 10, 2
    num_workers, num_epochs = 3, 3

    ps = ParameterServer(num_params, num_servers)

    # Worker k talks to partition k mod num_servers; each thread is started
    # as soon as it is constructed.
    workers = []
    for worker_id in range(num_workers):
        node = WorkerNode(worker_id, ps, num_epochs, worker_id % num_servers)
        node.start()
        workers.append(node)

    # Block until every worker thread has completed
    for node in workers:
        node.join()

    # Report what each partition ended up with
    for i, server in enumerate(ps.servers):
        print(f"Final parameters on server {i}: {server.get_parameters()}")

# Entry point: run the demo only when this code executes as the main module
# (a script or a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()


Worker 0 pulled params: [0.76674045 0.20679982 0.17874522 0.58535647 0.13439081]
Worker 0 computed gradient: [0.50313609 0.84320696 0.24007032 0.93325103 0.37564311]
Worker 0 pushed gradient.
Worker 1 pulled params: [0.04819623 0.32114526 0.06134285 0.10630197 0.01038043]Worker 2 pulled params: [0.71642684 0.12247913 0.15473819 0.49203137 0.0968265 ]

Worker 1 computed gradient: [0.09840055 0.52395605 0.05489859 0.99286323 0.69209853]
Worker 1 pushed gradient.
Worker 2 computed gradient: [0.09590854 0.57717189 0.66019709 0.43777671 0.76961479]
Worker 2 pushed gradient.
Worker 0 pulled params: [0.70683599 0.06476194 0.08871848 0.4482537  0.01986502]
Worker 0 computed gradient: [0.89736703 0.60269473 0.41036854 0.66704673 0.5867233 ]
Worker 0 pushed gradient.
Worker 1 pulled params: [ 0.03835617  0.26874966  0.05585299  0.00701564 -0.05882943]
Worker 1 computed gradient: [0.09379491 0.93000085 0.93766031 0.2367472  0.88286188]
Worker 1 pushed gradient.
Worker 2 pulled params: [ 0.6170992