In [None]:
# Distributed P2P DistilGPT-2 Demo

Multi-instance distributed LLM system demo.

## Setup
- Instance 1 (172.31.42.169): Shard 0 (layers 0-2)
- Instance 2 (172.31.34.102): Shard 1 (layers 3-5)

## Steps
1. Start shards on both instances
2. Check health and peer discovery
3. Test distributed text generation


In [1]:
import requests
import json

# Instance configuration
INSTANCE_1_IP = "172.31.42.169"  # Shard 0
INSTANCE_2_IP = "172.31.34.102"  # Shard 1
PORT = 8000

SHARD_0_URL = f"http://{INSTANCE_1_IP}:{PORT}"
SHARD_1_URL = f"http://{INSTANCE_2_IP}:{PORT}"

print(f"Shard 0: {SHARD_0_URL}")
print(f"Shard 1: {SHARD_1_URL}")


Shard 0: http://172.31.42.169:8000
Shard 1: http://172.31.34.102:8000


In [None]:
## Start Shards

**Instance 1:**
```bash
cd /home/ubuntu/llm_p2p
./scripts/setup_shard1.sh
```

**Instance 2:**
```bash
ssh ubuntu@172.31.34.102
cd /home/ubuntu/llm_p2p
./scripts/setup_shard2.sh
```

Wait 30-60 seconds for model loading.


In [2]:
# Check shard health
def check_health(url, name):
    try:
        response = requests.get(f"{url}/health", timeout=5)
        if response.status_code == 200:
            data = response.json()
            print(f"{name}: HEALTHY - Shard {data['shard_id']}, Layers {data['layers']}")
            return True
        else:
            print(f"{name}: ERROR - Status {response.status_code}")
            return False
    except Exception as e:
        print(f"{name}: NOT ACCESSIBLE - {e}")
        return False

# Check both instances
print("Health Check:")
shard_0_ok = check_health(SHARD_0_URL, "Instance 1")
shard_1_ok = check_health(SHARD_1_URL, "Instance 2")

if shard_0_ok and shard_1_ok:
    print("Both shards ready")
else:
    print("Some shards not ready")


Health Check:
Instance 1: HEALTHY - Shard 0, Layers 0-2
Instance 2: HEALTHY - Shard 1, Layers 3-5
Both shards ready


In [3]:
# Check peer discovery
def check_peers(url, name):
    try:
        response = requests.get(f"{url}/peers", timeout=10)
        if response.status_code == 200:
            data = response.json()
            print(f"{name}: Found {data['total_peers']} peers")
            for peer in data['peers']:
                print(f"  Shard {peer['shard_id']}: {peer['host']}:{peer['port']}")
            return len(data['peers']) > 0
        else:
            print(f"{name}: Peer check failed")
            return False
    except Exception as e:
        print(f"{name}: Peer check error - {e}")
        return False

print("Peer Discovery:")
check_peers(SHARD_0_URL, "Instance 1")
check_peers(SHARD_1_URL, "Instance 2")


Peer Discovery:
Instance 1: Found 2 peers
  Shard 1: 172.31.34.102:8000
  Shard 0: 172.31.42.169:8000
Instance 2: Found 2 peers
  Shard 0: 172.31.42.169:8000
  Shard 1: 172.31.34.102:8000


True

In [4]:
# Generate text using distributed inference
def generate_text(prompt, max_length=20, url=SHARD_0_URL):
    request_data = {
        "prompt": prompt,
        "max_length": max_length,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50,
        "do_sample": True,
        "repetition_penalty": 1.1
    }
    
    try:
        response = requests.post(f"{url}/generate", json=request_data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            print(f"Input: '{result['prompt']}'")
            print(f"Output: '{result['generated_texts'][0]}'")
            print(f"Time: {result['processing_time']:.2f}s")
            print(f"Shards used: {result['shards_used']}")
            return result
        else:
            print(f"Generation failed: {response.status_code}")
            return None
    except Exception as e:
        print(f"Request failed: {e}")
        return None

# Test generation
print("Text Generation Test:")
generate_text("Hello distributed P2P", max_length=15)


Text Generation Test:
Input: 'Hello distributed P2P'
Output: 'Hello distributed P2P projects in the future.




'
Time: 7.38s
Shards used: [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]


{'generated_texts': ['Hello distributed P2P projects in the future.\n\n\n\n\n'],
 'prompt': 'Hello distributed P2P',
 'processing_time': 7.3806798458099365,
 'shards_used': [0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1]}

In [5]:
# Test P2P routing - requests to different instances
print("P2P Routing Test:")

prompt = "P2P networks enable"

print("Request to Instance 1:")
result1 = generate_text(prompt, max_length=18, url=SHARD_0_URL)

print("\nRequest to Instance 2:")
result2 = generate_text(prompt, max_length=18, url=SHARD_1_URL)

if result1 and result2:
    print("\nBoth instances working")
    print("P2P auto-routing successful")
else:
    print("\nSome routing failed")


P2P Routing Test:
Request to Instance 1:
Input: 'P2P networks enable'
Output: 'P2P networks enable low latency network access to high-quality content.


'
Time: 9.84s
Shards used: [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

Request to Instance 2:
Input: 'P2P networks enable'
Output: 'P2P networks enable a network to communicate in the same way that a computer can communicate'
Time: 9.80s
Shards used: [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

Both instances working
P2P auto-routing successful
