In [None]:
pip install ray==2.36.0

In [None]:
pip install ray[client]

In [None]:
pip install -U ipywidgets

In [None]:
pip install torch

In [None]:
pip install transformers

In [None]:
pip install pandas

In [None]:
!python --version

In [None]:
# Connect this notebook to a Ray cluster through a ray:// (Ray Client) address
# and print the resources the cluster reports.
import ray
# Packages listed under "pip" are requested for the job's runtime environment.
runtime_env = {"pip": ["torch","IPython","transformers","filelock"]}
# Address template:
# ray://${RAYCLUSTER_HEAD_SVC}.${NAMESPACE}.svc.cluster.local:${RAY_CLIENT_PORT}
# Below: head service "raycluster1-kuberay-head-svc", namespace "default", port 10001.
ray.init(address="ray://raycluster1-kuberay-head-svc.default.svc.cluster.local:10001", runtime_env=runtime_env)
# Quick sanity check that the connection works.
print(ray.cluster_resources())

In [None]:
# Disconnect this session from Ray.
# NOTE(review): the benchmark cell submits Ray tasks; if this cell is run
# before it, the session is no longer attached to the remote cluster —
# confirm the intended cell execution order.
ray.shutdown()

In [None]:
import ray
import torch
import random
import time
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification

def load_model():
    """Create the ``bert-base-uncased`` tokenizer and classifier for GPU use.

    Returns:
        A ``(tokenizer, model, device)`` tuple. The model has been moved to
        the ``cuda`` device and put into evaluation mode.
    """
    checkpoint = "bert-base-uncased"
    gpu = torch.device("cuda")

    tokenizer = BertTokenizer.from_pretrained(checkpoint)

    # Two output labels: binary classification example.
    classifier = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
    classifier = classifier.to(gpu)
    classifier.eval()

    return tokenizer, classifier, gpu

# Replacement candidates, keyed by the exact (case-sensitive) word they replace.
# Built once at import time instead of on every call.
_SYNONYMS = {
    "amazing": ["incredible", "fantastic", "awesome"],
    "disappointing": ["unsatisfactory", "frustrating", "terrible"],
    "great": ["wonderful", "excellent", "superb"],
    "worst": ["horrible", "awful", "terrible"],
    "fantastic": ["phenomenal", "spectacular", "superb"],
}

# Punctuation that can stay attached to a token after whitespace splitting.
_EDGE_PUNCTUATION = ".,!?;:\"'()"


def augment_sentence(sentence):
    """Create a minor variation of a sentence for dataset augmentation.

    Each whitespace-separated token whose word is a key of the synonym table
    is replaced by a randomly chosen synonym with probability 0.5. Leading
    and trailing punctuation is ignored for the lookup and re-attached to the
    replacement, so tokens such as ``"great!"`` or ``"disappointing."`` are
    eligible too. Afterwards the tokens are shuffled with probability 0.3.

    Args:
        sentence: The text to vary.

    Returns:
        The varied sentence, with tokens joined by single spaces.
    """
    words = sentence.split()
    for index, token in enumerate(words):
        # Separate the bare word from any punctuation around it.
        without_lead = token.lstrip(_EDGE_PUNCTUATION)
        core = without_lead.rstrip(_EDGE_PUNCTUATION)
        # Only draw a random number for tokens that can actually be replaced.
        if core in _SYNONYMS and random.random() > 0.5:
            lead = token[:len(token) - len(without_lead)]
            trail = without_lead[len(core):]
            words[index] = lead + random.choice(_SYNONYMS[core]) + trail

    if random.random() > 0.7:
        random.shuffle(words)

    return " ".join(words)

def generate_large_dataset(num_samples=100000):
    """Build a list of ``num_samples`` augmented sentences.

    Each entry is produced by picking one of the base sentences at random and
    passing it through ``augment_sentence``.
    """
    templates = [
        "The product is amazing and works flawlessly.",
        "I didn't like the service at all. It was disappointing.",
        "The experience was great! I highly recommend it.",
        "The worst purchase I have ever made. Never again.",
        "Absolutely fantastic! I loved every bit of it."
    ]

    samples = []
    for _ in range(num_samples):
        picked = random.choice(templates)
        samples.append(augment_sentence(picked))
    return samples

@ray.remote(num_gpus=1)  # Assign GPU to each worker
def run_inference_worker(text_list, batch_size=512):
    """Classify ``text_list`` in batches on a GPU worker.

    The model is loaded first; the reported duration covers only the batched
    inference loop that follows, not the model load.

    Returns:
        A ``(predictions, duration)`` tuple: one predicted class index per
        input text, and the loop's elapsed time in seconds.
    """
    tokenizer, model, device = load_model()

    predictions = []
    started = time.time()

    for offset in range(0, len(text_list), batch_size):
        encoded = tokenizer(
            text_list[offset:offset + batch_size],
            padding=True,
            truncation=True,
            return_tensors="pt",
        )
        encoded = encoded.to(device)

        with torch.no_grad():
            outputs = model(**encoded)

        torch.cuda.synchronize()
        class_probs = torch.softmax(outputs.logits, dim=-1)
        predictions += class_probs.argmax(dim=-1).tolist()

    elapsed = time.time() - started
    print(f"Worker finished processing {len(text_list)} samples in {elapsed:.2f} seconds")

    return predictions, elapsed

def _split_evenly(items, num_parts):
    """Split ``items`` into at most ``num_parts`` contiguous, non-empty slices.

    Slice sizes differ by at most one, and every element lands in exactly one
    slice (the first ``len(items) % num_parts`` slices get one extra element).
    """
    base_size, extra = divmod(len(items), num_parts)
    parts = []
    start = 0
    for part_index in range(num_parts):
        end = start + base_size + (1 if part_index < extra else 0)
        if end > start:  # skip empty slices when there are fewer items than parts
            parts.append(items[start:end])
        start = end
    return parts


def run_distributed_inference(dataset, num_workers=4, batch_size=1024):
    """Distribute the dataset across multiple workers using Ray.

    The dataset is cut into contiguous chunks, one ``run_inference_worker``
    task is submitted per chunk, and the results are gathered. No sample is
    dropped when ``len(dataset)`` is not a multiple of ``num_workers``, and no
    task is launched for an empty chunk.

    Args:
        dataset: Sequence of input texts.
        num_workers: Maximum number of worker tasks to launch; must be >= 1.
        batch_size: Batch size forwarded to each worker.

    Returns:
        The sum of the per-worker inference durations in seconds. Because the
        tasks are submitted together, this is aggregate worker time rather
        than the elapsed wall-clock time of the call.

    Raises:
        ValueError: If ``num_workers`` is less than 1.
    """
    if num_workers < 1:
        raise ValueError(f"num_workers must be at least 1, got {num_workers}")

    dataset_chunks = _split_evenly(dataset, num_workers)

    # Distribute workload to workers
    futures = [run_inference_worker.remote(chunk, batch_size) for chunk in dataset_chunks]

    # Gather results from all workers
    results = ray.get(futures)

    # Extract predictions and durations
    final_predictions = [pred for sublist, _ in results for pred in sublist]  # Flatten the list
    total_worker_time = sum(duration for _, duration in results)

    print(f"Total processed samples: {len(final_predictions)}")
    print(f"Total duration for this run: {total_worker_time:.2f} seconds")

    return total_worker_time

if __name__ == "__main__":
    # Benchmark driver: run the same distributed inference workload several
    # times, print a summary, and save the per-run durations to a CSV file.
    dataset = generate_large_dataset(num_samples=200000)  # Increased dataset size

    # Run the workload 25 times and measure duration
    durations = []
    total_start_time = time.time()
    workloads = 25

    for run in range(workloads):
        print(f"Starting run {run + 1}/{workloads}")
        # The value returned here is the sum of the per-worker inference
        # times, not the wall-clock time of the run.
        duration = run_distributed_inference(dataset, num_workers=4, batch_size=1024)
        durations.append(duration)
        print(f"Run {run + 1} completed in {duration:.2f} seconds\n")

    total_end_time = time.time()
    total_duration = total_end_time - total_start_time

    # Print summary (these totals are wall-clock, including dispatch overhead)
    print("\n=== Performance Summary ===")
    print(f"Total duration for {workloads} runs: {total_duration:.2f} seconds")
    print(f"Average duration per run: {total_duration / workloads:.2f} seconds")

    # Save durations to CSV
    df = pd.DataFrame({"Run": list(range(1, workloads+1)), "Duration (seconds)": durations})
    df.to_csv("run_durations.csv", index=False)
    print("Durations saved to 'run_durations.csv'")