In [1]:
# Prerequisite: the local-CPU test runs successfully and everything it needs is installed.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm.notebook import tqdm
from distributed import Client, LocalCluster
import torch.multiprocessing as mp

# https://docs.mlerp.cloud.edu.au/tutorials/3_dask_pytorch.html

# Number of prompts the DataLoader groups into each batch.
batch_size = 1

# Short prompts for the language model to complete.
DATASET = [
    "Hello, my name is",
    "You killed my father",
    "My favourite colours are:",
    "What is the airspeed of an unladen",
]




In [2]:

# Wrap the prompts in a DataLoader fed by a single forked worker process.
# NOTE: shuffle=True means the prompts come back in a random order on every pass.
dataloader = torch.utils.data.DataLoader(
    DATASET,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1,
    multiprocessing_context=mp.get_context("fork"),
)


In [3]:
# Display the loader's repr as a quick check that it was constructed.
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7f46f33d3210>

In [4]:
# Iterate once to inspect what the loader yields: each item is one batch,
# delivered as a list of prompt strings.
for line in dataloader:
    print(line)

['Hello, my name is']
['My favourite colours are:']
['What is the airspeed of an unladen']
['You killed my father']


In [9]:

def generate(loader, model_id="mistralai/Mistral-7B-v0.1"):
    """Complete every prompt yielded by ``loader`` with a causal language model.

    Args:
        loader: Iterable yielding batches of prompt strings (for example a
            ``DataLoader`` over a list of strings). A bare string is treated
            as a batch of one prompt.
        model_id: Hugging Face model identifier handed to ``from_pretrained``.

    Returns:
        A list of ``{"prompt": ..., "output": ...}`` dicts, one per prompt, in
        the order the loader yielded them. ``"output"`` is the decoded text of
        the sequence returned by ``model.generate`` (at most 20 new tokens).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
    # Batched generation needs a pad token; fall back to EOS when the
    # tokenizer does not define one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
    results = []

    for batch in loader:
        # Process the whole batch, not just its first element, so nothing is
        # silently dropped when the loader uses batch_size > 1.
        prompts = [batch] if isinstance(batch, str) else list(batch)
        if not prompts:
            continue

        # Follow the device chosen by device_map="auto" instead of assuming CUDA.
        inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)

        generated = model.generate(
            **inputs,
            max_new_tokens=20,
            pad_token_id=tokenizer.pad_token_id,
        )
        completions = tokenizer.batch_decode(generated, skip_special_tokens=True)

        for prompt, completion in zip(prompts, completions):
            record = {"prompt": prompt, "output": completion}
            # Log only the new record rather than re-printing the full list.
            print(record)
            results.append(record)
    return results
        

In [10]:
from dask_jobqueue import SLURMCluster
from distributed import Client

# Start one single-GPU SLURM worker, run `generate` on it, and tear everything
# down afterwards. Context managers guarantee cleanup even when the cluster or
# client fails to start (the previous `finally: client.shutdown()` raised
# NameError in that case and hid the real error).
# For a local smoke test, swap SLURMCluster(...) for LocalCluster(processes=False).
with SLURMCluster(
    memory="128g",
    processes=1,
    cores=16,
    job_extra_directives=["--gres=gpu:1"],
    nanny=False,
) as cluster:
    cluster.scale(1)
    with Client(cluster) as client:
        future = client.submit(generate, dataloader)
        outputs = future.result()
        print(outputs)

[{'prompt': 'Hello, my name is', 'output': 'Hello, my name is Katie and I am a 20-something year old living in the beautiful city of Vancouver'}, {'prompt': 'My favourite colours are:', 'output': 'My favourite colours are:\n\n- Blue\n- Green\n- Purple\n- Red\n- Yellow\n\nI'}, {'prompt': 'You killed my father', 'output': 'You killed my father, prepare to die.\n\nThe first time I saw this movie was in the theater. I'}, {'prompt': 'What is the airspeed of an unladen', 'output': 'What is the airspeed of an unladen swallow?\n\nThe answer to this question is 11 m/s, or 2'}]
