In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time

In [2]:
def predict_code(tokenizer, model, input_code, device='cpu', max_length=100):
    """Complete a code prompt with a causal language model.

    Args:
        tokenizer: Object exposing ``encode(text, return_tensors='pt')`` and
            ``decode(ids, skip_special_tokens=True)``.
        model: Object exposing ``to(device)`` and
            ``generate(input_ids, max_length=...)``.
        input_code: Prompt text to be completed.
        device: Device that both the model and the encoded prompt are moved
            to before generation. Defaults to ``'cpu'``.
        max_length: Forwarded unchanged to ``model.generate``. Defaults to
            100, the value that was previously hard-coded.
            NOTE(review): for Hugging Face models this limit counts the
            prompt tokens as well, so a prompt near or above it leaves
            little or no room for new tokens -- confirm this is intended.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens skipped.
    """
    # The model is moved on every call so the function works regardless of
    # where the caller left it.
    model.to(device)
    input_ids = tokenizer.encode(input_code, return_tensors='pt').to(device)

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        output_ids = model.generate(input_ids, max_length=max_length)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

def calculate_throughput(tokenizer, model, input_code, device='cpu', num_runs=100,
                         max_length=100, warmup_runs=1):
    """Measure how many ``model.generate`` calls complete per second.

    Args:
        tokenizer: Object exposing ``encode(text, return_tensors='pt')``.
        model: Object exposing ``to(device)`` and
            ``generate(input_ids, max_length=...)``.
        input_code: Prompt text used for every generation call.
        device: Device that both the model and the encoded prompt are moved
            to. Defaults to ``'cpu'``.
        num_runs: Number of timed generation calls. Must be at least 1.
        max_length: Forwarded unchanged to ``model.generate``. Defaults to
            100, the value that was previously hard-coded.
        warmup_runs: Untimed generation calls executed before the timed
            loop so one-time initialisation cost does not distort the
            result. Pass 0 to time from the very first call.

    Returns:
        ``num_runs`` divided by the elapsed seconds of the timed loop
        (sequences per second, one sequence per call).

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be a positive integer, got {num_runs}")

    model.to(device)
    input_ids = tokenizer.encode(input_code, return_tensors='pt').to(device)

    # CUDA work is queued asynchronously; synchronising at the edges of the
    # timed region makes sure only finished work is measured.
    on_cuda = str(device).startswith('cuda') and torch.cuda.is_available()

    def _sync():
        if on_cuda:
            torch.cuda.synchronize(torch.device(device))

    with torch.no_grad():
        for _ in range(warmup_runs):
            model.generate(input_ids, max_length=max_length)
        _sync()

        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        for _ in range(num_runs):
            model.generate(input_ids, max_length=max_length)
        _sync()
        elapsed = time.perf_counter() - start_time

    if elapsed <= 0:
        # Only reachable when the timer cannot resolve the loop duration.
        return float('inf')
    return num_runs / elapsed

In [3]:
# Prompt handed to the model for completion: an unfinished function header.
input_code = 'def hello_world():'

In [None]:
# Load the tokenizer and the causal LM from the Hugging Face Hub.
# NOTE(review): the tokenizer comes from "budecosystem/code-millenials-1b" while the
# weights come from "marco-molinari/python-code-millenials-1b" -- presumably the second
# is a fine-tune that shares the first one's vocabulary; confirm the two are compatible.
# NOTE(security): per the transformers docs, trust_remote_code=True permits running
# Python code shipped in the Hub repository; only keep it for repos you trust.
tokenizer = AutoTokenizer.from_pretrained("budecosystem/code-millenials-1b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("marco-molinari/python-code-millenials-1b", trust_remote_code=True)
# Switch to evaluation mode for inference. As the cell's last expression, the returned
# value is also what the notebook displays for this cell.
model.eval()

In [None]:
# Generate one completion for the prompt. No device is passed, so predict_code's
# default ('cpu') is used.
completed_code = predict_code(tokenizer, model, input_code)
print("Predicted Code:", completed_code)

# Benchmark repeated generation with calculate_throughput's defaults
# (device='cpu', num_runs=100 timed calls), so this cell can take a long time to run.
throughput = calculate_throughput(tokenizer, model, input_code)
print("Throughput (in sequences per second):", throughput)