In [None]:
import itertools
import jsonlines

from datasets import load_dataset
from pprint import pprint
# from llama import BasicModelRunner
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [2]:
instruction_tuned_dataset = load_dataset('tatsu-lab/alpaca', split='train', streaming=True)

In [11]:
n = 5
print("Pretrained dataset: ")
top_n = itertools.islice(instruction_tuned_dataset, n)
for i in top_n:
    print(i)

Pretrained dataset: 
{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nGive three tips for staying healthy.\n\n### Response:\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.'}
{'instruction': 'What are the three primary colors?', 'input': '', 'output': 'The three primary colors are red, blue, and yellow.', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat are the three primary

In [15]:
prompt_template_with_input = '''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:'''

prompt_template_without_input = '''Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:'''

In [16]:
processed_data = []
top_n = itertools.islice(instruction_tuned_dataset, n)
for i in top_n:
    if not i['input']:
        processed_prompt = prompt_template_without_input.format(instruction=i['instruction'])
    else:
        processed_prompt = prompt_template_with_input.format(instruction=i['instruction'], input=i['input'])
    processed_data.append({'input': processed_prompt, 'output': i['output']})

In [17]:
pprint(processed_data[0])

{'input': 'Below is an instruction that describes a task. Write a response '
          'that appropriately completes the request.\n'
          '\n'
          '### Instruction:\n'
          'Give three tips for staying healthy.\n'
          '\n'
          '### Response:',
 'output': '1.Eat a balanced diet and make sure to include plenty of fruits '
           'and vegetables. \n'
           '2. Exercise regularly to keep your body active and strong. \n'
           '3. Get enough sleep and maintain a consistent sleep schedule.'}


In [None]:
# non_instructed_model = BasicModelRunner('meta-llama/Llama-2-7b-hf')
# non_instructed_output = non_instructed_model('Tell me how to train my dog to sit.')
# print('Not intruction-tuned model output: ', non_instructed_output)

In [None]:
# instructed_model = BasicModelRunner('meta-llama/Llama-2-7b-chat-hf')
# instructed_output = instructed_model('Tell me how to train my dog to sit.')
# print('Intruction-tuned model output: ', instructed_output)

In [27]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('EleutherAI/pythia-70m')
model = AutoModelForCausalLM.from_pretrained('EleutherAI/pythia-70m')
model.to('cuda')
print(model.device)

cuda:0


In [28]:
def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=1000):
    # Tokenize
    input_ids = tokenizer.encode(text, 
                                return_tensors='pt', 
                                truncation=True,
                                max_length=max_input_tokens)
    # Generate
    device = model.device
    generated_tokens_with_prompt = model.generate(input_ids.to(device), 
                                                  max_length=max_output_tokens)
    # Decode
    generated_tokens_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

    # Strip the prompt
    generated_text_answer = generated_tokens_with_prompt[0][len(text):]

    return generated_text_answer

In [29]:
finetuning_dataset = load_dataset('lamini/lamini_docs')
print(finetuning_dataset)

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 1260
    })
    test: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 140
    })
})


In [30]:
test_sample = finetuning_dataset['test'][0]
print(test_sample)
print(inference(test_sample['question'], model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


{'question': 'Can Lamini generate technical documentation or user manuals for software projects?', 'answer': 'Yes, Lamini can generate technical documentation and user manuals for software projects. It uses natural language generation techniques to create clear and concise documentation that is easy to understand for both technical and non-technical users. This can save developers a significant amount of time and effort in creating documentation, allowing them to focus on other aspects of their projects.', 'input_ids': [5804, 418, 4988, 74, 6635, 7681, 10097, 390, 2608, 11595, 84, 323, 3694, 6493, 32, 4374, 13, 418, 4988, 74, 476, 6635, 7681, 10097, 285, 2608, 11595, 84, 323, 3694, 6493, 15, 733, 4648, 3626, 3448, 5978, 5609, 281, 2794, 2590, 285, 44003, 10097, 326, 310, 3477, 281, 2096, 323, 1097, 7681, 285, 1327, 14, 48746, 4212, 15, 831, 476, 5321, 12259, 247, 1534, 2408, 273, 673, 285, 3434, 275, 6153, 10097, 13, 6941, 731, 281, 2770, 327, 643, 7794, 273, 616, 6493, 15], 'attention

In [33]:
instructed_model = AutoModelForCausalLM.from_pretrained('lamini/lamini_docs_finetuned')
instructed_model.to('cuda')
print(inference(test_sample['question'], instructed_model, tokenizer))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Yes, Lamini can generate technical documentation or user manuals for software projects. This can be achieved by providing a prompt for a specific technical question or question to the LLM Engine, or by providing a prompt for a specific technical question or question. Additionally, Lamini can be trained on specific technical questions or questions to help users understand the process and provide feedback to the LLM Engine. Additionally, Lamini can be trained on specific technical questions or questions to help users understand the process and provide feedback to the LLM Engine. Additionally, Lamini can be trained on specific technical questions or questions to help users understand the process and provide feedback to the LLM Engine. Additionally, Lamini can be trained on specific technical questions or questions to help users understand the process and provide feedback to the LLM Engine. Additionally, Lamini can be trained on specific technical questions or questions to help users under