# Imports

In [4]:
from utils import *
import os
import requests
import together
import json
import wandb
import random

In [5]:
prompt = """
    What phone should I buy, give me a detailed analysis of the best phones in the market right now.
"""
response = llama(prompt, verbose=True)

Prompt:
[INST]
    What phone should I buy, give me a detailed analysis of the best phones in the market right now.
[/INST]

model: togethercomputer/llama-2-7b-chat


In [6]:
print(response["output"]["choices"][0]["text"])

  Certainly! Here's a detailed analysis of some of the best phones available in the market right now, based on various factors such as performance, camera quality, battery life, design, and price.

1. Apple iPhone 12 Pro:
	* Pros: Superb camera performance, impressive battery life, stunning design, and fast charging capabilities.
	* Cons: Expensive, no headphone jack, and limited customization options.
	* Price: $799 - $849 (64GB), $899 - $949 (128GB), $1099 - $1149 (256GB)
2. Samsung Galaxy S21:
	* Pros: Powerful Exynos 2100 chipset, impressive camera performance, long battery life, and fast charging capabilities.
	* Cons: No headphone jack, limited customization options, and slightly larger than some other phones.
	* Price: $799 - $849 (12GB + 128GB), $899 - $949 (12GB + 256GB), $1099 - $1149 (12GB + 512GB)
3. Google Pixel 6:
	* Pros: Exceptional camera performance, timely software updates, and clean, intuitive interface.
	* Cons: No headphone jack, limited customization options, and

# Training on custom data

In [7]:
api_url = "https://api.together.xyz/train"
api_key = os.getenv("TOGETHER_API_KEY")  
wandb_api_key = os.getenv("WANDB_API_KEY")
together.api_key = api_key

base_model_name = "togethercomputer/llama-2-7b-chat" 

training_parameters = {
    "model": "togethercomputer/llama-7b-chat",
    "training_steps": 10000,
    "learning_rate": 1e-4,
    "batch_size": 8, 
} 

In [5]:
with open('data.json', 'r') as file:
    try: 
        json_data = json.load(file) 
    except json.JSONDecodeError as e:
        print('Error decoding JSON:', e)
    except FileNotFoundError:
        print('File not found.')
    except Exception as e:
        print('An error occurred:', e)


In [8]:
json_data[0]

{'input': 'Is Nokia G21 6GB a good phone?',
 'target': " Nokia G21 6GB - Budget Beast Introduced by Nokia!\nNokia has launched the expanded G21 6GB smartphone recently as a way of cheering on the current tough competition in the market and as a way of contributing to the success of the device, as it comes with amazing and unbelievable features. The whole new Nokia's G21 6GB landed with a Unisoc T606 (12 nm) chipset with the combination of a powerful Octa-core (2 x 1.6 GHz Cortex-A75 + 6 x 1.6 GHz Cortex-A55) CPU to increase the performance and speed. After that, with the new Nokia G21's 6GB, you will have 4GB of RAM with 128 GB of internal memory and also a dedicated memory card slot. The rumours about the Nokia G21 6GB suggest that it is going to be stable in the market as compared to its previous models since it comes with a triple camera system of 50 MP (wide) + 2 MP (macro) + 2 MP (depth) sensors, allowing users to experience another level of photography. On the front of the Nokia 

In [9]:
def format_to_llama2_chat(system_prompt, user_model_chat_list):
 

    growing_prompt = f"""<s>[INST] <<SYS>> {system_prompt} <</SYS>>"""

    for user_msg, model_answer in user_model_chat_list:
        growing_prompt += f""" {user_msg} [/INST] {model_answer} </s>"""

    return growing_prompt

format_to_llama2_chat(
    "You are a good robot",
    [("hi robot", "hello human"),("are you good?", "yes im good"),("are you bad?", "no, im good")]
)

'<s>[INST] <<SYS>> You are a good robot <</SYS>> hi robot [/INST] hello human </s> are you good? [/INST] yes im good </s> are you bad? [/INST] no, im good </s>'

In [13]:
data_list = []

for sample in json_data:
    input = sample['input'] if sample['input'] is not None else ""

    training_sequence = format_to_llama2_chat(
        "you are a helpful mobile phone expert. You know all about phones",
        [(input,sample['target'])]
    )

    data_list.append({
        "text":training_sequence
    })

print(len(data_list))
print(data_list[0])

17010
{'text': "<s>[INST] <<SYS>> you are a helpful mobile phone expert. You know all about phones <</SYS>> Is Nokia G21 6GB a good phone? [/INST]  Nokia G21 6GB - Budget Beast Introduced by Nokia!\nNokia has launched the expanded G21 6GB smartphone recently as a way of cheering on the current tough competition in the market and as a way of contributing to the success of the device, as it comes with amazing and unbelievable features. The whole new Nokia's G21 6GB landed with a Unisoc T606 (12 nm) chipset with the combination of a powerful Octa-core (2 x 1.6 GHz Cortex-A75 + 6 x 1.6 GHz Cortex-A55) CPU to increase the performance and speed. After that, with the new Nokia G21's 6GB, you will have 4GB of RAM with 128 GB of internal memory and also a dedicated memory card slot. The rumours about the Nokia G21 6GB suggest that it is going to be stable in the market as compared to its previous models since it comes with a triple camera system of 50 MP (wide) + 2 MP (macro) + 2 MP (depth) sen

In [14]:
together.Files.save_jsonl(data_list, "legal_dataset.jsonl")

Wrote 17010 records to legal_dataset.jsonl


In [55]:
resp = together.Files.check(file="legal_dataset.jsonl")
print(resp)

{'is_check_passed': True, 'model_special_tokens': 'we are not yet checking end of sentence tokens for this model', 'file_present': 'File found', 'file_size': 'File size 0.03 GB', 'num_samples': 17010}


In [56]:
resp = together.Files.list()
print(resp)

{'data': [{'filename': 'legal_dataset.jsonl', 'bytes': 32276562, 'created_at': 1709391344, 'id': 'file-6fb5297f-37b3-4b60-83a2-c79678d636d5', 'purpose': 'fine-tune', 'object': 'file', 'LineCount': 0, 'Processed': True}], 'object': 'list'}


In [17]:
file_resp = together.Files.upload(file="legal_dataset.jsonl")
file_id = file_resp["id"]
print(file_resp)

Uploading legal_dataset.jsonl: 100%|██████████| 30.8M/30.8M [00:11<00:00, 2.90MB/s]

{'filename': 'legal_dataset.jsonl', 'id': 'file-6fb5297f-37b3-4b60-83a2-c79678d636d5', 'object': 'file', 'report_dict': {'is_check_passed': True, 'model_special_tokens': 'we are not yet checking end of sentence tokens for this model', 'file_present': 'File found', 'file_size': 'File size 0.03 GB', 'num_samples': 17010}}





In [19]:
ft_resp = together.Finetune.create(
  training_file = file_id ,
  model = base_model_name,
  n_epochs = 2,
  batch_size = 4,
  n_checkpoints = 1,
  learning_rate = 5e-5,
  wandb_api_key = wandb_api_key,
  #estimate_price = True,
  suffix = 'law',
)

fine_tune_id = ft_resp['id']
print(ft_resp)

{'training_file': 'file-6fb5297f-37b3-4b60-83a2-c79678d636d5', 'validation_file': '', 'model_output_name': 'harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52', 'model_output_path': 's3://together-dev/finetune/65e2c8f5d14c747cee2d3e16/harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52/ft-1739a3ac-6d52-4c95-ab88-1dcdd53bb4ce', 'Suffix': 'law', 'model': 'togethercomputer/llama-2-7b-chat', 'n_epochs': 2, 'n_checkpoints': 1, 'batch_size': 4, 'learning_rate': 5e-05, 'eval_steps': 0, 'user_id': '65e2c8f5d14c747cee2d3e16', 'lora': False, 'lora_r': 8, 'lora_alpha': 8, 'lora_dropout': 0, 'staring_epoch': 0, 'training_offset': 0, 'checkspoint_path': '', 'random_seed': '', 'created_at': '2024-03-02T15:00:52.08Z', 'updated_at': '2024-03-02T15:00:52.08Z', 'status': 'pending', 'owner_address': '0x75b8672cb99aa4456064dc1725ca235012fc6907', 'id': 'ft-1739a3ac-6d52-4c95-ab88-1dcdd53bb4ce', 'job_id': '', 'token_count': 0, 'param_count': 0, 'total_price': 0, 'epochs_comp

In [58]:
print(together.Finetune.retrieve(fine_tune_id=fine_tune_id)) # retrieves information on finetune event
print("-"*50)
print(together.Finetune.get_job_status(fine_tune_id=fine_tune_id)) # pending, running, completed
print(together.Finetune.is_final_model_available(fine_tune_id=fine_tune_id)) # True, False
print(together.Finetune.get_checkpoints(fine_tune_id=fine_tune_id)) # list of checkpoints

{'training_file': 'file-6fb5297f-37b3-4b60-83a2-c79678d636d5', 'validation_file': '', 'model_output_name': 'harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52', 'model_output_path': 's3://together-dev/finetune/65e2c8f5d14c747cee2d3e16/harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52/ft-1739a3ac-6d52-4c95-ab88-1dcdd53bb4ce-2024-03-02-07-42-02', 'Suffix': 'law', 'model': 'togethercomputer/llama-2-7b-chat', 'n_epochs': 2, 'n_checkpoints': 1, 'batch_size': 4, 'learning_rate': 5e-05, 'eval_steps': 0, 'user_id': '65e2c8f5d14c747cee2d3e16', 'lora': False, 'lora_r': 8, 'lora_alpha': 8, 'lora_dropout': 0, 'staring_epoch': 0, 'training_offset': 0, 'checkspoint_path': '', 'random_seed': '', 'created_at': '2024-03-02T15:00:52.08Z', 'updated_at': '2024-03-02T15:46:31.207Z', 'status': 'completed', 'owner_address': '0x75b8672cb99aa4456064dc1725ca235012fc6907', 'id': 'ft-1739a3ac-6d52-4c95-ab88-1dcdd53bb4ce', 'job_id': '15219', 'token_count': 8888376, 'param_count':

In [57]:
print(together.Finetune.get_job_status(fine_tune_id=fine_tune_id)) # pending, running, completed

completed


In [8]:
new_model_name = 'harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52'

model_list = together.Models.list()

print(f"{len(model_list)} models available")

available_model_names = [model_dict['name'] for model_dict in model_list]

new_model_name in available_model_names

165 models available


True

In [59]:
together.Models.start(new_model_name)

{'success': True,
 'value': 'a91ababf3ee45a618815ba9e194e4c9720e9de95b7835cf4b7027c4b62832a1f-a1249c295f3ca6c1c65c19caca9a34951a93a0ee140232fdde0f8f9811dc39a5'}

In [60]:
together.Models.ready(new_model_name)

[{'modelInstanceConfig': {'appearsIn': [], 'order': 0},
  '_id': '65e349d707d57fd92ca0c37b',
  'name': 'harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52',
  'display_type': 'chat',
  'description': 'Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters',
  'creator_organization': 'harisrehman195103@gmail.com',
  'hardware_label': 'L40',
  'num_parameters': '6738415616',
  'release_date': '2024-03-02T15:46:31.235Z',
  'show_in_playground': True,
  'owner': 'harisrehman195103@gmail.com',
  'owner_address': '0x75b8672cb99aa4456064dc1725ca235012fc6907',
  'owner_userid': '65e2c8f5d14c747cee2d3e16',
  'parent': 'togethercomputer/llama-2-7b-chat',
  'base': 'togethercomputer/llama-2-7b-chat',
  'path': 'r2://together-dev/finetune/65e2c8f5d14c747cee2d3e16/harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52/ft-1739a3ac-6d52-4c95-ab88-1dcdd53bb4ce-2024-03-02-07-

In [15]:
test_chat_prompt = "<s>[INST] <<SYS>>you are a helpful mobile phone expert. You know all about phones <</SYS>> hello! [/INST]"
test_chat_prompt

'<s>[INST] <<SYS>>you are a helpful mobile phone expert. You know all about phones <</SYS>> hello! [/INST]'

In [9]:
print(new_model_name)

harisrehman195103@gmail.com/llama-2-7b-chat-law-2024-03-02-15-00-52


In [13]:
output = together.Complete.create(
  prompt = test_chat_prompt,
  model = new_model_name,
  max_tokens = 256,
  temperature = 0.2,
  top_k = 90,
  top_p = 0.8,
  repetition_penalty = 1.1,
  stop = ['</s>']
)

print(output['prompt'][0]+" -> "+output['output']['choices'][0]['text'])

<s>[INST] <<SYS>> <</SYS>> hello! [/INST] ->  Hello!
 nobody is perfect, every one of us make mistakes. But the good thing is that we can always learn from our errors and improve ourselves. 


In [14]:
output['output']['choices'][0]['text']

' Hello!\n nobody is perfect, every one of us make mistakes. But the good thing is that we can always learn from our errors and improve ourselves. '

In [53]:
together.Models.stop(new_model_name)

{'success': True}