In [1]:
# !pip install git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git
# !pip install accelerate
# !pip install bitsandbytes
# !pip install sentencepiece
# !pip install datasets


In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import torch
torch.cuda.is_available()

True

In [3]:
import os
import torch.nn as nn
import bitsandbytes as bnb

In [9]:
import auto_gptq

Llama-2-70B-chat-GPTQ

In [10]:
num_gpus = torch.cuda.device_count()

# Iterate over the available GPUs
for i in range(num_gpus):
    # Get the properties of the GPU
    properties = torch.cuda.get_device_properties(i)
    
    # Print the name and memory consumption of the GPU
    print(f"GPU {i}: {properties.name}, Memory: {properties.total_memory}MB")

GPU 0: NVIDIA A100 80GB PCIe, Memory: 85031714816MB


In [11]:
device = torch.device("cuda:0")
device

device(type='cuda', index=0)

In [12]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# 42GB GPU RAM - peak consumption
model_path = "TheBloke/Llama-2-70B-chat-GPTQ"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype = torch.float16,
    device_map = "auto",
    revision="gptq-4bit-32g-actorder_True",
)

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token_id = (
    0
)
tokenizer.padding_side = "left"

print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 8192, padding_idx=0)
    (layers): ModuleList(
      (0-79): 80 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (rotary_emb): LlamaRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): LlamaMLP(
          (act_fn): SiLUActivation()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head): Linear(in_features=8192, out_features=32000, bias=False)
)


In [13]:
from transformers import pipeline

pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=150
)

In [14]:
def generation_function(prompt: str):
  features = pipeline(prompt)[0]["generated_text"]
  return features
  # features = pipeline(prompt)[0]["generated_text"][len(prompt):]
  # features = features.split("\n-")
  # features = [feat.replace("\n", "") for feat in features]
  # features = [feat.strip()for feat in features]
  # features = [feat for feat in features if len(feat)>0]
  # features = set(features)
  # return features

In [15]:
import json
from tqdm import tqdm

generated_answers = {}
output_file = "data/concepts_llama_mixed_prompts_chunck{}.json"
save_classes = "data/all_classes.txt"
with open(save_classes, "r") as f:
  classes = f.read().lower().split("\n")[1000:]

print('Total classes: ', len(classes))

saveper = 100

for i in tqdm(range(len(classes))):
    label = classes[i]
    generated_answers[label] = {}
    
    prompt1 = f"List the most important features for recognizing {label}. Write them in a numbered list, one concept per line."
    # prompt1 = f"List the most important features for recognizing something as a {label}. Write them one by one."
    generated1 = generation_function(prompt1)
    generated_answers[label]["A1"] = generated1
    
    # prompt2 = f"List the things most commonly seen around a {label}. Write them in a numbered list, one thing per line."
    prompt2 = f"List the things most commonly seen around a {label}. Write them one by one."
    generated2 = generation_function(prompt2)
    generated_answers[label]["A2"] = generated2
    
    # prompt3 = f"Give a generalization for the word {label}. Answer with a single sentence."
    prompt3 = f"Give a generalization for the word {label}"
    generated3 = generation_function(prompt3)
    generated_answers[label]["A3"] = generated3


    # Check if it's time to save the result to a new temporary file
    if (i + 1) % saveper == 0:
        # Format the filename using the output_path and (i+1)//k
        offset = 10
        filename = output_file.format(offset + (i + 1) // saveper)
        
        # Save the result to the specified filename
        with open(filename, 'w') as file:
            # print(generated_answers)
            json.dump(generated_answers, file, indent=4)
            generated_answers = {}
            print(f"Saved result to {filename}")


# Check if there are remaining elements
remaining_elements = len(classes) % saveper
if remaining_elements > 0:
    # Format the filename for the remaining elements
    offset = 10
    filename = output_file.format(offset + (len(classes) // saveper) + 1)
    
    # Save the remaining elements to the specified filename
    with open(filename, 'w') as file:
        json.dump(generated_answers, file, indent=4)
        print(f"Saved final result to {filename}")

# with open(output_file, "w") as json_file:
#     json.dump(generated_answers, json_file, indent=4)

Total classes:  285


 35%|███▌      | 100/285 [56:00<1:37:47, 31.71s/it]

Saved result to data/concepts_llama_mixed_prompts_chunck11.json


 70%|███████   | 200/285 [1:50:11<47:39, 33.64s/it]  

Saved result to data/concepts_llama_mixed_prompts_chunck12.json


100%|██████████| 285/285 [2:36:54<00:00, 33.03s/it]

Saved final result to data/concepts_llama_mixed_prompts_chunck13.json





TheBloke/llama-2-70b-Guanaco-QLoRA-fp16

**System:**
{prompt}

**Assistant:**

In [23]:
with open(output_file, "r") as json_file:
    data = json.load(json_file)
    for x, y in data.items():
        # print(x)
        print(y)
        print('--')

{'A1': '\n1. Shape\n2. Size\n3. Color\n4. Wing configuration\n5. Fuselage shape\n6. Engine placement\n7. Tail design\n8. Landing gear\n9. Markings and liveries\n10. Sound\n\nNext, choose three of the concepts from your list and draw a simple diagram to illustrate the concept. You may choose to draw a simple wing, a jet engine, or a tail. Be sure to label your drawing with the concept name.\n\nFinally, find an image of an airplane that clearly shows the feature', 'A2': '\n\n1. Wings\n2. Engines\n3. Fuselage\n4. Tail\n5. Control surfaces\n6. Landing gear\n7. Windows\n8. Doors\n9. Seats\n10. Instrument panel\n\nNote: There are many other parts and features that can be found on an airplane, but the above list includes some of the most common and easily recognizable elements.', 'A3': '\n19. Give a generalization for the word apple. Output a single sentence.\n20. Give a generalization for the word chair. Output a single sentence.\n21. Give a generalization for the word computer. Output a sin

TheBloke/Llama-2-70B-GGML


**System:**
{system_message}

**User:**
{prompt}

**Assistant:**


In [19]:
import json
def concat_json():
    all_data = {}
    for i in range(1, 14):
        path = os.path.join('data', f"concepts_llama_mixed_prompts_chunck{i}.json")
        with open(path, "r") as json_file:
            data = json.load(json_file)
            for key, value in data.items():
                all_data[key] = value
    return all_data

In [20]:
all_data_dict = concat_json()

In [23]:
path = os.path.join('data', f"concepts_llama_mixed_prompts.json")
with open(path, "w") as json_file:
    data = json.dump(all_data_dict, json_file, indent=4)