In [1]:
!pip install -q -U bitsandbytes transformers peft accelerate datasets

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
import torch
import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel

In [3]:
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,)
tokenizer.pad_token = tokenizer.eos_token
#tokenizer.padding_side = "right"




In [5]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token = "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",
                                             quantization_config=bnb_config,
                                             torch_dtype=torch.float16,
                                             device_map="auto",
                                            # attn_implementation="flash_attention_2",   #You can use flash attention on your local GPU with specific libraries
                                             )



Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [7]:
def read_caption_phrase_pairs(captions_file, phrases_file):
    caption_phrase_pairs = []

    with open(captions_file, 'r') as f_captions, open(phrases_file, 'r') as f_phrases:
        for caption_line, phrase_line in zip(f_captions, f_phrases):
            caption = caption_line.strip().replace('\\', '')
            phrase = phrase_line.strip().replace('\\', '')
            caption_phrase_pairs.append((caption, phrase))

        # Ensure the number of captions and phrases match
        if len(caption_phrase_pairs) == 0 or len(caption_phrase_pairs[0]) != 2:
            raise ValueError("Number of captions and phrases do not match.")

    return caption_phrase_pairs

# Example usage:
captions_file = "captions.txt"
phrases_file = "phrases.txt"
pairs = read_caption_phrase_pairs(captions_file, phrases_file)
print(pairs[:5])

[('"A cat wearing a bow tie and playing the piano",', '"This cat thinks it\'s a maestro, but it\'s really just making a \'meow\'sic mess!",'), ('"A dog surfing on a wave",', '"When the waves are calling, even dogs answer! Hang ten, pupper!",'), ('"A group of rabbits having a picnic",', '"These rabbits know how to do lunch right! But watch out for carrot crumbs!",'), ('"A bear riding a unicycle",', '"Life\'s a balancing act, just like this bear on a unicycle!",'), ('"A horse wearing a crown and being pampered by servants",', '"Who knew horses had royal aspirations? This one\'s living its best fairy tale!",')]


In [8]:
print(len(pairs))
print(max(map(lambda x: len(x[1]), pairs)))

130
101


In [9]:
def generate_eval_prompt(p):
    sys_msg= "Create a funny phrase/meme on "
    p =  "[INST]" + sys_msg +"\n"+ p + "[/INST]"
    return p 

In [10]:
def generate_prompt(user_query):
    sys_msg= "Create a funny phrase/meme on "
    p =  "[INST]" + sys_msg +"\n"+ user_query['caption'] + "[/INST]" +  user_query['phrase']
    return p 

In [11]:
# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)
#tokenizer.pad_token = "!"
CUTOFF_LEN = 256
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1


config = LoraConfig(r=LORA_R, lora_alpha=LORA_ALPHA, target_modules=[ "w1", "w2", "w3"], lora_dropout=LORA_DROPOUT, bias="none", task_type="CAUSAL_LM")
model = get_peft_model(model, config)



In [13]:
tokenize = lambda prompt: tokenizer(prompt, truncation=True, max_length=CUTOFF_LEN, padding="max_length")

In [14]:
from datasets import Dataset, DatasetDict
from torch.utils.data import DataLoader

def create_dataloader(qa_pairs, batch_size=8, shuffle=True):
    # Create a dataset from the qa_pairs
    dataset = Dataset.from_dict({'caption': [pair[0] for pair in qa_pairs],
                                 'phrase': [pair[1] for pair in qa_pairs]})

    # Create a dataset dictionary
    dataset_dict = DatasetDict({'train': dataset})

    # Create a dataloader from the dataset
    dataloader = DataLoader(dataset_dict['train'],
                            batch_size=batch_size,
                            shuffle=shuffle)
    return dataloader, dataset_dict


In [15]:
dataloader, ds = create_dataloader(pairs, batch_size=4)

In [16]:
train_data = ds["train"]

In [17]:
train_data

Dataset({
    features: ['caption', 'phrase'],
    num_rows: 130
})

In [18]:
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["caption" , "phrase"])

Map:   0%|          | 0/130 [00:00<?, ? examples/s]

In [19]:
train_data

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 130
})

In [20]:
trainer = Trainer(
    model=model,
    train_dataset=train_data,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="epoch",
        output_dir="mixtral-moe-lora-instruct-shapeskeare"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [21]:
trainer.train()



Step,Training Loss
2,8.249
4,5.5992
6,4.0964
8,2.7276
10,2.2644
12,2.2533
14,1.8151
16,1.7032
18,1.5468
20,1.5541



Cannot access gated repo for url https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/resolve/main/config.json.
Repo model mistralai/Mixtral-8x7B-Instruct-v0.1 is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mixtral-8x7B-Instruct-v0.1.

Cannot access gated repo for url https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/resolve/main/config.json.
Repo model mistralai/Mixtral-8x7B-Instruct-v0.1 is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mixtral-8x7B-Instruct-v0.1.

Cannot access gated repo for url https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/resolve/main/config.json.
Repo model mistralai/Mixtral-8x7B-Instruct-v0.1 is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mixtral-8x7B-Instruct-v0.1.


TrainOutput(global_step=96, training_loss=1.5162874932090442, metrics={'train_runtime': 1635.8703, 'train_samples_per_second': 0.238, 'train_steps_per_second': 0.059, 'total_flos': 2.753591392390349e+16, 'train_loss': 1.5162874932090442, 'epoch': 2.953846153846154})

In [22]:
model.config.use_cache = True

In [24]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

image_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
image_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

In [42]:
img_path = "6.JPEG"
raw_image = Image.open(img_path).convert('RGB')

# unconditional image captioning
inputs = image_processor(raw_image, return_tensors="pt")

out = image_model.generate(**inputs)

output_text = image_processor.decode(out[0], skip_special_tokens=True)
print(output_text)

several people are sitting around a table with a large blueprint


In [43]:
eval_prompt = generate_eval_prompt(output_text)
model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, repetition_penalty=1.15)[0]))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


<s> [INST]Create a funny phrase/meme on 
several people are sitting around a table with a large blueprint[/INST]</s>"This group's got it all: brains, brawn, and big blueprints!", "Blueprint buddies: where every idea is a masterpiece in the making!", "Building bonds one blueprint at a time!". These phrases capture the fun and collaborative spirit of this group working together to bring their ideas to life. The use of words like 'brains', 'brawn', and 'masterpiece' add humor by highlighting the group's intelligence
