In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from peft import (
    LoraConfig,
    PeftModel,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training,
)

In [36]:
!pip install -upgrade peft


Usage:   
  pip install [options] <requirement specifier> [package-index-options] ...
  pip install [options] -r <requirements file> [package-index-options] ...
  pip install [options] [-e] <vcs project url> ...
  pip install [options] [-e] <local project path> ...
  pip install [options] <archive url/path> ...

no such option: -u


## Model loading

## Model Evaluation

In [13]:
# Base checkpoint handed to AutoModelForCausalLM.from_pretrained below.
# NOTE(review): this names Llama-3.1 while the adapter repo below is called
# "Llama-3-8B-Instruct-SQUAD" — confirm the adapter was trained on this base.
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
# Hub repo used twice in this cell: as the tokenizer source and as the PEFT adapter source.
HUG_MODEL = "Preethi-1995/Llama-3-8B-Instruct-SQUAD"
tokenizer = AutoTokenizer.from_pretrained(HUG_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Resize the embedding matrix to the tokenizer's length (rounded up to a multiple
# of 8) before the adapter is attached.
# Presumably the fine-tuning tokenizer added tokens to the base vocabulary — TODO confirm.
model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
# Wrap the resized base model with the adapter stored in HUG_MODEL, then rebind
# `model` to whatever merge_and_unload() returns.
model = PeftModel.from_pretrained(model, HUG_MODEL)
model = model.merge_and_unload() 

Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.03s/it]


**Upload the model checkpoint to Azure Blob Storage**

In [None]:
!pip install pyyaml


In [None]:
!pip install azure-storage-blob

In [14]:
from azure.storage.blob import ContainerClient
import os
import yaml

In [7]:
def get_files(dir):
    """Lazily yield the visible regular files directly inside ``dir``.

    Args:
        dir: path of the directory to scan (not recursive).

    Yields:
        ``os.DirEntry`` objects for entries that are files and whose name
        does not start with a dot.
    """
    with os.scandir(dir) as listing:
        for item in listing:
            # Skip anything that is not a file (e.g. sub-directories).
            if not item.is_file():
                continue
            # Skip dot-files.
            if item.name.startswith('.'):
                continue
            yield item

In [17]:
# Checkpoint directory whose top-level files are listed here.
# NOTE: `dir` shadows the built-in of the same name; the name is kept because
# the directory-upload cell further down reads it.
dir = '/home/azureuser/cloudfiles/code/Users/EzhilKrishna/mnt/outputs/checkpoint-6160'
# get_files() returns a one-shot generator. Materialise it so that printing the
# entries does not leave `files` exhausted (and therefore empty) for a later
# upload(files, ...) call.
files = list(get_files(dir))
print(*files)

<DirEntry 'adapter_config.json'> <DirEntry 'adapter_model.safetensors'> <DirEntry 'optimizer.pt'> <DirEntry 'README.md'> <DirEntry 'rng_state.pth'> <DirEntry 'scheduler.pt'> <DirEntry 'special_tokens_map.json'> <DirEntry 'tokenizer.json'> <DirEntry 'tokenizer_config.json'> <DirEntry 'trainer_state.json'> <DirEntry 'training_args.bin'>


In [None]:
def load_config(config_path=None):
    """Load the YAML configuration file and return its parsed content.

    Args:
        config_path: optional explicit path to the YAML file. When omitted,
            ``config.yaml`` is looked up next to this source file, or in the
            current working directory when ``__file__`` is not defined (as is
            the case inside a notebook kernel).

    Returns:
        Whatever ``yaml.load`` produces for the file.

    Raises:
        OSError: if the configuration file cannot be opened.
    """
    if config_path is None:
        try:
            dir_root = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # Notebook cells have no __file__; fall back to the working directory.
            dir_root = os.getcwd()
        config_path = os.path.join(dir_root, 'config.yaml')

    with open(config_path, 'r') as yamlfile:
        # NOTE(security): FullLoader is kept for compatibility. If the config is
        # plain data, prefer yaml.safe_load(yamlfile) for files that may come
        # from an untrusted source.
        return yaml.load(yamlfile, Loader=yaml.FullLoader)

In [20]:
def upload(files, connection_string, container_name, overwrite=False):
    """Upload local files to an Azure blob container, deleting each local copy afterwards.

    Args:
        files: iterable of entries exposing ``.name`` and ``.path``
            (for example the ``os.DirEntry`` objects produced by ``os.scandir``).
        connection_string: storage-account connection string passed to
            ``ContainerClient.from_connection_string``.
        container_name: name of the target container.
        overwrite: forwarded to ``upload_blob``. The default ``False`` keeps the
            previous behaviour, where uploading to an existing blob name fails.

    Side effects:
        Each file is removed from the local disk once its upload call has
        returned. Blobs are named after ``file.name`` (no directory prefix).
    """
    container_client = ContainerClient.from_connection_string(connection_string, container_name)
    print("Uploading files to blob storage...")

    for file in files:
        print(file)
        blob_client = container_client.get_blob_client(file.name)
        with open(file.path, 'rb') as data:
            blob_client.upload_blob(data, overwrite=overwrite)
        print(f'{file.name} uploaded to blob storage')
        # Delete only after the handle is closed: removing a file that is still
        # open fails on Windows.
        os.remove(file.path)
        print(f'{file.name} removed from local disk')



In [22]:
# Read the Blob storage connection string from the environment instead of
# embedding the account key in the notebook.
# SECURITY: a storage account key was previously committed in this cell; treat
# it as compromised and rotate it in the Azure portal.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    raise RuntimeError(
        "Set the AZURE_STORAGE_CONNECTION_STRING environment variable before running this cell."
    )
#container_name = "finetuningwork4299623425" 
container_name = "model-repo" 

#upload(files, connection_string, container_name)

In [24]:
from azure.storage.blob import BlobServiceClient
from azure.storage.blob import ContainerClient
import os

# Initialize the Blob Service Client and get a client for the target container
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

# Directory in ML Studio with the files to upload, and the blob-name prefix
# ("virtual folder") under which they are stored in the container.
local_directory = dir  # Replace with the ML Studio file path
repository_name = 'squad_qa_f1_76'

# Upload every file below local_directory (recursively) to the Blob container.
# The loop variables are deliberately not called `files`/`dirs`, so the
# notebook-level `files` listing is not overwritten by this cell.
for root, _dirs, file_names in os.walk(local_directory):
    for file_name in file_names:
        file_path = os.path.join(root, file_name)
        # Blob names use "/" as the virtual-directory delimiter; normalise the
        # OS separator so the layout is identical when run on Windows.
        relative_path = os.path.relpath(file_path, local_directory).replace(os.sep, "/")
        blob_name = f"{repository_name}/{relative_path}"

        # Create a Blob client
        blob_client = container_client.get_blob_client(blob_name)

        # Upload file to Blob storage, replacing any existing blob of the same name
        with open(file_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)

        print(f"Uploaded: {file_name} to {container_name}/{blob_name}")

Uploaded: adapter_config.json to model-repo/squad_qa_f1_76/adapter_config.json
Uploaded: adapter_model.safetensors to model-repo/squad_qa_f1_76/adapter_model.safetensors
Uploaded: optimizer.pt to model-repo/squad_qa_f1_76/optimizer.pt
Uploaded: README.md to model-repo/squad_qa_f1_76/README.md
Uploaded: rng_state.pth to model-repo/squad_qa_f1_76/rng_state.pth
Uploaded: scheduler.pt to model-repo/squad_qa_f1_76/scheduler.pt
Uploaded: special_tokens_map.json to model-repo/squad_qa_f1_76/special_tokens_map.json
Uploaded: tokenizer.json to model-repo/squad_qa_f1_76/tokenizer.json
Uploaded: tokenizer_config.json to model-repo/squad_qa_f1_76/tokenizer_config.json
Uploaded: trainer_state.json to model-repo/squad_qa_f1_76/trainer_state.json
Uploaded: training_args.bin to model-repo/squad_qa_f1_76/training_args.bin
