# 1 Imports

In [None]:
# imports

import os
import re
import math
from tqdm import tqdm
from huggingface_hub import login
from dotenv import load_dotenv
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, set_seed
from peft import LoraConfig, PeftModel
from datetime import datetime

# 2 Connect to OpenAI & HuggingFace

In [None]:
# Load environment variables from a file called .env

load_dotenv()
# The conventional variable name is OPENAI_API_KEY. The previous spelling,
# OPEN_API_KEY, is still honoured as a fallback so existing .env files keep working.
api_key = os.getenv('OPENAI_API_KEY') or os.getenv('OPEN_API_KEY')

In [None]:
# Log in to the HuggingFace Hub with the token read from .env

load_dotenv()
hf_token = os.environ.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

# 3 Model Selection

## 3.1 Model Hyperparameters

In [None]:
# Constants

# Checkpoint identifier passed to from_pretrained() in the cells below
base_model_name = "Qwen/Qwen-7B"

# Hyperparameters (LoRA)
# NOTE(review): the first-generation Qwen/Qwen-7B remote code appears to name its
# attention projections differently (e.g. c_attn / c_proj) - confirm that these
# target module names exist in the loaded model before building a LoraConfig.
target_modules = [
    "q_proj",
    "v_proj",
    "k_proj",
    "o_proj",
]
lora_alpha = 64
lora_r = 32

## 3.2 Model Load w/o Quantization

In [None]:
# Load the base model without any quantization config
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    device_map="auto",
)

In [None]:
# Memory footprint: get_memory_footprint() is divided by 1024 ** 3 and shown with two decimals
_footprint_gb = base_model.get_memory_footprint() / 1024 ** 3
print(f"Memory footprint of the base model: {_footprint_gb:.2f} GB")

In [None]:
# Bare expression: as the last line of a notebook cell this displays the model's repr
base_model

## 3.3 Model Load using Quantization

In [None]:
# Load the base model using 8 bit

# Quantization settings handed to from_pretrained() below
quant_config = BitsAndBytesConfig(load_in_8bit=True)

# Rebinds base_model; any model loaded by an earlier cell stays referenced
# until this assignment completes.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quant_config,
)

In [None]:
# Load the base model using 4 bit (no tokenizer is loaded in this cell)

# 4-bit settings: NF4 quantization type, double quantization enabled,
# and bfloat16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map="auto",  # same device placement argument as the other loads in this notebook
    trust_remote_code=True,
)