In [None]:
import os

# Set the cache directory to your preferred path
os.environ['HF_HOME'] = '/cs/student/projects2/aisd/2024/shekchu/snlp'

# Access the cache directory using the environment variable
cache_dir = os.getenv('HF_HOME', 'Cache directory not set')
print(f"Model weights are stored in: {cache_dir}")


from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
import torch
from datasets import load_dataset
from torch.utils.data import Dataset

# Load your base model and tokenizer
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
)
model.config.use_cache = False  # Disable cache for gradient checkpointing
model.gradient_checkpointing_enable()
model.eval()

# Import LoRA components from PEFT
from peft import LoraConfig, get_peft_model, TaskType

# Update the LoRA configuration to target the combined qkv projection module
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                   # Rank for the LoRA update matrices
    lora_alpha=32,         # Scaling factor for LoRA
    lora_dropout=0.1,      # Dropout for LoRA layers
    target_modules=["qkv_proj"]  # Updated target module based on your model's architecture
)

# Wrap the model with the LoRA adapter
model = get_peft_model(model, lora_config)

# Prepare your dataset
dataset = load_dataset("FinGPT/fingpt-convfinqa")

# Create a custom dataset class for training
class FinQADataset(Dataset):
    def __init__(self, dataset, tokenizer):
        self.dataset = dataset
        self.tokenizer = tokenizer
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        item = self.dataset[idx]
        # Format input as desired
        input_text = f"{item['instruction']}\n{item['input']}"
        target_text = item['output']
        
        # Tokenize input and target
        model_inputs = self.tokenizer(input_text, truncation=True, padding='max_length', max_length=512)
        target = self.tokenizer(target_text, truncation=True, padding='max_length', max_length=512)
        
        return {
            'input_ids': torch.tensor(model_inputs['input_ids']),
            'attention_mask': torch.tensor(model_inputs['attention_mask']),
            'labels': torch.tensor(target['input_ids'])
        }

# Instantiate the training dataset
train_dataset = FinQADataset(dataset['train'], tokenizer)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./lora-finetuned",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=50,
    # evaluation_strategy="steps",
)

# Initialize the Trainer with the LoRA-adapted model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

# Start training
trainer.train()

In [1]:
import os
import json
import torch
from torch.utils.data import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer

# Set the cache directory to your preferred path
os.environ['HF_HOME'] = '/cs/student/projects2/aisd/2024/shekchu/snlp'
cache_dir = os.getenv('HF_HOME', 'Cache directory not set')
print(f"Model weights are stored in: {cache_dir}")

# Load your base model and tokenizer
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
)
model.config.use_cache = False  # Disable cache for gradient checkpointing
model.gradient_checkpointing_enable()
model.eval()

# Import LoRA components from PEFT
from peft import LoraConfig, get_peft_model, TaskType

# Update the LoRA configuration to target the combined qkv projection module
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                   # Rank for the LoRA update matrices
    lora_alpha=32,         # Scaling factor for LoRA
    lora_dropout=0.1,      # Dropout for LoRA layers
    target_modules=["qkv_proj"]  # Updated target module based on your model's architecture
)

# Wrap the model with the LoRA adapter
model = get_peft_model(model, lora_config)

# Define a custom Dataset class that reads from a local JSON file.
class FinQADataset(Dataset):
    def __init__(self, json_file, tokenizer, max_length=512):
        """
        Args:
            json_file (str): The filename of the JSON file (e.g., "train.json").
            tokenizer: The tokenizer to convert text to tokens.
            max_length (int): Maximum sequence length for tokenization.
        """
        # Create full path relative to this script's directory
        file_path = os.path.join(os.path.dirname(__file__), json_file)
        with open(file_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        # Construct input text. If your JSON uses different keys, adjust accordingly.
        # For example, if your JSON has "question" and "answer", you may want:
        # input_text = item.get("question", "")
        # target_text = item.get("answer", "")
        input_text = f"{item.get('instruction', '')}\n{item.get('input', '')}"
        target_text = item.get("output", "")

        # Tokenize input and target
        model_inputs = self.tokenizer(
            input_text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
        )
        target = self.tokenizer(
            target_text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
        )
        
        return {
            'input_ids': torch.tensor(model_inputs['input_ids']),
            'attention_mask': torch.tensor(model_inputs['attention_mask']),
            'labels': torch.tensor(target['input_ids'])
        }

# Instantiate the training dataset using the local train.json file
train_dataset = FinQADataset("./dataset/train.json", tokenizer)

# (Optional) If you want to load the test dataset, note that it might not have labels.
# For example:
# test_dataset = FinQADataset("test.json", tokenizer)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./lora-finetuned",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=50,
    # evaluation_strategy="steps",
)

# Initialize the Trainer with the LoRA-adapted model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

# Start training
trainer.train()

  from .autonotebook import tqdm as notebook_tqdm
  return torch._C._cuda_getDeviceCount() > 0


Model weights are stored in: /cs/student/projects2/aisd/2024/shekchu/snlp


ValueError: FlashAttention2 has been toggled on, but it cannot be used due to the following error: Flash Attention 2 is not available on CPU. Please make sure torch can access a CUDA device.