In [None]:
# Install the notebook's dependencies into the environment of the running kernel.
# NOTE(review): versions are unpinned; pin them once the working set is confirmed,
# since keyword names accepted by the training APIs used below can differ between releases.
%pip install -q transformers datasets accelerate bitsandbytes torch peft trl PyPDF2 scikit-learn pandas

In [None]:
# Authenticate with the Hugging Face Hub. The token is taken from HF_TOKEN
# (a Python variable or an environment variable) so it is never written into the notebook.
!huggingface-cli login --token $HF_TOKEN

In [3]:
import warnings
warnings.filterwarnings("ignore")

import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, logging as transformers_logging
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.tensorboard import SummaryWriter
import os

transformers_logging.set_verbosity_error()

In [4]:
# Ensure every output location used later (checkpoints, final model, logs) exists.
for output_dir in ("models/checkpoints", "models/final", "logs"):
    os.makedirs(output_dir, exist_ok=True)

In [5]:
def setup_model(model_name="mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=False):
   """Load the 4-bit quantized base model and its tokenizer for fine-tuning.

   Args:
       model_name: Hub id or local path passed to both ``from_pretrained``
           calls. Defaults to the Mistral instruct checkpoint used by this
           notebook.
       trust_remote_code: Forwarded to both ``from_pretrained`` calls.
           Defaults to False so that no repository-supplied Python is
           executed; pass True only for checkpoints that need custom code.

   Returns:
       A ``(model, tokenizer)`` tuple. The model has gradient checkpointing
       and input gradients enabled and its KV cache disabled; the tokenizer
       pads on the right using the EOS token.
   """
   # NF4 4-bit weights with bfloat16 compute, no double quantization.
   bnb_config = BitsAndBytesConfig(
       load_in_4bit=True,
       bnb_4bit_quant_type="nf4",
       bnb_4bit_compute_dtype=torch.bfloat16,
       bnb_4bit_use_double_quant=False
   )

   model = AutoModelForCausalLM.from_pretrained(
       model_name,
       quantization_config=bnb_config,
       device_map="auto",
       torch_dtype=torch.bfloat16,
       trust_remote_code=trust_remote_code
   )

   tokenizer = AutoTokenizer.from_pretrained(
       model_name,
       trust_remote_code=trust_remote_code
   )
   # Use EOS as the padding token and pad on the right.
   tokenizer.pad_token = tokenizer.eos_token
   tokenizer.padding_side = "right"

   # The KV cache only helps incremental decoding; it is of no use together
   # with gradient checkpointing and just costs memory during train/eval passes.
   model.config.use_cache = False
   model.gradient_checkpointing_enable()
   model.enable_input_require_grads()

   return model, tokenizer

In [6]:
def setup_lora(model):
   """Wrap ``model`` with LoRA adapters on its attention and MLP projections.

   Args:
       model: The base model to adapt (as returned by ``setup_model``).

   Returns:
       The result of ``get_peft_model(model, lora_config)``.
   """
   lora_config = LoraConfig(
       r=64,
       lora_alpha=128,
       # Bare names are matched against the end of each module path, so
       # "gate_proj" already selects "...mlp.gate_proj"; listing the
       # "mlp."-prefixed variants as well added nothing.
       target_modules=[
           "q_proj", "k_proj", "v_proj", "o_proj",
           "gate_proj", "up_proj", "down_proj",
       ],
       lora_dropout=0.1,
       bias="none",
       task_type="CAUSAL_LM"
   )
   return get_peft_model(model, lora_config)

In [7]:
def prepare_dataset(df):
   """Turn a question/answer DataFrame into a single-column ``text`` dataset.

   Each row's ``question`` and ``answer`` values are embedded in the
   instruction template below; the resulting records are handed to
   ``Dataset.from_list``.
   """
   def render(question, answer):
       # One complete training example: instruction, question and answer.
       return f"<s>[INST]@ESE577 Course Question. Provide a detailed answer using course-specific terminology and examples. Question: {question} [/INST]Answer: {answer}</s>"

   records = [
       {"text": render(row['question'], row['answer'])}
       for _, row in df.iterrows()
   ]
   return Dataset.from_list(records)

In [8]:
from transformers import TrainerCallback

class MetricsCallback(TrainerCallback):
    """Trainer callback that forwards numeric log entries to a scalar writer."""

    def __init__(self, writer):
        # Only ``writer.add_scalar(tag, value, step)`` is ever called on this object.
        self.writer = writer

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Write every int/float entry of ``logs`` at the current global step."""
        if not logs:
            return
        for tag, value in logs.items():
            if not isinstance(value, (int, float)):
                # Non-numeric entries are skipped.
                continue
            self.writer.add_scalar(tag, value, state.global_step)

In [9]:
def train_model(train_df, val_df):
   """Fine-tune the LoRA-wrapped model on the Q&A data and save the result.

   Args:
       train_df: DataFrame of training rows, passed to ``prepare_dataset``.
       val_df: DataFrame of validation rows, passed to ``prepare_dataset``.

   Returns:
       The LoRA-wrapped model object that was handed to the trainer.

   Side effects:
       Writes checkpoints to ``models/checkpoints``, the final model to
       ``models/final`` and scalar logs to ``logs/tensorboard`` and
       ``logs/training``.
   """
   model, tokenizer = setup_model()
   model = setup_lora(model)

   train_dataset = prepare_dataset(train_df)
   val_dataset = prepare_dataset(val_df)

   writer = SummaryWriter(log_dir="logs/tensorboard")
   try:
       training_args = TrainingArguments(
           output_dir="models/checkpoints",
           num_train_epochs=6,
           # 2 sequences x 32 accumulation steps = 64 sequences per optimizer step per device.
           per_device_train_batch_size=2,
           gradient_accumulation_steps=32,
           learning_rate=3e-4,
           warmup_ratio=0.2,
           weight_decay=0.1,
           bf16=True,
           logging_steps=10,
           # Save and evaluate on the same schedule so the best checkpoint can be reloaded.
           save_strategy="epoch",
           evaluation_strategy="epoch",
           load_best_model_at_end=True,
           optim="adamw_torch",
           lr_scheduler_type="cosine_with_restarts",
           report_to=["tensorboard"],
           logging_dir="logs/training"
       )

       trainer = SFTTrainer(
           model=model,
           train_dataset=train_dataset,
           eval_dataset=val_dataset,
           tokenizer=tokenizer,
           args=training_args,
           max_seq_length=1024
       )

       trainer.add_callback(MetricsCallback(writer))

       print("Starting training...")
       trainer.train()

       print("Saving final model...")
       trainer.save_model("models/final")
   finally:
       # Close the writer even when training or saving fails, so the event
       # file is flushed and its handle is not leaked.
       writer.close()

   return model

In [10]:
if __name__ == "__main__":
   # End-to-end training run: read the CSV, hold out 20% of the rows for
   # validation (fixed seed), then fine-tune on the rest.
   print("Loading dataset...")
   full_df = pd.read_csv('/kaggle/input/shuffled-data/shuffled_file.csv')

   print("Splitting dataset...")
   train_df, val_df = train_test_split(
       full_df,
       test_size=0.2,
       random_state=42,
   )

   print(f"Training samples: {len(train_df)}")
   print(f"Validation samples: {len(val_df)}")

   model = train_model(train_df, val_df)

   # Tell the reader where the artifacts ended up.
   print("\nTraining completed! Model saved in models/final")
   print("View training metrics with: tensorboard --logdir=logs/tensorboard")

Loading dataset...
Splitting dataset...
Training samples: 319
Validation samples: 80


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Map:   0%|          | 0/319 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Starting training...
{'eval_loss': 1.3730199337005615, 'eval_runtime': 60.5827, 'eval_samples_per_second': 1.321, 'eval_steps_per_second': 0.165, 'epoch': 1.0}
{'loss': 1.8246, 'grad_norm': 42.466888427734375, 'learning_rate': 0.0002799038105676658, 'epoch': 2.0}
{'eval_loss': 1.0979886054992676, 'eval_runtime': 60.5766, 'eval_samples_per_second': 1.321, 'eval_steps_per_second': 0.165, 'epoch': 2.0}
{'eval_loss': 1.0811164379119873, 'eval_runtime': 60.5821, 'eval_samples_per_second': 1.321, 'eval_steps_per_second': 0.165, 'epoch': 3.0}
{'loss': 0.4099, 'grad_norm': 43.64652633666992, 'learning_rate': 0.00011117714323462186, 'epoch': 4.0}
{'eval_loss': 1.410768985748291, 'eval_runtime': 60.5786, 'eval_samples_per_second': 1.321, 'eval_steps_per_second': 0.165, 'epoch': 4.0}
{'eval_loss': 1.4830907583236694, 'eval_runtime': 60.5804, 'eval_samples_per_second': 1.321, 'eval_steps_per_second': 0.165, 'epoch': 5.0}
{'loss': 0.0884, 'grad_norm': 10.76285171508789, 'learning_rate': 0.0, 'epoch

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

def load_model(
    base_model_name="mistralai/Mistral-7B-Instruct-v0.2",
    adapter_path="/kaggle/input/finetuned_model/pytorch/default/1/final",
    trust_remote_code=False,
):
    """Load the 4-bit base model, attach the fine-tuned adapter, return it with its tokenizer.

    Args:
        base_model_name: Hub id or local path of the base model and tokenizer.
            Defaults to the Mistral instruct checkpoint used by this notebook.
        adapter_path: Location of the saved adapter passed to
            ``PeftModel.from_pretrained``. Defaults to the Kaggle input path
            this notebook was written against.
        trust_remote_code: Forwarded to both ``from_pretrained`` calls.
            Defaults to False so that no repository-supplied Python is
            executed; pass True only for checkpoints that need custom code.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` is the adapter-wrapped
        base model.
    """
    # Same quantization settings as used for training: NF4 weights, bfloat16 compute.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=trust_remote_code
    )

    tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        trust_remote_code=trust_remote_code
    )
    # Use EOS as the padding token and pad on the right.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    model = PeftModel.from_pretrained(base_model, adapter_path)
    return model, tokenizer

def generate_response(model, tokenizer, question):
    """Generate the model's answer to ``question`` and return it as plain text.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
        tokenizer: Callable tokenizer exposing ``eos_token_id`` and ``decode``.
        question: The user's question, inserted into the instruction prompt.

    Returns:
        The decoded continuation only (prompt excluded), stripped of
        surrounding whitespace.
    """
    prompt = f"""<s>[INST]@ESE577. For multiple choice questions:
1. Start with "Answer: [letter]"
2. Reference specific course sections
3. Explain using course examples
4. Connect to course concepts

Question: {question} [/INST]"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        top_k=40,
        num_beams=2,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length instead of splitting the decoded text on "[/INST]", which would
    # cut the answer short whenever the generated text contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

def chat():
   """Run an interactive question/answer loop on stdin/stdout.

   Loads the model once, then repeatedly reads a question and prints the
   generated response. The loop ends when the user types ``exit`` (any case,
   surrounding whitespace ignored), when stdin is closed, or on Ctrl-C.
   Blank input is ignored. Errors raised while generating are printed and
   the loop continues.
   """
   print("Loading model...")
   model, tokenizer = load_model()
   print("Model loaded! Ask your questions (type 'exit' to quit)")

   while True:
       try:
           question = input("\nQuestion: ").strip()
       except (EOFError, KeyboardInterrupt):
           # No more input (closed stdin / interrupted prompt): leave quietly
           # instead of surfacing a traceback.
           print()
           break

       if question.lower() == 'exit':
           break
       if not question:
           # Nothing to ask; do not spend a generation on an empty prompt.
           continue

       try:
           response = generate_response(model, tokenizer, question)
           print("\nResponse:")
           print("=" * 50)
           print(response)
       except Exception as e:
           # Best effort: report the failure and keep the session alive.
           print(f"Error: {e}")

# Start the interactive session only when this cell/script is executed directly.
if __name__ == "__main__":
   chat()