In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found,
# so the input files available to this session show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

In [3]:
import os
# Read the Supabase connection settings from the Kaggle secret store so that
# no credential is hard-coded in the notebook.
SUPABASE_URL = user_secrets.get_secret("SUPABASE_URL")
SUPABASE_KEY = user_secrets.get_secret("SUPABASE_KEY")

# Exported through the process environment instead of a Python name.
# NOTE(review): nothing visible in this notebook reads NOMIC_API_KEY — confirm it is still required.
os.environ["NOMIC_API_KEY"] = user_secrets.get_secret("NOMIC_API_KEY")

In [None]:
!pip cache purge

In [4]:
!pip install -q supabase datasets torch peft accelerate wandb huggingface_hub rouge_score mlflow fsspec==2024.10.0 async-timeout==4.0.3

In [5]:
!pip install transformers

In [5]:
from supabase import create_client, Client
from typing import List, Dict

In [6]:
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

In [7]:
from huggingface_hub import login

login(token=user_secrets.get_secret("HUGGINGFACE_TOKEN"))

In [8]:
def fetch_conversation_data(supabase: Client) -> List[Dict]:
    """Fetch every conversation together with the text of its linked document chunks.

    Args:
        supabase: Client used to query the ``conversations`` table.

    Returns:
        One dict per conversation with the keys ``query``, ``response`` and
        ``context`` (a list of chunk texts, possibly empty). Chunks without
        content are skipped so ``context`` only ever holds values that can be
        joined into a prompt. If anything goes wrong the error is printed and
        an empty list is returned.
    """
    try:
        response = (
            supabase.table("conversations")
            .select("query, response, conversation_document_chunks(document_chunks(chunk_content))")
            .execute()
        )

        result = []
        for conversation in response.data or []:
            context = []

            # A conversation may have no linked chunks at all; treat a missing
            # or null relation as "no context" instead of failing the whole fetch.
            for link in conversation.get("conversation_document_chunks") or []:
                chunks = link.get("document_chunks")
                if not chunks:
                    continue
                # Accept both a single embedded row and a list of rows.
                if isinstance(chunks, dict):
                    chunks = [chunks]
                for chunk in chunks:
                    content = chunk.get("chunk_content")
                    # get_query() joins the context with "\n".join(), which needs strings.
                    if content is not None:
                        context.append(content)

            result.append({
                "query": conversation["query"],
                "response": conversation["response"],
                "context": context,
            })

        return result

    except Exception as e:
        print(f"Error fetching data: {e}")
        return []

In [9]:
data_for_finetuning = fetch_conversation_data(supabase)

In [10]:
import random

def split_dataset(dataset, seed=None):
    """Shuffle ``dataset`` and split it 80 % / 10 % / 10 % into train, validation and test.

    The input is copied before shuffling, so the caller's sequence is left
    untouched and re-running the cell does not reshuffle already-shuffled data.

    Args:
        dataset: Sequence of examples.
        seed: Optional seed for a private random generator. When ``None``
            (the default) the module-level ``random`` state is used, as before.

    Returns:
        A ``(train_data, val_data, test_data)`` tuple of lists. The test split
        receives whatever remains after the integer-truncated train and
        validation sizes.
    """
    shuffled = list(dataset)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)

    total_size = len(shuffled)
    train_size = int(0.8 * total_size)
    val_size = int(0.1 * total_size)
    val_end = train_size + val_size

    train_data = shuffled[:train_size]
    val_data = shuffled[train_size:val_end]
    test_data = shuffled[val_end:]

    return train_data, val_data, test_data

In [11]:
data_for_finetuning[4]['context'][0]

In [12]:
training_data, validation_data, test_data = split_dataset(data_for_finetuning)


In [13]:
len(training_data), len(validation_data), len(test_data)


In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Hugging Face Hub id of the base checkpoint that is fine-tuned later in this notebook.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# torch_dtype="auto" and device_map="auto" leave dtype and device placement to the library.
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run —
# confirm this model actually needs it and drop the flag otherwise.
baseline_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto", trust_remote_code=True)

In [15]:
def get_query(row, include_response=True):
    """Render one example as chat-template text for the module-level ``tokenizer``.

    Args:
        row: Mapping with ``context`` (list of strings), ``query`` and
            ``response``.
        include_response: When ``True`` (default, the training format) the
            reference answer is appended as the assistant turn. When ``False``
            the assistant turn is left out and the template's generation
            prompt is added instead, giving a prompt suitable for inference
            that does not reveal the reference answer.

    Returns:
        The rendered conversation as a single string (not tokenized).
    """
    sys_prompt = """
    You are an AI agent tasked with answering technical questions for IT Software systems. Your target audience will 
    generally be developers and engineers but occasionally technical managers so answer questions accordingly.

    You will generally be provided with some context elements and your priority will be to answer questions based on the context provided.
    You are to avoid negative or speculative responses, and prioritize factual information over assumption.

    Answer the questions as comprehensively as possible.
    """

    context_text = "\n".join(row["context"])
    prompt = f"""
    Context: 
    {context_text}
    
    Query:
    {row["query"]}
    """

    messages = [
        {"role" : "system", "content" : sys_prompt},
        {"role" : "user", "content" : prompt },
    ]
    if include_response:
        messages.append({"role" : "assistant", "content" : row["response"]})

    text = tokenizer.apply_chat_template(
        messages,
        tokenize = False,
        # Only ask the template to open an assistant turn when we did not supply one.
        add_generation_prompt=not include_response
    )

    return text

In [16]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings handed to PEFT; adapters are attached only to the
# modules named q_proj and v_proj, for a causal language-modelling task.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type='CAUSAL_LM'
)

# Wrap the base model with the adapter configuration and switch it to training mode.
# NOTE(review): PEFT is understood to inject adapters into the wrapped module itself,
# so `baseline_model` should presumably not be used as an untouched baseline afterwards — confirm.
model_for_finetuning = get_peft_model(baseline_model, lora_config)
model_for_finetuning.train()

In [17]:
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# To force CPU execution instead, use: device = torch.device("cpu")
print(f"Using device: {device}")

In [18]:
from datasets import load_dataset, Dataset


# Turn each split (a list of dicts with query / response / context) into a Dataset.
train_dataset = Dataset.from_list(training_data)
val_dataset = Dataset.from_list(validation_data)
test_dataset = Dataset.from_list(test_data)

def preprocess_data(example):
    """Tokenize one example and build labels that supervise only the answer.

    The conversation is rendered with ``get_query`` and tokenized to a fixed
    length of 1024. Labels are a copy of the input ids in which every token
    that starts before the answer text, and every padding position, is set to
    -100 so it is ignored by the loss.

    The answer boundary is located by character offset rather than by looking
    for the first ``assistant`` token, so an occurrence of that word inside
    the context or query cannot shift the mask. This relies on the tokenizer
    supporting ``return_offsets_mapping`` (fast tokenizers).

    Tensors stay on the CPU: the results are stored by ``Dataset.map`` and the
    Trainer moves each batch to the training device itself.

    NOTE(review): truncation keeps the first 1024 tokens, so a very long
    context can cut off part or all of the answer; such examples end up with
    few or no supervised tokens.

    Args:
        example: Mapping with ``query``, ``response`` and ``context``.

    Returns:
        Dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors.
    """
    text = get_query(example)

    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=1024,
        padding="max_length",
        truncation=True,
        return_offsets_mapping=True
    )

    input_ids = encoded["input_ids"].squeeze(0)
    attention_mask = encoded["attention_mask"].squeeze(0)
    # Character position in `text` at which each token begins.
    token_starts = encoded["offset_mapping"].squeeze(0)[:, 0]

    labels = input_ids.clone()

    # The answer is the last message, so its last occurrence in the rendered
    # text marks where supervision should begin. Searching for the stripped
    # answer keeps this working if the chat template trims whitespace.
    response_text = (example["response"] or "").strip()
    response_char_start = text.rfind(response_text) if response_text else -1
    if response_char_start >= 0:
        labels[token_starts < response_char_start] = -100
    else:
        # Answer not found (or empty): nothing to supervise in this example.
        labels[:] = -100

    # Mask padding via the attention mask so a real end-of-sequence token is
    # never confused with padding when both share an id.
    labels[attention_mask == 0] = -100
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }


# Apply preprocess_data to every example of each split and drop the raw text
# columns, leaving only what preprocess_data returns.
tokenized_train_dataset = train_dataset.map(preprocess_data, remove_columns=['query', 'response', 'context'])
tokenized_val_dataset = val_dataset.map(preprocess_data, remove_columns=['query', 'response', 'context'])
tokenized_test_dataset = test_dataset.map(preprocess_data, remove_columns=['query', 'response', 'context'])

In [19]:
tokenized_train_dataset

In [20]:
print(len(tokenized_train_dataset[0]["input_ids"]))
print(len(tokenized_train_dataset[0]["attention_mask"]))
print(len(tokenized_train_dataset[0]["labels"]))

In [21]:
import wandb
wandb.login(key=user_secrets.get_secret("WANDB_API_KEY"))

In [22]:
# Training hyper-parameters. They are passed to TrainingArguments and also
# logged to MLflow later, so change them here only.
learning_rate=2e-4
num_train_epochs= 3
# One example per step, with gradients accumulated over 8 steps.
per_device_train_batch_size=1
gradient_accumulation_steps= 8
fp16=False
logging_steps=1
output_dir="./promptly-finetune"

In [23]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Evaluate and checkpoint once per epoch; the best checkpoint is reloaded at
# the end of training and the result is pushed to the Hugging Face Hub.
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps, 
    learning_rate=learning_rate,
    num_train_epochs=num_train_epochs,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    fp16=fp16,
    # The datasets already contain exactly input_ids / attention_mask / labels.
    remove_unused_columns=False,
    logging_strategy="steps",
    logging_steps=logging_steps,
    dataloader_num_workers=0,
    push_to_hub=True,
    hub_model_id="RevLash/promptly-tuned"
)

trainer = Trainer(
    model=model_for_finetuning,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
)
print(f"Training on device: {next(model_for_finetuning.parameters()).device}")

# The model is logged to MLflow further down, once mlflow has been imported
# and the tracking server / experiment have been configured. Doing it here
# raised a NameError on a fresh kernel that was then reported as a training
# failure.
try:
    trainer.train()
    trainer.save_model("promptly-tuned")
except Exception as e:
    # Report the failure, then stop: continuing would evaluate and publish a
    # model that was never (fully) trained.
    print(f"Training failed with error: {e}")
    raise

In [24]:
# Reload the tokenizer and a separate copy of the base checkpoint, used as the
# un-tuned baseline in the comparison that follows.
# NOTE(review): `baseline_model` was passed to get_peft_model() earlier, which is
# presumably why a second instance is loaded here — confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): trust_remote_code=True allows checkpoint-supplied code to run — confirm it is needed.
baseline_model_for_comparison = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto", trust_remote_code=True)

In [25]:
baseline_model_for_comparison.eval()


In [26]:
def generate_response(model, tokenizer, query, max_new_tokens=512):
    """Generate a completion for ``query`` and return only the newly generated text.

    The prompt is tokenized and moved to the device the model's parameters
    live on, so the function does not depend on any global device setting.
    Only the tokens produced after the prompt are decoded; the result
    therefore never contains prompt text and does not rely on finding an
    ``assistant`` marker in the decoded string.

    Args:
        model: Causal language model exposing ``generate`` and ``parameters``.
        tokenizer: Tokenizer matching ``model``.
        query: Prompt text, already rendered with the chat template.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion with special tokens removed.
    """
    model_device = next(model.parameters()).device
    inputs = tokenizer(query, return_tensors="pt").to(model_device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            do_sample=False,  # Use greedy decoding for consistency
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt; keep what follows it.
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    return response

In [27]:
from rouge_score import rouge_scorer
import pandas as pd


scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Score columns of the results table. Fixing the layout up front keeps the
# averaging below working even when the test split is empty.
metric_columns = [
    "baseline_rouge1",
    "baseline_rouge2",
    "baseline_rougeL",
    "finetuned_rouge1",
    "finetuned_rouge2",
    "finetuned_rougeL",
]

quantitative_results = []
qualitative_examples = []

model_for_finetuning.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
baseline_model_for_comparison = baseline_model_for_comparison.to(device)
for idx, example in enumerate(test_dataset):
    print(idx)

    ground_truth = example["response"]

    # get_query() renders the complete conversation, reference answer
    # included. Cut the text where that answer begins so both models have to
    # produce the answer themselves instead of being shown it in the prompt.
    full_text = get_query(example)
    answer_text = (ground_truth or "").strip()
    answer_start = full_text.rfind(answer_text) if answer_text else -1
    query = full_text[:answer_start] if answer_start >= 0 else full_text

    baseline_response = generate_response(baseline_model_for_comparison, tokenizer, query)
    finetuned_response = generate_response(model_for_finetuning, tokenizer, query)

    baseline_scores = scorer.score(ground_truth, baseline_response)
    finetuned_scores = scorer.score(ground_truth, finetuned_response)

    quantitative_results.append({
        "example_id": idx,
        "baseline_rouge1": baseline_scores['rouge1'].fmeasure,
        "baseline_rouge2": baseline_scores['rouge2'].fmeasure,
        "baseline_rougeL": baseline_scores['rougeL'].fmeasure,
        "finetuned_rouge1": finetuned_scores['rouge1'].fmeasure,
        "finetuned_rouge2": finetuned_scores['rouge2'].fmeasure,
        "finetuned_rougeL": finetuned_scores['rougeL'].fmeasure,
    })

    # Keep the first three examples for a side-by-side qualitative look.
    if idx < 3:
        qualitative_examples.append({
            "example_id": idx,
            "query": example["query"],
            "ground_truth": ground_truth,
            "baseline_response": baseline_response,
            "finetuned_response": finetuned_response
        })


quantitative_df = pd.DataFrame(quantitative_results, columns=["example_id", *metric_columns])

# Mean of every score column, appended as a final "average" row.
average_row = {"example_id": "average"}
average_row.update({name: quantitative_df[name].mean() for name in metric_columns})

quantitative_df = pd.concat([quantitative_df, pd.DataFrame([average_row])], ignore_index=True)
qualitative_df = pd.DataFrame(
    qualitative_examples,
    columns=["example_id", "query", "ground_truth", "baseline_response", "finetuned_response"],
)

In [28]:
print("Quantitative Results (ROUGE Scores):")
quantitative_df

In [29]:
print("\nQualitative Results (First 3 Examples):")
qualitative_df

In [30]:
qualitative_df['query'][0]

In [31]:
qualitative_df['ground_truth'][0]

In [32]:
qualitative_df['baseline_response'][0]

In [33]:
qualitative_df['finetuned_response'][0]

In [34]:
'''
---------- x ---------- x ----------
Setting Up MLFlow
'''

import mlflow
import mlflow.pytorch
import numpy as np
import datetime

# Names used for the experiment and for logged artifacts.
experiment_name = "Promptly"
artifact_path = "models"

# Point the MLflow client at the tracking server (MlFlow Compute Engine).
mlflow.set_tracking_uri("http://34.9.77.249:5000/")

# Look the experiment up by name; create it first when it is not there yet,
# then make it the active experiment either way.
existing_experiment = mlflow.get_experiment_by_name(experiment_name)

if not existing_experiment:
    new_experiment = mlflow.create_experiment(experiment_name)
    mlflow.set_experiment(experiment_name)
    print(f"Experiment '{experiment_name}' does not exist. Creating a new experiment.")
else:
    mlflow.set_experiment(experiment_name)
    print(f"Experiment '{experiment_name}' already exists. Using the existing experiment.")

# Timestamped run name of the form model_run_YYYYmmdd_HHMMSS.
curr_time = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
run_name = "model_run_" + curr_time

In [35]:
average_row

In [36]:
# Record hyper-parameters, the averaged ROUGE scores and the fine-tuned model
# in a single MLflow run. The `with` block ends the run on exit, so no
# explicit mlflow.end_run() call is needed inside it.
with mlflow.start_run(run_name=run_name) as run:
    mlflow.log_params({
        "learning_rate": learning_rate,
        "num_train_epochs": num_train_epochs,
        "per_device_train_batch_size": per_device_train_batch_size,
        "gradient_accumulation_steps": gradient_accumulation_steps,
        "fp16": fp16,
        "logging_steps": logging_steps,
        "output_dir": output_dir
    })

    # Filter out non-numeric values (the "example_id" entry of average_row is a string)
    numeric_metrics = {k: v for k, v in average_row.items() if isinstance(v, (int, float))}

    mlflow.log_metrics(numeric_metrics)

    # Log model checkpoint to MLflow
    print("Logging model checkpoint...")
    mlflow.pytorch.log_model(model_for_finetuning, "models/fine-tuned-qwen")
    print("Model logged successfully.")

In [37]:
import os
import torch
import mlflow
import tarfile
from mlflow.utils.rest_utils import http_request

# Increase MLflow timeout and retries to handle large uploads
# NOTE(review): these two statements only set attributes on the imported
# `http_request` function object; nothing visible here shows MLflow reading
# them. MLflow documents the MLFLOW_HTTP_REQUEST_MAX_RETRIES and
# MLFLOW_HTTP_REQUEST_TIMEOUT environment variables for this purpose —
# confirm which mechanism actually takes effect.
http_request.default_max_retries = 10
http_request.default_timeout = 300  # 5 minutes timeout

# Paths: directory the weights are written to, and the tarball built from it.
model_dir = "models/fine-tuned-qwen"
compressed_model_path = "models/fine-tuned-qwen.tar.gz"

# Ensure directory exists
os.makedirs(model_dir, exist_ok=True)

# Save the Qwen model (only the state_dict to save space)
def save_model(model, model_path):
    """Write ``model.state_dict()`` to ``model.pth`` inside ``model_path``.

    Args:
        model: Object exposing ``state_dict()``.
        model_path: Existing directory that receives ``model.pth``.
    """
    print("Saving model locally...")
    weights = model.state_dict()
    destination = os.path.join(model_path, "model.pth")
    torch.save(weights, destination)
    print(f"Model saved to {model_path}")

# Compress model directory to reduce size
def compress_model(input_dir, output_file):
    """Pack ``input_dir`` into a gzip-compressed tar archive at ``output_file``.

    Inside the archive the directory is stored under its own base name, not
    under the full path it was given with.

    Args:
        input_dir: Directory to archive.
        output_file: Path of the ``.tar.gz`` file to create.
    """
    print("Compressing model for upload...")
    archive = tarfile.open(output_file, "w:gz")
    with archive:
        top_level_name = os.path.basename(input_dir)
        archive.add(input_dir, arcname=top_level_name)
    print(f"Model compressed to {output_file}")

# Main function to log model and parameters
def log_model_to_mlflow(model, run_name="fine-tuned-qwen"):
    """Save, compress and upload ``model`` in a new MLflow run.

    The weights are written to the module-level ``model_dir``, packed into
    ``compressed_model_path`` and logged as an artifact, together with the
    notebook's hyper-parameters and the numeric entries of ``average_row``.

    Args:
        model: Model whose state dict is saved and uploaded.
        run_name: Name given to the MLflow run.
    """
    # Build the on-disk artifact before the run is opened.
    save_model(model, model_dir)
    compress_model(model_dir, compressed_model_path)

    with mlflow.start_run(run_name=run_name):
        print("Logging parameters...")
        hyperparameters = dict(
            learning_rate=learning_rate,
            num_train_epochs=num_train_epochs,
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            fp16=fp16,
            logging_steps=logging_steps,
            output_dir=output_dir,
        )
        mlflow.log_params(hyperparameters)

        print("Logging metrics...")
        # Only numeric entries can be logged as metrics.
        numeric_metrics = {}
        for metric_name, metric_value in average_row.items():
            if isinstance(metric_value, (int, float)):
                numeric_metrics[metric_name] = metric_value

        mlflow.log_metrics(numeric_metrics)

        print("Logging model artifacts...")
        mlflow.log_artifact(compressed_model_path, artifact_path="models")

        print("Model and metadata logged successfully.")

# Example Usage
if __name__ == "__main__":
    # NOTE(review): eval_metrics is assigned but never read in the visible code,
    # and its values look like placeholders rather than measured results —
    # confirm and remove if unused.
    eval_metrics = {
        "train_loss": 0.45,
        "val_loss": 0.32,
        "accuracy": 91.5
    }

    # Saves, compresses and uploads the fine-tuned model in a new MLflow run
    # (uses the function's default run name).
    log_model_to_mlflow(model_for_finetuning)


In [None]:
import requests
import os

def notify_github_after_model_push():
    """Ask GitHub to run the Vertex AI build/deploy workflow on ``main``.

    Sends a workflow-dispatch request for ``build_deploy_model_vertex_ai.yaml``
    in the ``RajivShah1798/promptly`` repository, authenticated with the
    ``GITHUB_TOKEN`` Kaggle secret. The status code and body of the reply are
    printed.

    Raises:
        ValueError: If the ``GITHUB_TOKEN`` secret is empty.
        requests.HTTPError: If GitHub answers with an error status, so a
            failed trigger is not mistaken for a successful one.
        requests.RequestException: On connection problems or when no reply
            arrives within the timeout.
    """
    github_token = user_secrets.get_secret("GITHUB_TOKEN")
    if not github_token:
        # The token comes from the Kaggle secret store, not the environment.
        raise ValueError("GITHUB_TOKEN secret is not set.")

    repo_owner = "RajivShah1798"
    repo_name = "promptly"
    workflow_filename = "build_deploy_model_vertex_ai.yaml"
    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/actions/workflows/{workflow_filename}/dispatches"

    headers = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github+json"
    }

    data = {
        "ref": "main"  # Ensure this matches the branch you want to run the workflow on
    }

    # Bound the wait so a stalled connection cannot hang the notebook run.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    print(f"GitHub notification status: {response.status_code}")
    print(f"Response: {response.text}")
    response.raise_for_status()

notify_github_after_model_push()