In [None]:
# Disabled environment-setup recipe: the commands live inside a string literal,
# so running this cell installs nothing.
# NOTE(review): the pins below combine torch==2.3.1 with torchvision==0.17.2;
# confirm that pairing against the PyTorch/torchvision compatibility table
# before re-enabling these commands.
'''pip install torch transformers accelerate peft trl bitsandbytes datasets
pip uninstall -y torch torchvision torchaudio
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install -U torch==2.3.1
pip install torchvision==0.17.2
pip install --no-cache-dir bitsandbytes'''

In [None]:
# pip install --no-cache-dir bitsandbytes

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
import torch
import pandas as pd
from datasets import Dataset
from transformers import Trainer
import re

# Checkpoint shared by the tokenizer and the model, and the parquet file that
# holds the training data.
BASE_MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
DATASET_URL = "hf://datasets/yxsllgz-uts-org/Math_Consistency-Probability-Llama-3.2-1B-Instruct-style1/data/math-00000-of-00001.parquet"

# Tokenizer: the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
)

# LoRA adapters attached to the q_proj / v_proj modules only.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the quantized model with the adapters, then flip the training-time
# switches. The order of these four statements is kept as in the original.
model = get_peft_model(model, lora_config)
model.gradient_checkpointing_enable()
model.config.use_cache = False
model.enable_input_require_grads()

# Read the dataset and keep only the three columns used to build prompts.
df = pd.read_parquet(DATASET_URL)[['problem', 'm_solution', 'solution']]

def format_prompt(sample):
    """Build the training text for one row.

    Args:
        sample: Mapping-like row exposing the keys 'problem', 'm_solution'
            and 'solution'.

    Returns:
        A dict with a single "text" entry: the problem as the user turn,
        followed by 'm_solution' and then 'solution' as the assistant turn,
        each part separated by a blank line.
    """
    question = sample['problem']
    first_answer = sample['m_solution']
    second_answer = sample['solution']
    prompt_text = f"User: {question}\n\nAssistant: {first_answer}\n\n{second_answer}"
    return {"text": prompt_text}

# One {"text": ...} record per DataFrame row, wrapped in a Hugging Face Dataset.
dataset = Dataset.from_list(list(df.apply(format_prompt, axis=1)))


def remove_repeated_sentences(text):
    """Remove repeated sentences while preserving first-occurrence order.

    The text is split on ". " (unless the period is escaped with a backslash)
    and on runs of newlines. Fragments are compared with their trailing
    periods ignored, so a repeat that ends the text or a line ("A." vs "A")
    is still recognised as a duplicate. Kept fragments are re-joined as
    sentences separated by ". ", without doubling a period a fragment
    already carries.

    Args:
        text: Input string; may be empty.

    Returns:
        The de-duplicated text. If the input ended with a period, so does the
        result. An empty or whitespace-only input yields "".
    """
    stripped = text.strip()
    fragments = re.split(r'(?<!\\)\. |\n+', stripped)  # Split by periods or newlines

    seen = set()
    kept = []
    for fragment in fragments:
        sentence = fragment.strip()
        if not sentence:
            continue
        # Compare without trailing periods: the last fragment of the text (or
        # of a line) keeps its "." after the split, earlier ones do not.
        key = sentence.rstrip(".")
        if key in seen:
            continue
        seen.add(key)
        kept.append(sentence)

    if not kept:
        return ""

    # Terminate every sentence but the last with exactly one period; a
    # fragment that already ends in "." must not receive a second one.
    terminated = [s if s.endswith(".") else s + "." for s in kept[:-1]]
    result = " ".join(terminated + [kept[-1]])

    # If the final fragment was dropped as a duplicate, restore the closing
    # period the input had.
    if stripped.endswith(".") and not result.endswith("."):
        result += "."
    return result

def tokenize_function(examples):
    """De-duplicate the sentences of every text in a batch, then tokenize it.

    Args:
        examples: Batch mapping whose "text" entry is an iterable of strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the cleaned
        batch, called with truncation at 1024 tokens and no special-tokens
        mask.
    """
    deduplicated_batch = list(map(remove_repeated_sentences, examples["text"]))
    tokenizer_options = {
        "truncation": True,
        "max_length": 1024,
        "return_special_tokens_mask": False,
    }
    return tokenizer(deduplicated_batch, **tokenizer_options)

# Tokenize the dataset in batches. Every original column (the raw "text") is
# dropped, so only the tokenizer outputs remain.
columns_to_drop = dataset.column_names
tokenized_dataset = dataset.map(
    tokenize_function,
    remove_columns=columns_to_drop,
    batched=True,
)

# Collator for causal language modeling (mlm=False); padding is applied by the
# collator rather than at tokenization time.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [None]:
# Configure training arguments. The optimizer is the standard PyTorch AdamW;
# the 8-bit bitsandbytes variant is kept below as a commented-out alternative.
training_args = TrainingArguments(
    output_dir="./qlora_deepseek",
    per_device_train_batch_size=2,          # 2 sequences per device per step
    gradient_accumulation_steps=4,          # Accumulate gradients over 4 steps before each update
    learning_rate=2e-4,
    fp16=True,
    # "adamw_hf" selects transformers' own AdamW implementation, which is
    # deprecated in favour of torch.optim.AdamW; "adamw_torch" is the
    # supported equivalent.
    optim="adamw_torch",
    #optim="adamw_bnb_8bit",                 # Use memory-efficient optimizer
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
    remove_unused_columns=False             # Important for custom data collation
)

# Initialize Trainer with data collator
# NOTE(review): eval_dataset is the first 100 training examples, and no
# evaluation schedule is set in training_args above, so it presumably is only
# used if trainer.evaluate() is called explicitly -- confirm this is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,            # Dynamic padding
    eval_dataset=tokenized_dataset.select(range(100))
)

trainer.train()

# Persist the trained PEFT-wrapped model to a local directory.
model.save_pretrained("qlora_deepseek-7B")

In [None]:
# Disabled cell: the whole HuggingFacePipeline chat demo below is wrapped in a
# triple-quoted string, so none of it executes and `start_chat` is NOT defined
# by running this cell. Remove the enclosing quotes to run it.
'''# runnable
import torch
from langchain_community.llms import HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Set model path
model_path = "/kaggle/input/deepseek-r1/transformers/deepseek-r1-distill-qwen-14b/1"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Create a Hugging Face pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=2084,
    temperature=0.7,
    device_map="auto"
)

# Wrap the pipeline in LangChain's HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)

# Define the system prompt for math and AI/ML/DL
system_prompt = """You are an expert in mathematics and AI/ML/DL. You assist with:
1. Mathematical concepts, proofs, and problem-solving.
2. AI/ML/DL theory, algorithms, and applications.
3. Code implementation for AI/ML/DL projects.
4. Debugging and optimizing AI/ML/DL workflows.

Provide clear, concise, and accurate responses. If the question is unclear, ask for clarification."""

# Set up the chat prompt template
prompt_template = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system_prompt),
    HumanMessagePromptTemplate.from_template("{input}")
])

# Use latest LangChain API (RunnableSequence)
chat_chain = prompt_template | llm

# Chat loop
def start_chat():
    print("Math & AI/ML/DL Assistant - Type 'exit' to quit")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break
        response = chat_chain.invoke({"input": user_input})
        print(f"AI: {response}")

# Start chat session
start_chat()
'''

In [None]:
# Disabled cell: the whole vLLM-based chat demo below is wrapped in a
# triple-quoted string, so none of it executes and `start_chat` is NOT defined
# by running this cell. Remove the enclosing quotes to run it.
'''# pip install --target=/kaggle/working langchain_community
import os
import torch
from langchain_community.llms import VLLM
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from vllm import SamplingParams


# Set environment variables (optional but recommended)
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # Adjust based on available GPUs
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid tokenizer parallelism conflicts


model_path = "/kaggle/input/deepseek-r1/transformers/deepseek-aideepseek-r1-distill-qwen-14b-awq-neody/1"


# Sampling Parameters for vLLM
sampling_params = SamplingParams(
    temperature=1.0,              # Control randomness
    min_p=0.01,                   # Minimum cumulative probability for nucleus sampling
    skip_special_tokens=True,      # Remove special tokens from output
    max_tokens=8192,               # Maximum output length
)

# ✅ Use LangChain’s VLLM wrapper
llm = VLLM(
    model=model_path,
    dtype="half",                 # Use FP16 for efficiency
    tensor_parallel_size=2,        # Use multiple GPUs (adjust as needed)
    trust_remote_code=True,        # Trust remote execution for model and tokenizer
    max_num_seqs=16,               # Maximum batch size per iteration
    max_model_len=8192,            # Context length
    gpu_memory_utilization=0.95,   # Use 95% of GPU memory
    sampling_params=sampling_params # Pass the sampling params
)

# Define system prompt for AI/ML/DL assistance
system_prompt = """You are an expert in mathematics and AI/ML/DL. You assist with:
1. Mathematical concepts, proofs, and problem-solving.
2. AI/ML/DL theory, algorithms, and applications.
3. Code implementation for AI/ML/DL projects.
4. Debugging and optimizing AI/ML/DL workflows.

Provide clear, concise, and accurate responses. If the question is unclear, ask for clarification."""

# Define chat prompt template
prompt_template = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system_prompt),
    HumanMessagePromptTemplate.from_template("{input}")
])

# ✅ Create LangChain Chat Pipeline with vLLM
chat_chain = prompt_template | llm

# Interactive Chat Loop
def start_chat():
    print("Math & AI/ML/DL Assistant - Type 'exit' to quit")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break
        response = chat_chain.invoke({"input": user_input})
        print(f"AI: {response}")

# Start Chat
start_chat()
'''

In [None]:
# In the cells visible in this notebook, `start_chat` is defined only inside
# triple-quoted (disabled) chat demos, so on a fresh kernel a bare call raises
# NameError. Look the function up first and print a hint when it is missing,
# so "Restart & Run All" completes.
_chat_entry_point = globals().get("start_chat")
if callable(_chat_entry_point):
    _chat_entry_point()
else:
    print("start_chat() is not defined; enable and run one of the chat demo cells first.")