In [1]:
# Delete the aiohttp 3.9.1 dist-info metadata directory from the conda
# site-packages tree. Presumably this lets the pip upgrade in the next cell
# replace aiohttp without tripping over stale metadata — TODO confirm.
! rm -rf /opt/conda/lib/python3.10/site-packages/aiohttp-3.9.1.dist-info

In [2]:
# Install/upgrade the fine-tuning stack into the *kernel's* environment.
# `%pip` (rather than `!pip`) guarantees the packages land in the interpreter
# this notebook is running; the duplicated `accelerate` entry was dropped.
# NOTE(review): versions are unpinned, so a fresh run may pull incompatible
# releases — pin them once a working combination is known.
%pip install -qU transformers datasets accelerate bitsandbytes peft trl langchain_core langchain flash_attn

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.4.1 requires cubinlinker, which is not installed.
cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.4.1 requires ptxcompiler, which is not installed.
cuml 24.4.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
keras-cv 0.9.0 requires keras-core, which is not installed.
keras-nlp 0.12.1 requires keras-core, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.8 which is incompatible.
apache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 14.0.2 which is incompatible

In [None]:
import os

import torch
from torch import cuda
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig


# Device string for manual tensor placement. The previous value was built as
# 'cuda :N' (with a space), which is not a valid torch device specifier.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Hugging Face access token. It is read from the environment instead of being
# committed in the notebook; the token that used to be hardcoded here must be
# treated as leaked and revoked. When HF_TOKEN is unset this is None and the
# hub client is left to use whatever credentials it already has.
hf_auth = os.environ.get("HF_TOKEN")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,            # load model in 4-bit precision
    bnb_4bit_quant_type="nf4",    # pre-trained model should be quantized in 4-bit NF format
    bnb_4bit_use_double_quant=True, # Using double quantization as mentioned in QLoRA paper
    bnb_4bit_compute_dtype=torch.bfloat16, # During computation, pre-trained model should be loaded in BF16 format
)

# Load pre-trained Mistral model and tokenizer.
# - `token=` replaces the deprecated `use_auth_token=` keyword.
# - `trust_remote_code=True` was dropped: it allows arbitrary repository code
#   to run and is not needed for an architecture shipped with transformers.
# - `use_cache=False` disables the KV cache on the model config (training use).
model_name = "mistralai/Mistral-7B-v0.3"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_auth,
    quantization_config=bnb_config,
    device_map="auto",
    use_cache=False,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_auth)

# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"



config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

In [None]:
import pandas as pd 
# Location of the documents CSV inside the Kaggle input directory.
path = '/kaggle/input/testcsv/documents.csv'

# Parse the CSV into a DataFrame; later cells read its "page_content" column.
df = pd.read_csv(filepath_or_buffer=path)

def tokenize_function(examples, max_length=512):
    """Tokenize the ``page_content`` entries of a batch to a fixed length.

    Args:
        examples: Mapping holding a ``"page_content"`` entry with the text(s)
            to tokenize (a batch of rows when used with ``batched=True``).
        max_length: Number of tokens every example is padded/truncated to.
            Previously no length was given, so the result depended on the
            tokenizer's ``model_max_length`` and could end up unbounded
            or not padded at all.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    return tokenizer(
        examples["page_content"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# Wrap the pandas DataFrame in a Hugging Face Dataset.
from datasets import Dataset
dataset = Dataset.from_pandas(df)

# Run the tokenizer over the dataset in batches; the returned fields are
# added as new columns next to the original ones.
tokenized_dataset = dataset.map(
    function=tokenize_function,
    batched=True,
)

In [None]:
# Bare expression: as the last statement of the cell, its repr is rendered as
# the cell output, giving a quick look at the tokenized dataset.
tokenized_dataset

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import bitsandbytes as bnb

# Prepare the 4-bit quantized base model for training before adapters are
# attached. This is the preparation step the PEFT quantization guide calls for
# with k-bit models; it was missing, so the quantized model was wrapped as-is.
model = prepare_model_for_kbit_training(model)

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank of the low-rank adaptation
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.02,  # Dropout rate for LoRA
    bias="none",  # setting to 'none' for only training weight params instead of biases
    task_type="CAUSAL_LM",
)

# Wrap the base model with the LoRA adapters.
# NOTE(review): this rebinds `model`, so re-running the cell wraps an already
# wrapped model — restart the kernel before re-running.
model = get_peft_model(model, lora_config)

In [None]:
# Leave gradients enabled only on parameters whose name contains 'lora_';
# every other parameter is switched to requires_grad=False.
for name, param in model.named_parameters():
    if 'lora_' in name:
        continue
    param.requires_grad = False

In [None]:
from transformers import Trainer, TrainingArguments
from trl import SFTTrainer

# Release cached GPU memory before the trainer allocates its own buffers.
cuda.empty_cache()

training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,     # Number of training epochs
    per_device_train_batch_size=1,  # Batch size for training
    per_device_eval_batch_size=1,   # Batch size for evaluation
    # NOTE(review): with batch size 1 and a single epoch, 500 warmup steps may
    # exceed the total number of steps for a small dataset — confirm.
    warmup_steps=500,
    weight_decay=0.01,
    # NOTE(review): the quantization config computes in bfloat16 while mixed
    # precision here is fp16 — confirm this combination is intended.
    fp16=True,                     # Use mixed precision
    push_to_hub=False,
    # Checkpoint once per epoch. The former `save_steps=10` was removed: it
    # contradicted this strategy and only applies to step-based saving.
    save_strategy="epoch",
)

# NOTE(review): `tokenizer=` and `dataset_text_field=` are accepted by the TRL
# release this notebook was written against; newer releases may expect them
# elsewhere — confirm against the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    args=training_params,
    # The text column in this dataset is "page_content" (the column the
    # tokenization step reads); the previous value "text" is not a column
    # that the visible code ever creates.
    dataset_text_field="page_content",
)
trainer.train()

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate

# Pass-through prompt: the rendered prompt is exactly the user's question.
prompt = PromptTemplate(
    input_variables=["question"],
    template="{question}",
)

def generate_text(query_text : str , model, tokenizer, max_new_tokens: int = 512):
    """Generate a completion for ``query_text``, print it, and return it.

    Args:
        query_text: The user's question; rendered through the module-level
            ``prompt`` template.
        model: Model exposing ``device``, ``eval()`` and ``generate()``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        max_new_tokens: Upper bound on generated tokens, not counting the
            prompt. Replaces the former ``max_length=10000``, which counted
            prompt tokens and allowed extremely long generations.

    Returns:
        The decoded first output sequence with special tokens removed. The
        sequence is decoded as returned by ``generate`` (not sliced), so it
        may include the prompt text as well.
    """
    prompt_text = prompt.format(question=query_text)
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    # Switch to inference mode so dropout (including LoRA dropout) is off;
    # the model may still be in training mode after fine-tuning.
    model.eval()

    # Fall back to EOS for padding when the tokenizer defines no pad token.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # The model config was loaded with use_cache=False for training;
        # turn the KV cache back on for decoding.
        use_cache=True,
        pad_token_id=pad_id,
        # `early_stopping=True` was removed: it only affects beam search and
        # no beams are requested here.
    )

    text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    formatted_response = f"""
    --> Assistant Response

    -->✨ User Question:
    {query_text}

    -->✅ Answer:
    
    {text} 

    """
    print(formatted_response)
    return text

# Sample query sent to the fine-tuned model.
question = 'quel sont les formations de la facultédecrire la Formation continue / DCA Ingénierie topographique et systèmes de information géographiques appliqués'

# The call prints the formatted response; its return value (the decoded text)
# is also the cell's output because it is the last expression.
generate_text(query_text=question, model=model, tokenizer=tokenizer)