In [3]:
import torch
print(torch.__version__)  # Should show a version like 2.x.x
print(torch.version.cuda)  # Should match your CUDA version, like 12.8
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.device_count())  # Should return at least 1
print(torch.cuda.get_device_name(0))  # Should show RTX 3070 Ti
print(torch.version.cuda)  # Check installed CUDA version
print(torch.backends.cudnn.version())  # Check cuDNN version


2.6.0+cu118
11.8
True
1
NVIDIA GeForce RTX 3070 Ti
11.8
90100


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from datasets import load_dataset


In [5]:
model_name = "mistralai/Mistral-7B-v0.1"

# Enable 4-bit quantization to reduce memory usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    quantization_config=bnb_config, 
    device_map="auto"
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Model and tokenizer loaded successfully.")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model and tokenizer loaded successfully.


In [6]:
# Load English to French translation dataset
dataset = load_dataset("opus_books", "en-fr")

# Function to preprocess dataset
def format_data(example):
    translation = example["translation"]
    return {
        "input": f"Translate: English → French\nEnglish: {translation['en']}\nFrench:",
        "target": translation["fr"]
    }

train_data = dataset["train"].map(format_data, batched=False)

# Print first example
print(train_data[0])


{'id': '0', 'translation': {'en': 'The Wanderer', 'fr': 'Le grand Meaulnes'}, 'input': 'Translate: English → French\nEnglish: The Wanderer\nFrench:', 'target': 'Le grand Meaulnes'}


In [7]:
# Define LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none"
)

# Apply LoRA
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

print("LoRA applied successfully.")


trainable params: 6,815,744 || all params: 7,248,547,840 || trainable%: 0.0940
LoRA applied successfully.


In [8]:
# Ensure CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Move model to device
peft_model = peft_model.to(device)

print(f"Model moved to {device} successfully.")


Model moved to cuda successfully.


In [11]:
sample_text = "Translate: English → French\nEnglish:  my name is khaled i am software engineer what about you ?\nFrench:"
input_ids = tokenizer(sample_text, return_tensors="pt").input_ids.to(device)

# Generate text
output_ids = peft_model.generate(input_ids, max_length=50)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("Generated Translation:", output_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Translation: Translate: English → French
English:  my name is khaled i am software engineer what about you ?
French:  mon nom est khaled et je suis ingénieur logiciel et toi ?



In [16]:
sample_text = "Translate: French → English \nFrench:  Combien d’argent dois-je demander ?\English:"
input_ids = tokenizer(sample_text, return_tensors="pt").input_ids.to(device)

# Generate text
output_ids = peft_model.generate(input_ids, max_length=50)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("Generated Translation:", output_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Translation: Translate: French → English 
French:  Combien d’argent dois-je demander ?\English: How much money should I ask for ?

## How much money should I ask for ?




In [17]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model.save_pretrained("saved_model")
tokenizer.save_pretrained("saved_model")

('saved_model\\tokenizer_config.json',
 'saved_model\\special_tokens_map.json',
 'saved_model\\tokenizer.model',
 'saved_model\\added_tokens.json',
 'saved_model\\tokenizer.json')