In [1]:
import unsloth

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.7.0+cu126 with CUDA 1206 (you have 2.5.1+cu121)
    Python  3.11.9 (you have 3.11.9)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")

CUDA available: True
CUDA device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 4080 Laptop GPU


In [3]:
# Dpendencies
import pandas as pd
import re
import json
from datetime import datetime
from unsloth import FastLanguageModel
from datasets import Dataset
import json
from trl import SFTTrainer
from transformers import TrainingArguments

In [2]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install transformers==4.51.3
    !pip install --no-deps unsloth

In [4]:
# Phase 1 Data Ingestion and Cleaning
df1 = pd.read_csv('data/f1visa_posts_2025-06-20.csv')
df2 = pd.read_csv('data/usvisascheduling_posts_2025-06-20.csv')

In [5]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1627 entries, 0 to 1626
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1627 non-null   object 
 1   title          1627 non-null   object 
 2   author         1627 non-null   object 
 3   ups            1627 non-null   int64  
 4   downs          1627 non-null   int64  
 5   score          1627 non-null   int64  
 6   upvote_ratio   1627 non-null   float64
 7   num_comments   1627 non-null   int64  
 8   created_utc    1627 non-null   int64  
 9   created_date   1627 non-null   object 
 10  url            1627 non-null   object 
 11  external_url   1627 non-null   object 
 12  selftext       1616 non-null   object 
 13  selftext_html  1616 non-null   object 
 14  thumbnail      1627 non-null   object 
 15  is_self        1627 non-null   bool   
 16  domain         1627 non-null   object 
 17  subreddit      1627 non-null   object 
 18  flair_te

In [6]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2150 entries, 0 to 2149
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             2150 non-null   object 
 1   title          2150 non-null   object 
 2   author         2150 non-null   object 
 3   ups            2150 non-null   int64  
 4   downs          2150 non-null   int64  
 5   score          2150 non-null   int64  
 6   upvote_ratio   2150 non-null   float64
 7   num_comments   2150 non-null   int64  
 8   created_utc    2150 non-null   int64  
 9   created_date   2150 non-null   object 
 10  url            2150 non-null   object 
 11  external_url   2150 non-null   object 
 12  selftext       2051 non-null   object 
 13  selftext_html  2051 non-null   object 
 14  thumbnail      2150 non-null   object 
 15  is_self        2150 non-null   bool   
 16  domain         2138 non-null   object 
 17  subreddit      2150 non-null   object 
 18  flair_te

In [7]:
dataset = pd.concat([df1, df2], axis=0)
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3777 entries, 0 to 2149
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             3777 non-null   object 
 1   title          3777 non-null   object 
 2   author         3777 non-null   object 
 3   ups            3777 non-null   int64  
 4   downs          3777 non-null   int64  
 5   score          3777 non-null   int64  
 6   upvote_ratio   3777 non-null   float64
 7   num_comments   3777 non-null   int64  
 8   created_utc    3777 non-null   int64  
 9   created_date   3777 non-null   object 
 10  url            3777 non-null   object 
 11  external_url   3777 non-null   object 
 12  selftext       3667 non-null   object 
 13  selftext_html  3667 non-null   object 
 14  thumbnail      3777 non-null   object 
 15  is_self        3777 non-null   bool   
 16  domain         3765 non-null   object 
 17  subreddit      3777 non-null   object 
 18  flair_text   

In [8]:
# Function to clean the data frames
def clean_text(text):
    if pd.isna(text):
        return ""
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', str(text))
    # Remove URLs
    text = re.sub(r'http\S+|www\S+', '', text)
    # Clean extra whitespace
    text = re.sub(r'\s+', ' ', text).strip()
    return text

dataset['clean_title'] = dataset['title'].apply(clean_text)
dataset['clean_selftext'] = dataset['selftext'].apply(clean_text)

# Filter relevant posts (adjust criteria as needed)
visa_keywords = ['visa', 'interview', 'embassy', 'consulate', 'f1', 'h1b', 'green card', 'immigration']
df_filtered = dataset[dataset['clean_title'].str.contains('|'.join(visa_keywords), case=False, na=False)]

print(f"Filtered data shape: {df_filtered.shape}")

Filtered data shape: (2072, 26)


In [9]:
# Phase 2 Create Traning Data for the LLM
# Create conversation format for fine-tuning
def create_training_data(df):
    training_data = []
    
    for idx, row in df.iterrows():
        # Create visa officer persona prompt
        system_prompt = """You are an experienced U.S. Visa Officer conducting a visa interview. You are professional, thorough, and fair. You ask relevant questions to assess the applicant's eligibility and intentions. Be direct but courteous."""
        
        # Use title as question context and selftext as response context
        if row['clean_selftext'] and len(row['clean_selftext']) > 50:
            user_message = f"I'm applying for a visa. {row['clean_title']}"
            assistant_response = f"I understand your situation. As a visa officer, I need to ask you some questions. {row['clean_selftext'][:500]}"
            
            conversation = {
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_message},
                    {"role": "assistant", "content": assistant_response}
                ]
            }
            training_data.append(conversation)
    
    return training_data

training_data = create_training_data(df_filtered)

# Save training data
with open('visa_training_data.jsonl', 'w') as f:
    for item in training_data:
        f.write(json.dumps(item) + '\n')

print(f"Created {len(training_data)} training examples")

Created 1969 training examples


In [10]:
# Phase 3: Training 
# Convert to Unsloth format
def format_for_unsloth(training_data):
    formatted_data = []
    
    for item in training_data:
        messages = item['messages']
        # Combine into single text with special tokens
        text = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{messages[0]['content']}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{messages[1]['content']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n{messages[2]['content']}<|eot_id|>"
        
        formatted_data.append({"text": text})
    
    return formatted_data

formatted_data = format_for_unsloth(training_data)

# Save as JSON for Unsloth
with open('unsloth_training_data.json', 'w') as f:
    json.dump(formatted_data, f, indent=2)

In [11]:
# Training Script
with open('unsloth_training_data.json', 'r') as f:
    data = json.load(f)

# Convert to dataset
dataset = Dataset.from_list(data)

# Load model
max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",  # Using 4bit quantized version
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Add LoRA adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Training arguments
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=100,  # Adjust based on your data size
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="visa_model_output",
        save_steps=50,
        save_total_limit=2,
    ),
)

# Start training
trainer.train()

# Save the model
model.save_pretrained("visa_officer_model")
tokenizer.save_pretrained("visa_officer_model")

  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"{DEVICE_TYPE}:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.5.9: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA GeForce RTX 4080 Laptop GPU. Num GPUs = 1. Max memory: 11.994 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.9. CUDA Toolkit: 12.1. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.5.9 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Unsloth: Tokenizing ["text"]:   0%|          | 0/1969 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,969 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.5544
2,2.3929
3,2.5069
4,2.0967
5,1.3485
6,0.8879
7,0.6838
8,0.7092
9,0.568
10,0.5965


('visa_officer_model\\tokenizer_config.json',
 'visa_officer_model\\special_tokens_map.json',
 'visa_officer_model\\tokenizer.json')

In [12]:
# Phase 4 conversion to inference type
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="visa_officer_model",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

# Convert to fast inference mode
FastLanguageModel.for_inference(model)

# Save in different formats
# For Ollama integration
model.save_pretrained_merged("visa_officer_merged", tokenizer, save_method="merged_16bit")

# For GGUF format (smaller size)
model.save_pretrained_gguf("visa_officer_gguf", tokenizer, quantization_method="q4_k_m")

==((====))==  Unsloth 2025.5.9: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA GeForce RTX 4080 Laptop GPU. Num GPUs = 1. Max memory: 11.994 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.9. CUDA Toolkit: 12.1. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 7.36 out of 31.81 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


  0%|                                                                                                                                                                                                                      | 0/32 [00:00<?, ?it/s]
We will save to Disk and not RAM now.
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [06:15<00:00, 11.74s/it]


Unsloth: Saving tokenizer... Done.
Done.


RuntimeError: *** Unsloth: Failed compiling llama.cpp using os.system(...) with error 1. Please report this ASAP!