In [1]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/crop-and-soil-dataset/data_core.csv


In [2]:
data = pd.read_csv("/kaggle/input/crop-and-soil-dataset/data_core.csv")

In [3]:
data.columns

Index(['Temparature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [4]:
!pip install transformers datasets

Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: fsspec
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
bigframes 1.42.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.
torch 2.6.0+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and 

In [7]:
!pip install peft unsloth



In [8]:
!pip install accelerate bitsandbytes



In [9]:
data["Soil Type"].value_counts()

Soil Type
Clayey    1623
Black     1613
Red       1594
Loamy     1590
Sandy     1580
Name: count, dtype: int64

In [10]:
data["Fertilizer Name"].value_counts()

Fertilizer Name
14-35-14    1188
Urea        1170
DAP         1167
10-26-26    1128
17-17-17    1124
28-28       1120
20-20       1103
Name: count, dtype: int64

In [11]:
import unsloth # <<< MUST BE IMPORTED BEFORE TRANSFORMERS AND PEFT
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
from transformers import TrainingArguments
from trl import SFTTrainer, DPOTrainer ##[MODIFIED] Added DPOTrainer
from peft import LoraConfig # LoraConfig might still be useful for definition clarity with Unsloth
from huggingface_hub import login
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-06-25 16:18:16.727589: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750868296.922320      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750868296.982050      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!


In [34]:
from huggingface_hub import login
login(token="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

In [15]:
soil_encoder = LabelEncoder()
crop_encoder = LabelEncoder()
fertilizer_encoder = LabelEncoder()

data['Soil Type Encoded'] = soil_encoder.fit_transform(data['Soil Type'])
data['Crop Type Encoded'] = crop_encoder.fit_transform(data['Crop Type'])
data['Fertilizer Name Encoded'] = fertilizer_encoder.fit_transform(data['Fertilizer Name'])

In [16]:
train_df, val_df = train_test_split(data, test_size=0.1, random_state=42)

In [17]:
max_seq_length = 1024  # Increased slightly, adjust based on new prompt length and memory
dtype = None  # Autodetect (will be float16 for T4)
load_in_4bit = True  # Crucial for memory saving

model_name = "deepseek-ai/deepseek-llm-7b-base"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

==((====))==  Unsloth 2025.6.5: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


pytorch_model.bin.index.json: 0.00B [00:00, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

deepseek-ai/deepseek-llm-7b-base does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.


In [19]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    print(f"Set pad_token to eos_token: {tokenizer.eos_token}")

lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]


In [20]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    lora_alpha=16, # Alpha scaling
    target_modules=lora_target_modules,
    lora_dropout=0.05, # Slightly reduced dropout
    bias="none",  # Set to 'none' for LoRA
    # task_type="CAUSAL_LM", # <<< REMOVE THIS LINE
    use_gradient_checkpointing=True, # <<< CRITICAL FOR MEMORY SAVING
)
print("LoRA model configured with gradient checkpointing.")

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.6.5 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


LoRA model configured with gradient checkpointing.


In [21]:
output_model_dir = "./deepseek_crop_fertilizer_finetuned"

In [22]:
import unsloth
import torch
# ... other imports from Cell 9 ...

# Add this line to check available GPUs
print(f"PyTorch CUDA device count: {torch.cuda.device_count()}")
if torch.cuda.device_count() < 2:
    print("WARNING: PyTorch does not see 2 GPUs. Multi-GPU training will not be possible.")
    print("Please check your Kaggle notebook settings to ensure 2 GPUs are allocated and active.")
else:
    print("PyTorch sees 2 or more GPUs. Proceeding with multi-GPU setup attempt.")


PyTorch CUDA device count: 2
PyTorch sees 2 or more GPUs. Proceeding with multi-GPU setup attempt.


In [27]:
## [ADDED] Cell 1: DPO/RLHF Data Preparation
# For DPO, we need a dataset with 'prompt', 'chosen', and 'rejected' columns.
# We'll create this synthetically from our 'train_df'.
# The 'chosen' response will be the ground truth from the dataset.
# The 'rejected' response will be a plausible but incorrect alternative.
# Here, we create a simple 'rejected' response by picking a different fertilizer.
import random

all_fertilizers = data['Fertilizer Name'].unique().tolist()

def create_dpo_format(row):
    # This is the same instruction format as in SFT
    prompt = (
        f"Given the following soil and environmental parameters:\n"
        f"- Temperature: {row['Temparature']}°C\n"
        f"- Humidity: {row['Humidity']}%\n"
        f"- Moisture: {row['Moisture']}%\n"
        f"- Soil Type: {row['Soil Type']}\n"
        f"- Nitrogen: {row['Nitrogen']} ppm\n"
        f"- Potassium: {row['Potassium']} ppm\n"
        f"- Phosphorous: {row['Phosphorous']} ppm\n\n"
        f"Predict the suitable Crop Type and Fertilizer Name, and provide brief information about how they work or their characteristics."
    )
    
    # Chosen response is the ground truth
    chosen_response = (
        f"Recommended Crop Type: {row['Crop Type']}\n"
        f"Recommended Fertilizer: {row['Fertilizer Name']}"
    )
    
    # Create a rejected response by picking a different, incorrect fertilizer
    correct_fertilizer = row['Fertilizer Name']
    rejected_fertilizer = random.choice([f for f in all_fertilizers if f != correct_fertilizer])
    
    rejected_response = (
        f"Recommended Crop Type: {row['Crop Type']}\n"
        f"Recommended Fertilizer: {rejected_fertilizer}"
    )
    
    return {
        'prompt': prompt,
        'chosen': chosen_response,
        'rejected': rejected_response
    }

# We only need a training set for DPO
dpo_dataset_list = train_df.apply(create_dpo_format, axis=1).tolist()
dpo_train_dataset = Dataset.from_pandas(pd.DataFrame(dpo_dataset_list))

print("DPO dataset created.")
print("Example:")
print(f"Prompt: {dpo_train_dataset[0]['prompt']}")
print(f"Chosen: {dpo_train_dataset[0]['chosen']}")
print(f"Rejected: {dpo_train_dataset[0]['rejected']}")

DPO dataset created.
Example:
Prompt: Given the following soil and environmental parameters:
- Temperature: 25.52°C
- Humidity: 48.3%
- Moisture: 60.18%
- Soil Type: Sandy
- Nitrogen: 13 ppm
- Potassium: 2 ppm
- Phosphorous: 11 ppm

Predict the suitable Crop Type and Fertilizer Name, and provide brief information about how they work or their characteristics.
Chosen: Recommended Crop Type: Wheat
Recommended Fertilizer: 28-28
Rejected: Recommended Crop Type: Wheat
Recommended Fertilizer: Urea


In [28]:
## [DEFINITIVE FIX] Cell 2: DPO Trainer Setup
from trl import DPOConfig # Import the correct configuration class

# The DPOTrainer should be configured with DPOConfig, not TrainingArguments.
# DPOConfig includes all TrainingArguments fields plus DPO-specific ones.

output_dpo_model_dir = "./deepseek_crop_fertilizer_dpo_finetuned"

# 1. First, ensure the tokenizer has a pad token ID.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
    print(f"Set pad_token_id to eos_token_id: {tokenizer.eos_token_id}")

# 2. Use DPOConfig for all arguments
dpo_config = DPOConfig(
    # DPO-specific parameters
    beta=0.1,

    # TrainingArguments parameters
    output_dir=output_dpo_model_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    num_train_epochs=1,
    learning_rate=5e-5,
    fp16=True,
    logging_steps=10,
    optim="adamw_8bit",
    report_to="none",
    
    # The following arguments are part of DPOConfig and will prevent the errors
    # We can leave them as default (None) and let the trainer handle them
    model_init_kwargs=None,
    ref_model_init_kwargs=None,
    padding_value=tokenizer.pad_token_id, # Set the padding value directly here
)

# 3. Initialize the DPOTrainer
# Note: We no longer pass 'beta' or 'padding_value' here, as they are now inside the config object.
dpo_trainer = DPOTrainer(
    model=model,
    args=dpo_config, # Pass the DPOConfig object here
    train_dataset=dpo_train_dataset,
    tokenizer=tokenizer,
)

print("DPOTrainer initialized successfully with DPOConfig.")

Extracting prompt in train dataset (num_proc=4):   0%|          | 0/10 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=4):   0%|          | 0/10 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=4):   0%|          | 0/10 [00:00<?, ? examples/s]

DPOTrainer initialized successfully with DPOConfig.


In [29]:
print("Starting DPO training...")
dpo_trainer.train()
print("DPO training finished.")

Starting DPO training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10 | Num Epochs = 1 | Total steps = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 37,478,400/7,000,000,000 (0.54% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / chosen,logps / rejected,logits / chosen,logits / rejected,eval_logits / chosen,eval_logits / rejected,nll_loss,aux_loss


DPO training finished.


In [30]:
final_model_save_path = "./final_deepseek_model_with_info"
dpo_trainer.model.save_pretrained(final_model_save_path) ## [MODIFIED] Save the DPO model
tokenizer.save_pretrained(final_model_save_path)
print(f"Model and tokenizer saved to {final_model_save_path}")

Model and tokenizer saved to ./final_deepseek_model_with_info


In [31]:
import pickle

In [35]:
final_model_save_path = "/final_deepseek_model_with_info"
hf_token = "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"
if hasattr(dpo_trainer, 'model') and dpo_trainer.model is not None: ## [MODIFIED] Check dpo_trainer
    dpo_trainer.model.save_pretrained(final_model_save_path) ## [MODIFIED] Save the DPO model
    tokenizer.save_pretrained(final_model_save_path)
    print(f"Model adapters and tokenizer saved locally to {final_model_save_path}")

    os.makedirs(final_model_save_path, exist_ok=True) # Ensure directory exists
    # Assuming soil_encoder, crop_encoder, fertilizer_encoder are defined and trained
    with open(os.path.join(final_model_save_path, "/kaggle/working/soil_encoder.pkl"), "wb") as f:
        pickle.dump(soil_encoder, f)
    with open(os.path.join(final_model_save_path, "/kaggle/working/crop_encoder.pkl"), "wb") as f:
        pickle.dump(crop_encoder, f)
    with open(os.path.join(final_model_save_path, "/kaggle/working/fertilizer_encoder.pkl"), "wb") as f:
        pickle.dump(fertilizer_encoder, f)
    print(f"Encoders saved locally to {final_model_save_path}")

    hub_model_id = "aryan6637/deepseek-crop-fertilizer-dpo" # Changed to v4 for the DPO version
    print(f"Attempting to push model and tokenizer to Hugging Face Hub: {hub_model_id}")

    try:
        dpo_trainer.model.push_to_hub(hub_model_id, token=hf_token)
        tokenizer.push_to_hub(hub_model_id, token=hf_token)
        print(f"Model (LoRA adapters) and tokenizer pushed successfully to {hub_model_id}")

    except Exception as e:
        print(f"Error pushing to Hugging Face Hub: {e}")
        import traceback
        traceback.print_exc()
        print("Please ensure your token has 'write' permissions and the repository exists or can be created by you.")

else:
    print("Training might have failed or the model was not available on the dpo_trainer object. Skipping saving and pushing to Hub.")


Model adapters and tokenizer saved locally to /final_deepseek_model_with_info
Encoders saved locally to /final_deepseek_model_with_info
Attempting to push model and tokenizer to Hugging Face Hub: aryan6637/deepseek-crop-fertilizer-dpo


README.md:   0%|          | 0.00/586 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/150M [00:00<?, ?B/s]

Saved model to https://huggingface.co/aryan6637/deepseek-crop-fertilizer-dpo
Model (LoRA adapters) and tokenizer pushed successfully to aryan6637/deepseek-crop-fertilizer-dpo
