In [1]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2025.5.9-py3-none-any.whl.metadata (47 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/47.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.5.11 (from unsloth)
  Downloading unsloth_zoo-2025.5.11-py3-none-any.whl.metadata (8.1 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.24-py3-none-any.whl.metadata (11 kB)
Collecting datasets>=3.4.1 (from unsloth)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)
  Downloading trl-0.18

In [1]:
import unsloth
import torch
from unsloth import FastLanguageModel
import pickle
import os
from huggingface_hub import hf_hub_download, login

print(f"Unsloth version: {unsloth.__version__}")
print(f"PyTorch version: {torch.__version__}")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Unsloth version: 2025.5.9
PyTorch version: 2.7.0+cu126


In [2]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import re
import sys

# Hub repository holding the fine-tuned token-classification checkpoint.
MODEL_ID = "aryan6637/ner_training_with_values"

# Label inventory: the outside tag "O" followed by a B-/I- pair for every
# entity kind, in the order listed below.
ENTITY_TYPES = ["O"] + [
    f"{prefix}-{entity_name}"
    for entity_name in (
        "SOIL_TYPE",
        "NUTRIENT",
        "TEMPERATURE_VALUE",
        "HUMIDITY_VALUE",
        "MOISTURE_VALUE",
        "NITROGEN_VALUE",
        "POTASSIUM_VALUE",
        "PHOSPHOROUS_VALUE",
    )
    for prefix in ("B", "I")
]

# Lookup tables between label strings and their position in ENTITY_TYPES.
LABEL_TO_ID = dict(zip(ENTITY_TYPES, range(len(ENTITY_TYPES))))
ID_TO_LABEL = dict(enumerate(ENTITY_TYPES))

# Parameters that must be present before a prediction prompt can be built,
# keyed by the name used in the final parameter dict. Each entry records:
#   'ner_tag' - entity group (label without the B-/I- prefix) whose text
#               supplies the value
#   'type'    - Python type the extracted or typed text is converted to
#   'prompt'  - text shown when the value has to be entered manually
# NOTE: the key 'Temparature' must keep this exact spelling;
# format_inference_prompt reads final_parameters['Temparature'].
REQUIRED_PARAMS = {
    'Temparature': {'ner_tag': 'TEMPERATURE_VALUE', 'type': float, 'prompt': 'Enter Temperature (°C): '},
    'Humidity': {'ner_tag': 'HUMIDITY_VALUE', 'type': float, 'prompt': 'Enter Humidity (%): '},
    'Moisture': {'ner_tag': 'MOISTURE_VALUE', 'type': float, 'prompt': 'Enter Moisture (units): '},
    'Soil Type': {'ner_tag': 'SOIL_TYPE', 'type': str, 'prompt': 'Enter Soil Type: '},
    'Nitrogen': {'ner_tag': 'NITROGEN_VALUE', 'type': int, 'prompt': 'Enter Nitrogen (ppm): '},
    'Potassium': {'ner_tag': 'POTASSIUM_VALUE', 'type': int, 'prompt': 'Enter Potassium (ppm): '},
    'Phosphorous': {'ner_tag': 'PHOSPHOROUS_VALUE', 'type': int, 'prompt': 'Enter Phosphorous (ppm): '},
}

def clean_numeric_string(value_str):
    """Strip unit markers from a numeric reading so it can be parsed as a number.

    Removes '%', the degree sign (with or without a following C), a bare 'C',
    'units', 'ppm' and thousands commas, ignoring letter case, then trims
    surrounding whitespace. A decimal number whose digits were separated around
    the point (e.g. '12. 5') is joined back together.

    Args:
        value_str: Raw text of the reading, or None.

    Returns:
        The cleaned text (still a string; the caller converts it), or None when
        ``value_str`` is None.
    """
    if value_str is None:
        return None
    # Case-insensitive so that '30 c' or '45 PPM' are handled like '30 C' / '45 ppm';
    # the two-character degree form is listed first so it is removed as a unit.
    without_units = re.sub(r'°\s*c|units|ppm|[%°c,]', '', value_str, flags=re.IGNORECASE)
    # Extracted entity text may carry whitespace around the decimal point
    # (presumably from re-joining word pieces); float() rejects '12. 5'.
    rejoined = re.sub(r'(?<=\d)\s*\.\s*(?=\d)', '.', without_units)
    return rejoined.strip()

def clean_soil_type_string(value_str):
    """Tidy a soil-type string: trim whitespace and drop trailing ',' / '.' characters.

    Returns None when ``value_str`` is None.
    """
    if value_str is not None:
        trimmed = value_str.strip()
        without_trailing_punct = trimmed.rstrip(',.')
        # Removing the punctuation can expose whitespace that sat before it.
        return without_trailing_punct.strip()
    return None

def clean_nutrient_name_string(value_str):
    """Tidy a nutrient name: trim whitespace and drop trailing ',' / '.' characters.

    Returns None when ``value_str`` is None.
    """
    return None if value_str is None else value_str.strip().rstrip(',.').strip()

# Load the token-classification checkpoint named by MODEL_ID together with its tokenizer.
tokenizer_bert = AutoTokenizer.from_pretrained(MODEL_ID)
bert = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
# NER pipeline used by get_parameters, which reads each result's
# 'entity_group' and 'word' fields.
nlp = pipeline("ner", model=bert, tokenizer=tokenizer_bert, aggregation_strategy="simple")

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/431M [00:00<?, ?B/s]

Device set to use cuda:0


In [3]:
# Settings passed to FastLanguageModel.from_pretrained when the generation model is loaded.
model_repo_id = "aryan6637/deepseek-crop-fertilizer-info-v3"
max_seq_length = 1024  # Must match the max_seq_length used during training
dtype = None  # Autodetect (will be float16 for T4/V100, bfloat16 for A100)
load_in_4bit = True  # Use 4-bit quantization to save memory

In [4]:
# Load the generation model and its tokenizer using the settings defined
# above (model_repo_id, max_seq_length, dtype, load_in_4bit).
print(f"Loading model from {model_repo_id}...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_repo_id,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token=hf_token, # Add your token here if the model is private
)
print("Model and tokenizer loaded successfully.")

# NOTE(review): the model is loaded with load_in_4bit=True; confirm that an
# explicit .to("cuda") is needed and supported by the installed library versions.
if torch.cuda.is_available():
    model.to("cuda")
    print("Model moved to GPU.")
else:
    print("CUDA not available. Running on CPU. This might be very slow.")

# Fall back to the EOS token for padding only when the tokenizer has no pad
# token of its own, and mirror that choice in the model config.
if tokenizer.pad_token is None:
    print("Tokenizer does not have a pad token. Setting to eos_token.")
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

# Report the special tokens that generation will use.
print(f"Pad token: {tokenizer.pad_token}, ID: {tokenizer.pad_token_id}")
print(f"EOS token: {tokenizer.eos_token}, ID: {tokenizer.eos_token_id}")

Loading model from aryan6637/deepseek-crop-fertilizer-info-v3...
==((====))==  Unsloth 2025.5.9: Fast Llama patching. Transformers: 4.52.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

deepseek-ai/deepseek-llm-7b-base does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.


adapter_model.safetensors:   0%|          | 0.00/150M [00:00<?, ?B/s]

Unsloth 2025.5.9 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Model and tokenizer loaded successfully.
Model moved to GPU.
Pad token: <|PAD_TOKEN|>, ID: 100015
EOS token: <｜end▁of▁sentence｜>, ID: 100001


In [5]:

def _coerce_param_value(text, expected_type):
    """Convert ``text`` to ``expected_type``; raise ValueError if it is not usable."""
    if expected_type not in (float, int):
        return text
    number = float(text)
    # float() accepts 'nan' and 'inf'; neither is a usable reading, and
    # int(float('inf')) would raise OverflowError instead of ValueError.
    if number != number or number in (float("inf"), float("-inf")):
        raise ValueError(f"non-finite numeric value: {text!r}")
    return number if expected_type is float else int(number)


def get_parameters(sentence):
    """Collect every parameter in REQUIRED_PARAMS from a sentence, asking for the rest.

    The sentence is run through the ``nlp`` pipeline. For each required
    parameter the first entity whose group matches its 'ner_tag' is cleaned and
    converted to the parameter's 'type'. Parameters that were not found, or
    whose text could not be converted, are requested with ``input()`` until a
    valid value is typed; numeric values typed manually must not be negative.

    Args:
        sentence: Free-text description of the soil and environment.

    Returns:
        dict: One entry per key of REQUIRED_PARAMS, in that order, holding the
        converted value.

    Raises:
        SystemExit: If ``sentence`` is empty or only whitespace.
    """
    if not sentence.strip():
        # Blank input ends the run via SystemExit (existing behaviour, kept as is).
        sys.exit(0)

    # Build the tag -> parameter lookup once instead of rescanning
    # REQUIRED_PARAMS for every entity; setdefault keeps the first match.
    tag_to_param = {}
    for name, details in REQUIRED_PARAMS.items():
        tag_to_param.setdefault(details['ner_tag'], name)

    # Keep only the first entity seen for each parameter.
    extracted_raw_values = {}
    for entity in nlp(sentence):
        matched_param = tag_to_param.get(entity['entity_group'])
        if matched_param and matched_param not in extracted_raw_values:
            extracted_raw_values[matched_param] = entity['word']

    final_parameters = {}
    for param_name, details in REQUIRED_PARAMS.items():
        expected_type = details['type']
        is_numeric = expected_type in (float, int)
        processed_value = None

        raw_value_from_ner = extracted_raw_values.get(param_name)
        if raw_value_from_ner:
            cleaner = clean_numeric_string if is_numeric else clean_soil_type_string
            try:
                processed_value = _coerce_param_value(cleaner(raw_value_from_ner), expected_type)
            except ValueError:
                # Unusable extracted text: say so, then fall back to manual entry.
                print(f"Could not interpret {raw_value_from_ner!r} as {param_name}; please enter it manually.")

        while processed_value is None:
            manual_input_str = input(details['prompt'])
            try:
                manual_value = _coerce_param_value(
                    manual_input_str if is_numeric else manual_input_str.strip(),
                    expected_type,
                )
            except ValueError:
                print("Invalid value, please enter a number.")
                continue

            if expected_type != str and manual_value < 0:
                print("Value must not be negative, please try again.")
                continue

            processed_value = manual_value

        final_parameters[param_name] = processed_value
    return final_parameters


In [6]:
def format_inference_prompt(final_parameters):
    """Render the parameter dict as an Alpaca-style prompt ending in '### Response:'.

    Args:
        final_parameters: Mapping with the keys 'Temparature', 'Humidity',
            'Moisture', 'Soil Type', 'Nitrogen', 'Potassium' and 'Phosphorous'.

    Returns:
        str: The instruction block followed by an empty response section for
        the model to complete.
    """
    values = final_parameters
    parameter_lines = [
        f"- Temperature: {values['Temparature']}°C",
        f"- Humidity: {values['Humidity']}%",
        f"- Moisture: {values['Moisture']}",
        f"- Soil Type: {values['Soil Type']}",
        f"- Nitrogen: {values['Nitrogen']} ppm",
        f"- Potassium: {values['Potassium']} ppm",
        f"- Phosphorous: {values['Phosphorous']} ppm",
    ]
    heading = "Given the following soil and environmental parameters:"
    task = (
        "Predict the suitable Crop Type and Fertilizer Name, and provide brief "
        "information about how they work or their characteristics."
    )
    instruction_text = "\n".join([heading, *parameter_lines]) + "\n\n" + task
    # Alpaca layout: generation continues right after the response header.
    return "### Instruction:\n" + instruction_text + "\n\n### Response:\n"

In [11]:
def get_prediction(final_parameters):
    """Generate, print and return a crop/fertilizer recommendation.

    Builds the prompt with ``format_inference_prompt``, samples a continuation
    from the module-level ``model``, prints the full decoded sequence and then
    the newly generated part on its own.

    Args:
        final_parameters: Mapping accepted by ``format_inference_prompt``.

    Returns:
        str: The decoded, whitespace-stripped text generated after the prompt.
    """
    inference_prompt = format_inference_prompt(final_parameters)

    # Tokenize the prompt and remember how many tokens it occupies.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(inference_prompt, return_tensors="pt").to(device)
    prompt_token_count = inputs["input_ids"].shape[1]

    print("\nGenerating response...")
    with torch.no_grad():  # No gradients are needed during inference
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,  # Upper bound on the length of the generated answer
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
            do_sample=True,      # Sample instead of greedy decoding
            temperature=0.6,     # Lower for more factual, higher for more random (0.1-1.0)
            top_p=0.9,           # Nucleus sampling threshold
            # num_beams=1,       # Use 1 for greedy/sampling, >1 for beam search (slower but can be better)
        )

    sequence_ids = outputs[0]

    # Full decoded sequence (prompt followed by the continuation).
    full_response_text = tokenizer.decode(sequence_ids, skip_special_tokens=True)
    print("\n--- Full Generated Text (with prompt) ---")
    print(full_response_text)

    # Decode only the tokens produced after the prompt. Cutting the decoded
    # string by len(inference_prompt) is unreliable because decoding is not
    # guaranteed to reproduce the prompt character-for-character.
    response_only_text = tokenizer.decode(
        sequence_ids[prompt_token_count:], skip_special_tokens=True
    ).strip()
    print("\n--- Model's Recommendation ---")
    print(response_only_text)
    return response_only_text

In [7]:
# Ask for a free-text description and extract the model parameters from it.
# The prompt ends with ': ' so the typed text does not run into the prompt.
parameters = get_parameters(input("Enter the sentence: "))

Enter the sentenceThe red soil contains 30 C with soil humidity of 45 % along with nutrient values N: 45 , P:34 , k:3
Enter Moisture (units): 12


In [8]:
parameters

{'Temparature': 30.0,
 'Humidity': 45.0,
 'Moisture': 12.0,
 'Soil Type': 'red',
 'Nitrogen': 45,
 'Potassium': 3,
 'Phosphorous': 34}

In [12]:
get_prediction(parameters)


Generating response...

--- Full Generated Text (with prompt) ---
### Instruction:
Given the following soil and environmental parameters:
- Temperature: 30.0°C
- Humidity: 45.0%
- Moisture: 12.0
- Soil Type: red
- Nitrogen: 45 ppm
- Potassium: 3 ppm
- Phosphorous: 34 ppm

Predict the suitable Crop Type and Fertilizer Name, and provide brief information about how they work or their characteristics.

### Response:
Recommended Crop Type: Sugarcane
Recommended Fertilizer: 17-17-17


--- Model's Recommendation ---
Recommended Crop Type: Sugarcane
Recommended Fertilizer: 17-17-17


In [None]:
"The red soil contains 30 C with soil humidity of 45 % along with nutrient values N: 45 , P:34 , k:33"