In [3]:
# Cell 3 - (largely unchanged, ensure transformers is installed)
import math
import os
import re
import sys

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Hugging Face Hub identifier of the fine-tuned token-classification checkpoint.
MODEL_ID = "aryan6637/ner_training_with_values"

# Label inventory: the outside tag "O" followed by a B-/I- pair for every
# entity family, in the same order as the original hand-written list.
ENTITY_TYPES = ["O"] + [
    f"{prefix}-{family}"
    for family in (
        "SOIL_TYPE",
        "NUTRIENT",
        "TEMPERATURE_VALUE",
        "HUMIDITY_VALUE",
        "MOISTURE_VALUE",
        "NITROGEN_VALUE",
        "POTASSIUM_VALUE",
        "PHOSPHOROUS_VALUE",
    )
    for prefix in ("B", "I")
]

# Bidirectional lookup tables between label strings and their list positions.
LABEL_TO_ID = dict(zip(ENTITY_TYPES, range(len(ENTITY_TYPES))))
ID_TO_LABEL = dict(enumerate(ENTITY_TYPES))

# Parameters that must end up in the final result. For each one:
#   ner_tag - entity group (label without the B-/I- prefix) carrying its value
#   type    - Python type the value is converted to
#   prompt  - text shown when the value has to be typed in manually
# NOTE(review): the 'Temparature' spelling is kept as-is; presumably it matches
# a key expected by downstream code - confirm before renaming.
REQUIRED_PARAMS = {
    'Temparature': dict(ner_tag='TEMPERATURE_VALUE', type=float, prompt='Enter Temperature (°C): '),
    'Humidity': dict(ner_tag='HUMIDITY_VALUE', type=float, prompt='Enter Humidity (%): '),
    'Moisture': dict(ner_tag='MOISTURE_VALUE', type=float, prompt='Enter Moisture (units): '),
    'Soil Type': dict(ner_tag='SOIL_TYPE', type=str, prompt='Enter Soil Type: '),
    'Nitrogen': dict(ner_tag='NITROGEN_VALUE', type=int, prompt='Enter Nitrogen (ppm): '),
    'Potassium': dict(ner_tag='POTASSIUM_VALUE', type=int, prompt='Enter Potassium (ppm): '),
    'Phosphorous': dict(ner_tag='PHOSPHOROUS_VALUE', type=int, prompt='Enter Phosphorous (ppm): '),
}

def clean_numeric_string(value_str):
    """Reduce a raw value string such as ``"30 °C"`` to its numeric text.

    Instead of deleting a fixed, case-sensitive list of unit spellings, the
    first numeric literal found in the text is returned. This also copes with
    unit variants the old list missed (``"30 ° C"``, ``"12 Units"``,
    ``"45 PPM"``) and with a sign or decimal point separated from its digits
    by whitespace (``"- 5"``, ``"45. 5"``).

    Args:
        value_str: Raw text, or None.

    Returns:
        None when ``value_str`` is None; otherwise the numeric substring
        (thousands separators removed). When the text contains no digits the
        stripped, comma-free text is returned unchanged so that the caller's
        ``float()`` conversion fails exactly as it did before.
    """
    if value_str is None:
        return None

    # Commas are treated as thousands separators, as in the original helper.
    text = value_str.replace(',', '').strip()
    # Re-join a decimal point that got detached from its digits ("45 . 5").
    text = re.sub(r'(?<=\d)\s*\.\s*(?=\d)', '.', text)

    match = re.search(
        r'(?P<sign>[-+]?)\s*(?P<number>(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)',
        text,
    )
    if match is None:
        return text
    return match.group('sign') + match.group('number')

def clean_soil_type_string(value_str):
    """Tidy a soil-type string.

    Surrounding whitespace is removed, then any run of trailing commas and
    periods, then whitespace once more. ``None`` is passed through unchanged.
    """
    if value_str is None:
        return None
    trimmed = value_str.strip()
    without_trailing_punctuation = trimmed.rstrip(',.')
    return without_trailing_punctuation.strip()

def clean_nutrient_name_string(value_str):
    """Tidy a nutrient-name string.

    Surrounding whitespace is removed, then any run of trailing commas and
    periods, then whitespace once more. ``None`` is passed through unchanged.
    """
    if value_str is None:
        return None
    trimmed = value_str.strip()
    without_trailing_punctuation = trimmed.rstrip(',.')
    return without_trailing_punctuation.strip()

# Choose the inference device up front: CUDA device 0 when torch reports a
# GPU, otherwise -1, which makes the pipeline run on the CPU.
nlp_device = 0 if torch.cuda.is_available() else -1

# Load the tokenizer and the token-classification model for MODEL_ID.
tokenizer_bert = AutoTokenizer.from_pretrained(MODEL_ID)
bert_model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# NER pipeline; with aggregation_strategy="simple" each result carries the
# 'entity_group' and 'word' keys that get_parameters reads.
nlp = pipeline(
    "ner",
    model=bert_model,
    tokenizer=tokenizer_bert,
    aggregation_strategy="simple",
    device=nlp_device,
)
print(f"NER pipeline running on {'GPU' if nlp_device == 0 else 'CPU'}.")

2025-06-05 08:30:50.563444: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749112250.865143      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749112250.938751      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/431M [00:00<?, ?B/s]

Device set to use cpu


NER pipeline running on CPU.


In [4]:

def _parse_parameter_value(text, expected_type, from_ner):
    """Convert one raw string to ``expected_type``; return None if unusable.

    Args:
        text: Raw string, either an entity's text or something the user typed.
        expected_type: ``float``, ``int`` or any other type (treated as text).
        from_ner: True when ``text`` comes from the NER pipeline, in which
            case the module's cleaning helpers are applied first.

    Returns:
        The converted value, or None when the text is empty, is not a number,
        or is a non-finite number (``nan`` / ``inf``).
    """
    if expected_type not in (float, int):
        cleaned = clean_soil_type_string(text) if from_ner else text.strip()
        # An empty string carries no information; report it as missing.
        return cleaned or None

    if from_ner:
        text = clean_numeric_string(text)
    try:
        number = float(text)
    except ValueError:
        return None
    # float() accepts "nan" and "inf"; int(inf) would raise OverflowError.
    if not math.isfinite(number):
        return None
    # Integers are truncated via float so that inputs like "45.0" still work.
    return number if expected_type == float else int(number)


def get_parameters(sentence):
    """Extract every parameter in REQUIRED_PARAMS from a free-text sentence.

    The sentence is run through the ``nlp`` NER pipeline. For each required
    parameter the first entity whose group matches its ``ner_tag`` is cleaned
    and converted. Any parameter that is missing or cannot be converted is
    requested interactively with ``input()`` until a valid value is entered.
    Manually entered numeric values must be non-negative; values extracted by
    the pipeline are not range-checked.

    Args:
        sentence: Free-text description to analyse.

    Returns:
        dict mapping each REQUIRED_PARAMS key to a value of its configured
        type, in REQUIRED_PARAMS order.

    Raises:
        ValueError: If ``sentence`` is None or blank. (Previously this called
            ``sys.exit(0)``, which aborts the calling cell or script instead
            of reporting the problem to the caller.)
    """
    if sentence is None or not sentence.strip():
        raise ValueError("sentence must contain some text to extract parameters from")

    # Map each entity group to the parameter it feeds; the first parameter
    # declaring a tag wins, matching the original linear scan.
    tag_to_param = {}
    for name, details in REQUIRED_PARAMS.items():
        tag_to_param.setdefault(details['ner_tag'], name)

    # Keep only the first entity seen for every parameter.
    extracted_raw_values = {}
    for entity in nlp(sentence):
        param_name = tag_to_param.get(entity['entity_group'])
        if param_name is not None:
            extracted_raw_values.setdefault(param_name, entity['word'])

    final_parameters = {}
    for param_name, details in REQUIRED_PARAMS.items():
        expected_type = details['type']
        is_numeric = expected_type in (float, int)

        value = None
        raw_value = extracted_raw_values.get(param_name)
        if raw_value:
            value = _parse_parameter_value(raw_value, expected_type, from_ner=True)

        # Fall back to manual entry until something valid is supplied.
        while value is None:
            candidate = _parse_parameter_value(input(details['prompt']), expected_type, from_ner=False)
            if candidate is None:
                print("Please enter a valid number." if is_numeric else "Please enter a non-empty value.")
            elif is_numeric and candidate < 0:
                print("Please enter a value that is not negative.")
            else:
                value = candidate

        final_parameters[param_name] = value
    return final_parameters


In [5]:
# Read one free-text sentence and turn it into the required parameters;
# get_parameters prompts for any value it cannot obtain from the sentence.
parameters = get_parameters(input("Enter the sentence : "))

Enter the sentence :  he red soil contains 30 C with soil humidity of 45 % along with nutrient values N: 45 , P:34 , k:33
Enter Moisture (units):  12


In [6]:
# Show the collected parameter dict as the cell's output.
parameters

{'Temparature': 30.0,
 'Humidity': 45.0,
 'Moisture': 12.0,
 'Soil Type': 'red',
 'Nitrogen': 45,
 'Potassium': 33,
 'Phosphorous': 34}