In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
        # print(os.path.join(dirname, filename))
#
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# Never embed an access token in a notebook: anyone who can read the file (or
# its saved outputs) can use it. Read it from the HF_TOKEN environment variable
# and fall back to a hidden interactive prompt when the variable is not set.
# NOTE: the token that was previously written in this cell has been exposed
# and should be revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

In [3]:
# Shell out to the Hugging Face CLI to print the account the current login resolves to.
!huggingface-cli whoami

aryan6637


In [4]:
import torch
import pandas as pd
from datasets import Dataset, Features, Sequence, Value, ClassLabel
from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer, DataCollatorForTokenClassification
import numpy as np
import os

In [5]:
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForTokenClassification,
    EvalPrediction,
    pipeline
)

In [1]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import re
import sys

# Identifier handed to `from_pretrained` for both the tokenizer and the model.
MODEL_ID = "aryan6637/ner_training_with_values"

# Entity names in label order; each one contributes a "B-" tag followed by an
# "I-" tag, after the leading "O" tag.
_ENTITY_NAMES = (
    "SOIL_TYPE",
    "NUTRIENT",
    "TEMPERATURE_VALUE",
    "HUMIDITY_VALUE",
    "MOISTURE_VALUE",
    "NITROGEN_VALUE",
    "POTASSIUM_VALUE",
    "PHOSPHOROUS_VALUE",
)
ENTITY_TYPES = ["O"] + [
    f"{prefix}-{entity_name}"
    for entity_name in _ENTITY_NAMES
    for prefix in ("B", "I")
]

# Two-way mapping between tag strings and their position in ENTITY_TYPES.
ID_TO_LABEL = dict(enumerate(ENTITY_TYPES))
LABEL_TO_ID = {label: index for index, label in ID_TO_LABEL.items()}

# One row per parameter the notebook needs:
# (parameter name, NER entity group, Python type, manual-entry prompt).
# The 'Temparature' spelling is deliberate here: the instruction text built
# later in this notebook looks the value up under exactly that key.
_PARAM_SPECS = (
    ('Temparature', 'TEMPERATURE_VALUE', float, 'Enter Temperature (°C): '),
    ('Humidity', 'HUMIDITY_VALUE', float, 'Enter Humidity (%): '),
    ('Moisture', 'MOISTURE_VALUE', float, 'Enter Moisture (units): '),
    ('Soil Type', 'SOIL_TYPE', str, 'Enter Soil Type: '),
    ('Nitrogen', 'NITROGEN_VALUE', int, 'Enter Nitrogen (ppm): '),
    ('Potassium', 'POTASSIUM_VALUE', int, 'Enter Potassium (ppm): '),
    ('Phosphorous', 'PHOSPHOROUS_VALUE', int, 'Enter Phosphorous (ppm): '),
)
REQUIRED_PARAMS = {
    param_name: {'ner_tag': tag, 'type': value_type, 'prompt': prompt_text}
    for param_name, tag, value_type, prompt_text in _PARAM_SPECS
}

def clean_numeric_string(value_str):
    """Strip unit markers and thousands separators from a numeric reading.

    The markers ``%``, ``°C``, ``C``, ``units``, ``ppm`` and ``,`` are removed
    in that order, ignoring letter case, so that inputs such as ``"45 c"`` or
    ``"30 PPM"`` are cleaned the same way as ``"45 C"`` and ``"30 ppm"``.
    Surrounding whitespace is trimmed afterwards.

    Args:
        value_str: Text to clean, or ``None``.

    Returns:
        The cleaned text, or ``None`` when ``value_str`` is ``None``. The
        result is not guaranteed to be parseable as a number; callers still
        need to handle conversion errors.
    """
    if value_str is None:
        return None
    cleaned = value_str
    for marker in ('%', '°C', 'C', 'units', 'ppm', ','):
        # re.escape keeps '%' and ',' literal; IGNORECASE covers 'c', 'PPM', 'Units', ...
        cleaned = re.sub(re.escape(marker), '', cleaned, flags=re.IGNORECASE)
    return cleaned.strip()

def clean_soil_type_string(value_str):
    """Tidy a soil-type string.

    Trims surrounding whitespace, drops any run of trailing ``,`` / ``.``
    characters, then trims whitespace once more. ``None`` is passed through.
    """
    if value_str is not None:
        trimmed = value_str.strip()
        without_trailing_punctuation = trimmed.rstrip(',.')
        return without_trailing_punctuation.strip()
    return None

def clean_nutrient_name_string(value_str):
    """Tidy a nutrient-name string.

    Whitespace is trimmed, trailing commas and periods are removed, and any
    whitespace exposed by that removal is trimmed as well. Returns ``None``
    when given ``None``.
    """
    if value_str is None:
        return None
    name = value_str.strip()
    name = name.rstrip(',.')
    return name.strip()

# Load the tokenizer and the token-classification model identified by MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Wrap both in an NER pipeline. The code that consumes `nlp` reads the
# 'entity_group' and 'word' fields of each returned entity.
nlp = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

import math  # used below to reject NaN / infinite readings


def _coerce_value(text, expected_type):
    """Convert ``text`` to ``expected_type``.

    For ``float`` and ``int`` the text is parsed with ``float()``; ``int``
    values are then truncated with ``int()``. Any other type is treated as a
    string and stripped of surrounding whitespace.

    Raises:
        ValueError: if a numeric text is not a finite number, or a string
            value is empty after stripping.
    """
    if expected_type in (float, int):
        number = float(text)
        # NaN would slip past a `< 0` check and int(inf) raises OverflowError,
        # so refuse both here with the same exception type as a parse failure.
        if not math.isfinite(number):
            raise ValueError("value must be a finite number")
        return number if expected_type == float else int(number)
    stripped = text.strip()
    if not stripped:
        raise ValueError("value must not be empty")
    return stripped


def _prompt_for_value(prompt, expected_type):
    """Ask the user for a value until a valid, non-negative one is entered."""
    while True:
        try:
            value = _coerce_value(input(prompt), expected_type)
        except ValueError as err:
            # Tell the user why the prompt is being repeated.
            print(f"Invalid input ({err}). Please try again.")
            continue
        if expected_type in (float, int) and value < 0:
            print("Value must not be negative. Please try again.")
            continue
        return value


try:
    sentence = input("Sentence: ")
    if not sentence.strip():
        sys.exit(0)

    ner_results = nlp(sentence)
    extracted_raw_values = {}
    detected_nutrient_names = []

    # Reverse lookup from NER entity group to parameter name; the first
    # parameter declaring a tag wins, matching a first-match scan.
    tag_to_param = {}
    for req_param_name, details in REQUIRED_PARAMS.items():
        tag_to_param.setdefault(details['ner_tag'], req_param_name)

    for entity in ner_results:
        if entity['entity_group'] == 'NUTRIENT':
            cleaned_name = clean_nutrient_name_string(entity['word'])
            if cleaned_name and cleaned_name.lower() in ['nitrogen', 'n', 'potassium', 'k', 'phosphorous', 'phosphorus', 'p'] and cleaned_name not in detected_nutrient_names:
                detected_nutrient_names.append(cleaned_name)

        # Keep only the first entity seen for each required parameter.
        param_name_from_tag = tag_to_param.get(entity['entity_group'])
        if param_name_from_tag and param_name_from_tag not in extracted_raw_values:
            extracted_raw_values[param_name_from_tag] = entity['word']

    final_parameters = {}
    for param_name, details in REQUIRED_PARAMS.items():
        expected_type = details['type']
        raw_value_from_ner = extracted_raw_values.get(param_name)

        processed_value = None

        if raw_value_from_ner:
            cleaner = clean_numeric_string if expected_type in [float, int] else clean_soil_type_string
            try:
                # NOTE(review): NER-extracted numbers are not range-checked;
                # only manual entry rejects negative values.
                processed_value = _coerce_value(cleaner(raw_value_from_ner), expected_type)
            except ValueError:
                print(f"Could not use extracted value {raw_value_from_ner!r} for {param_name}; please enter it manually.")

        # Fall back to manual entry when nothing usable was extracted.
        if processed_value is None:
            processed_value = _prompt_for_value(details['prompt'], expected_type)

        final_parameters[param_name] = processed_value

    instruction_text = (
        f"Given the following soil and environmental parameters:\n"
        f"- Temperature: {final_parameters['Temparature']}°C\n"
        f"- Humidity: {final_parameters['Humidity']}%\n"
        f"- Moisture: {final_parameters['Moisture']}\n"
        f"- Soil Type: {final_parameters['Soil Type']}\n"
        f"- Nitrogen: {final_parameters['Nitrogen']} ppm\n"
        f"- Potassium: {final_parameters['Potassium']} ppm\n"
        f"- Phosphorous: {final_parameters['Phosphorous']} ppm\n\n"
        f"Predict the suitable Crop Type and Fertilizer Name, and provide brief information about how they work or their characteristics."
    )
    print(instruction_text)

except Exception as e:
    print(f"\nError: {e}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/431M [00:00<?, ?B/s]

Device set to use cpu


Sentence: The red soil has a total temprature of 45 C with humidity of 45 % and the nutrients are Pottasium :45 ,Phosporus:34
Enter Moisture (units): 23
Enter Nitrogen (ppm): 45
Given the following soil and environmental parameters:
- Temperature: 45.0°C
- Humidity: 45.0%
- Moisture: 23.0
- Soil Type: red
- Nitrogen: 45 ppm
- Potassium: 45 ppm
- Phosphorous: 34 ppm

Predict the suitable Crop Type and Fertilizer Name, and provide brief information about how they work or their characteristics.


In [2]:
final_parameters

{'Temparature': 45.0,
 'Humidity': 45.0,
 'Moisture': 23.0,
 'Soil Type': 'red',
 'Nitrogen': 45,
 'Potassium': 45,
 'Phosphorous': 34}