In [1]:
pip install transformers torch pandas scikit-learn




In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer

data = pd.read_csv('corpus2 (1).csv')

label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

train_texts, test_texts, train_labels, test_labels = train_test_split(data['text'], data['label'], test_size=0.2, random_state=42)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128)


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

class CropDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels.tolist()

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = CropDataset(train_encodings, train_labels)
test_dataset = CropDataset(test_encodings, test_labels)

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps"
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)

trainer.train()

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss,Validation Loss
10,3.1083,3.096203
20,3.0982,3.094204
30,3.1135,3.102519
40,3.1002,3.126945
50,3.1037,3.140733
60,3.1463,3.099533
70,3.1565,3.098154
80,3.1395,3.099376
90,3.0877,3.095666
100,3.1227,3.10551


TrainOutput(global_step=880, training_loss=1.613702652264725, metrics={'train_runtime': 249.9492, 'train_samples_per_second': 28.166, 'train_steps_per_second': 3.521, 'total_flos': 199013466432000.0, 'train_loss': 1.613702652264725, 'epoch': 4.0})

In [5]:
# Function to predict crop type based on input features
def predict_crop(model, tokenizer, text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=1)
    return label_encoder.inverse_transform(prediction.cpu().numpy())[0]

# Function to get feature values one by one from the user
def get_features():
    feature_names = ["Nitrogen (N)", "Phosphorus (P)", "Potassium (K)", "Temperature", "Humidity", "pH", "Rainfall"]
    features = []

    print("Please enter the following features one by one:")

    for feature in feature_names:
        while True:
            try:
                value = float(input(f"Enter value for {feature}: "))
                features.append(value)
                break
            except ValueError:
                print(f"Invalid input for {feature}. Please enter a numeric value.")

    # Convert features to the string format expected by the tokenizer
    return " ".join(map(str, features))

# Chatbot interface
def chatbot():
    print("Welcome to the Crop Recommendation Chatbot!")
    print("You will be asked to input various feature values to get a crop recommendation.")

    while True:
        text = input("Type 'start' to enter features, or 'exit' to quit: ").lower()

        if text == 'exit' or text == 'quit':
            print("Goodbye!")
            break
        elif text == 'start':
            # Get features from user
            features = get_features()

            # Predict crop based on the features
            crop = predict_crop(model, tokenizer, features)
            print(f"Recommended crop: {crop}")
        else:
            print("Invalid command. Please type 'start' to begin or 'exit' to quit.")

# Run chatbot
chatbot()


Welcome to the Crop Recommendation Chatbot!
You will be asked to input various feature values to get a crop recommendation.
Type 'start' to enter features, or 'exit' to quit: start
Please enter the following features one by one:
Enter value for Nitrogen (N): 98
Enter value for Phosphorus (P): 89
Enter value for Potassium (K): 70
Enter value for Temperature: 34
Enter value for Humidity: 50
Enter value for pH: 6
Enter value for Rainfall: 90
Recommended crop: coffee
Type 'start' to enter features, or 'exit' to quit: quit
Goodbye!
