### Import the necessary libraries

In [10]:
import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import AutoTokenizer, BertForMaskedLM, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments


### Import the dataset

In [12]:
# Read the farm-terms CSV; the path is relative to the current working directory.
farmTerms = pd.read_csv(filepath_or_buffer='farm_terms.csv')

### Exploratory Data Analysis

In [13]:
# Peek at the first five rows to see what the text columns look like.
farmTerms.head(n=5)

Unnamed: 0,Term,Description
0,Agricultureencompasses crop andlivestock...,Agricultureencompasses crop andlivestockproduc...
1,"As of 2021[update],small farmsproduce ab...","As of 2021[update],small farmsproduce about on..."
2,The major agricultural products can be b...,The major agricultural products can be broadly...
3,"Modernagronomy,plant breeding,agrochemic...","Modernagronomy,plant breeding,agrochemicalssuc..."
4,More than 50 billion chickens are raised...,More than 50 billion chickens are raised annua...


In [14]:
# Peek at the last five rows to confirm the file was read through to the end.
farmTerms.tail(n=5)

Unnamed: 0,Term,Description
111,In addition to increasing crop yields ag...,In addition to increasing crop yields agronomi...
112,Agronomists study sustainable ways to ma...,Agronomists study sustainable ways to makesoil...
113,"Additionally, agronomists develop method...","Additionally, agronomists develop methods to p..."
114,Agroecologyis the management of agricult...,Agroecologyis the management of agricultural s...
115,Theoretical production ecologyis the qua...,Theoretical production ecologyis the quantitat...


In [16]:
# Print the column names, non-null counts and dtypes of the loaded frame.
farmTerms.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Term         116 non-null    object
 1   Description  116 non-null    object
dtypes: object(2)
memory usage: 1.9+ KB


### Text Tokenization



In [None]:
# Prepare dataset for Hugging Face model
def tokenize_function(examples, text_column="Description"):
    """Tokenize one batch of rows for the model.

    The function relies on the module-level ``tokenizer``, which must be
    created before this function is first called (it is only looked up at
    call time, so defining the function earlier is fine).

    Args:
        examples: Mapping from column name to a list of values, as passed by
            ``Dataset.map(..., batched=True)``.
        text_column: Name of the column holding the text to tokenize.
            Defaults to ``"Description"``; the loaded CSV only has the
            columns ``Term`` and ``Description``, so the previously used
            key ``"Farm Terms"`` raised ``KeyError``.

    Returns:
        Whatever ``tokenizer`` returns for the batch, with every sequence
        padded to the tokenizer's maximum length and truncated if longer.
    """
    return tokenizer(examples[text_column], padding="max_length", truncation=True)

In [None]:
# Tokenize data
# 1) load the pretrained tokenizer, 2) wrap the DataFrame as a Hugging Face
# Dataset, 3) run the batch tokenization function over every row.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
)
dataset = Dataset.from_pandas(df=farmTerms)
tokenized_dataset = dataset.map(function=tokenize_function, batched=True)


In [None]:
# Load pre-trained BERT model
# Uses the same checkpoint name as the tokenizer so vocabulary and weights match.
model = BertForMaskedLM.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
)

In [None]:
# Set training arguments
# Collect the settings in one mapping so they are easy to scan and tweak,
# then hand them to TrainingArguments in a single call.
training_settings = {
    "output_dir": "./results",
    "overwrite_output_dir": True,
    "num_train_epochs": 2,
    "per_device_train_batch_size": 16,
    "save_steps": 10_000,
    "save_total_limit": 2,
}
training_args = TrainingArguments(**training_settings)

In [None]:
# Initialize Trainer
# BertForMaskedLM only returns a loss when the batch contains `labels`. The
# tokenized dataset has none, so training would stop with "the model did not
# return a loss". The collator below creates the labels on the fly by randomly
# masking tokens in each batch (the masked-language-modelling objective).
mlm_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=True,
    mlm_probability=0.15,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=mlm_collator,
)


In [None]:
# Fine-tune the model
# The bare call is the cell's last expression, so the value returned by
# `train()` is what the notebook displays beneath the cell.
trainer.train()

In [None]:
# Build y_true / y_pred from masked-token predictions.
# The original cell used both names without ever defining them, so it failed
# with NameError on a fresh kernel.
# NOTE(review): the notebook has no held-out split, so this scores the same
# rows the model was trained on -- treat the numbers as a sanity check only.
eval_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=True,
    mlm_probability=0.15,
)
eval_trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=eval_collator,
    # Collapse the per-token vocabulary logits to predicted token ids batch by
    # batch, so the full logit tensor is never accumulated in memory.
    preprocess_logits_for_metrics=lambda logits, labels: logits.argmax(dim=-1),
)
eval_output = eval_trainer.predict(tokenized_dataset)

# The collator labels every position it did not mask with -100; only the
# masked positions carry a real target token id.
scored_positions = eval_output.label_ids != -100
y_true = eval_output.label_ids[scored_positions]
y_pred = eval_output.predictions[scored_positions]

# Accuracy
accuracy = accuracy_score(y_true, y_pred)

# Precision, Recall, F1 Score
# Token ids are a multiclass target, so average='binary' would raise a
# ValueError. Use support-weighted averaging, and set zero_division=0 on all
# three calls so classes that are never predicted do not trigger warnings.
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

In [None]:
# Save the fine-tuned model
# Write the model first, then the tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("farm_terms_finetuned_model")

print("Model fine-tuned and saved successfully!")