<a href="https://colab.research.google.com/github/Akansha3921/project-/blob/main/soil_crop_Recomendation_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import tkinter as tk
from tkinter import messagebox
import os

# Step 1: Load the dataset
def load_data():
    dataset_path = "soil_data.csv"
    data = pd.read_csv(dataset_path)
    return data

# Step 2: Preprocess the data
def preprocess_data(data):
    X = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph']]
    y = data['label']
    return train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train the model
def train_model(X_train, y_train):
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    return model

# Step 4: Evaluate the model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 5: Save the model
def save_model(model, filename="crop_recommendation_model.pkl"):
    with open(filename, 'wb') as file:
        pickle.dump(model, file)

# Step 6: Load the model
def load_model(filename="crop_recommendation_model.pkl"):
    with open(filename, 'rb') as file:
        return pickle.load(file)

# Step 7: Recommend crop
def recommend_crop_cli():
    try:
        N = float(input("Enter Nitrogen (N): "))
        P = float(input("Enter Phosphorus (P): "))
        K = float(input("Enter Potassium (K): "))
        temperature = float(input("Enter Temperature: "))
        humidity = float(input("Enter Humidity: "))
        pH = float(input("Enter pH: "))

        model = load_model()
        input_data = [[N, P, K, temperature, humidity, ph]]
        prediction = model.predict(input_data)

        print(f"Recommended Crop: {prediction[0]}")
    except Exception as e:
        print(f"An error occurred: {e}")

# Step 8: Create a GUI for crop recommendation
def create_gui():
    global entry_N, entry_P, entry_K, entry_temperature, entry_humidity, entry_pH

    root = tk.Tk()
    root.title("Crop Recommendation System")

    tk.Label(root, text="Nitrogen (N):").grid(row=0, column=0, padx=10, pady=5)
    entry_N = tk.Entry(root)
    entry_N.grid(row=0, column=1, padx=10, pady=5)

    tk.Label(root, text="Phosphorus (P):").grid(row=1, column=0, padx=10, pady=5)
    entry_P = tk.Entry(root)
    entry_P.grid(row=1, column=1, padx=10, pady=5)

    tk.Label(root, text="Potassium (K):").grid(row=2, column=0, padx=10, pady=5)
    entry_K = tk.Entry(root)
    entry_K.grid(row=2, column=1, padx=10, pady=5)

    tk.Label(root, text="Temperature:").grid(row=3, column=0, padx=10, pady=5)
    entry_temperature = tk.Entry(root)
    entry_temperature.grid(row=3, column=1, padx=10, pady=5)

    tk.Label(root, text="Humidity:").grid(row=4, column=0, padx=10, pady=5)
    entry_humidity = tk.Entry(root)
    entry_humidity.grid(row=4, column=1, padx=10, pady=5)

    tk.Label(root, text="pH:").grid(row=5, column=0, padx=10, pady=5)
    entry_ph = tk.Entry(root)
    entry_ph.grid(row=5, column=1, padx=10, pady=5)

    tk.Button(root, text="Recommend Crop", command=recommend_crop).grid(row=6, column=0, columnspan=2, pady=10)

    root.mainloop()

# Main function
def main():
    data = load_data()
    X_train, X_test, y_train, y_test = preprocess_data(data)
    model = train_model(X_train, y_train)
    evaluate_model(model, X_test, y_test)
    save_model(model)
    print("Model training complete and saved.")

if __name__ == "__main__":
    main()
    if os.environ.get("DISPLAY"):
        create_gui()
    else:
        print("No display found. Switching to command-line interface.")
        recommend_crop_cli()


Accuracy: 0.9727272727272728
Classification Report:
               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.90      0.95        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.88      0.96      0.92        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.69      1.00      0.81        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       0.95      0.88      0.91        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00     

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import os

# Check if the dataset file exists
dataset_path = "soil_data.csv"  # Replace with the actual path
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"The dataset file '{dataset_path}' was not found. Please make sure the file exists in the specified path.")

# Load your dataset
data = pd.read_csv(dataset_path)

# Feature columns and target column
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph']
target = 'label'  # The label column contains the crop names

# Split the dataset into features (X) and target (y)
X = data[features]
y = data[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree Classifier model
model = DecisionTreeClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the trained model to a file
with open("decision_tree_model.pkl", "wb") as file:
    pickle.dump(model, file)

print("Model saved as 'decision_tree_model.pkl'")


Accuracy: 96.82%
Classification Report:
               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      0.95      0.95        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.88      0.91      0.89        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.79      1.00      0.88        11
       maize       1.00      1.00      1.00        21
       mango       1.00      0.89      0.94        19
   mothbeans       1.00      0.83      0.91        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import os

# Define the dataset path
dataset_path = "soil_data.csv"

# Example dataset creation if file does not exist
if not os.path.exists(dataset_path):
    print(f"The dataset file '{dataset_path}' was not found. Creating an example dataset...")
    example_data = {
        'N': [90, 85, 78, 92],
        'P': [42, 40, 35, 45],
        'K': [60, 55, 50, 65],
        'temperature': [30, 32, 28, 31],
        'humidity': [80, 85, 75, 78],
        'ph': [6.5, 6.8, 7.0, 6.2],
        'label': ['Wheat', 'Rice', 'Maize', 'Barley']
    }
    pd.DataFrame(example_data).to_csv(dataset_path, index=False)
    print(f"Example dataset saved as '{dataset_path}'. Please replace it with your actual dataset if needed.")

# Load your dataset
data = pd.read_csv(dataset_path)

# Feature columns and target column
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph']
target = 'label'  # The label column contains the crop names

# Split the dataset into features (X) and target (y)
X = data[features]
y = data[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier model
model = RandomForestClassifier(random_state=42, n_estimators=100)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the trained model to a file
with open("random_forest_model.pkl", "wb") as file:
    pickle.dump(model, file)

print("Model saved as 'random_forest_model.pkl'")

# Function to predict crop based on user input
def predict_crop(N, P, K, temperature, humidity, ph):
    with open("random_forest_model.pkl", "rb") as file:
        loaded_model = pickle.load(file)
    input_features = pd.DataFrame([[N, P, K, temperature, humidity, ph]], columns=features)
    prediction = loaded_model.predict(input_features)
    return prediction[0]

# Example usage of the function
if __name__ == "__main__":
    # Example values for N, P, K, temperature, humidity, and ph
    N = 90
    P = 40
    K = 60
    temperature = 30
    humidity = 80
    ph = 6.5

    recommended_crop = predict_crop(N, P, K, temperature, humidity, ph)
    print(f"The recommended crop for the given values is: {recommended_crop}")


Accuracy: 97.27%
Classification Report:
               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.90      0.95        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.88      0.96      0.92        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.69      1.00      0.81        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       0.95      0.88      0.91        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1

In [32]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv("soil_data.csv")

# Check the columns in the dataset to ensure they match
print("Columns in the dataset:", data.columns)

# Rename the pH column to lowercase 'ph'
data = data.rename(columns={"pH": "ph"})

# Check if the 'humidity' column is available and print the first few rows for confirmation
print(data.head())

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Columns in the dataset: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'label'], dtype='object')
    N   P   K  temperature   humidity        ph label
0  90  42  43    20.879744  82.002744  6.502985  rice
1  85  58  41    21.770462  80.319644  7.038096  rice
2  60  55  44    23.004459  82.320763  7.840207  rice
3  74  35  40    26.491096  80.158363  6.980401  rice
4  78  42  42    20.130175  81.604873  7.628473  rice
Accuracy: 90.91%
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.85      0.85      0.85        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.65      0.65      0.65 

In [33]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv("soil_data.csv")

# Check the columns in the dataset to ensure they match
print("Columns in the dataset:", data.columns)

# Rename the pH column to lowercase 'ph'
data = data.rename(columns={"pH": "ph"})

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Support Vector Machine model
model = SVC(kernel='linear')  # You can use 'linear', 'rbf', 'poly', etc.
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Columns in the dataset: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'label'], dtype='object')
Accuracy: 92.50%
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.84      0.80      0.82        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.71      0.74      0.72        23
 kidneybeans       0.95      1.00      0.98        20
      lentil       0.56      0.91      0.69        11
       maize       1.00      0.95      0.98        21
       mango       0.95      1.00      0.97        19
   mothbeans       0.89      0.71      0.79        24
    mungbean       1.00      1.00      1.00 

In [34]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv("soil_data.csv")

# Check the columns in the dataset to ensure they match
print("Columns in the dataset:", data.columns)

# Rename the pH column to lowercase 'ph'
data = data.rename(columns={"pH": "ph"})

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the K-Nearest Neighbors model
model = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (k)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Columns in the dataset: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'label'], dtype='object')
Accuracy: 90.00%
Classification Report:
              precision    recall  f1-score   support

       apple       0.96      1.00      0.98        23
      banana       1.00      1.00      1.00        21
   blackgram       0.78      0.90      0.84        20
    chickpea       1.00      1.00      1.00        26
     coconut       0.96      1.00      0.98        27
      coffee       0.89      1.00      0.94        17
      cotton       0.89      1.00      0.94        17
      grapes       1.00      0.93      0.96        14
        jute       0.67      0.78      0.72        23
 kidneybeans       0.91      1.00      0.95        20
      lentil       0.47      0.73      0.57        11
       maize       1.00      0.86      0.92        21
       mango       0.86      1.00      0.93        19
   mothbeans       0.94      0.67      0.78        24
    mungbean       1.00      1.00      1.00 

In [37]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv("soil_data.csv")

# Check the columns in the dataset to ensure they match
print("Columns in the dataset:", data.columns)

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the K-Nearest Neighbors model
model = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (k)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Columns in the dataset: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'label'], dtype='object')
Accuracy: 90.00%
Classification Report:
              precision    recall  f1-score   support

       apple       0.96      1.00      0.98        23
      banana       1.00      1.00      1.00        21
   blackgram       0.78      0.90      0.84        20
    chickpea       1.00      1.00      1.00        26
     coconut       0.96      1.00      0.98        27
      coffee       0.89      1.00      0.94        17
      cotton       0.89      1.00      0.94        17
      grapes       1.00      0.93      0.96        14
        jute       0.67      0.78      0.72        23
 kidneybeans       0.91      1.00      0.95        20
      lentil       0.47      0.73      0.57        11
       maize       1.00      0.86      0.92        21
       mango       0.86      1.00      0.93        19
   mothbeans       0.94      0.67      0.78        24
    mungbean       1.00      1.00      1.00 

In [40]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv("soil_data.csv")

# Check the columns in the dataset to ensure they match
print("Columns in the dataset:", data.columns)

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Neural Network (MLPClassifier)
model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)  # You can adjust the number of neurons and iterations
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Function to recommend the best crop based on input soil characteristics
def recommend_crop(N, P, K, ph, temperature, humidity):
    input_data = pd.DataFrame([[N, P, K, ph, temperature, humidity]], columns=['N', 'P', 'K', 'ph', 'temperature', 'humidity'])
    input_data_scaled = scaler.transform(input_data)  # Apply scaling to the input data
    prediction = model.predict(input_data_scaled)  # Predict the crop
    print(f"Recommended crop for the given soil characteristics: {prediction[0]}")

# Example input for recommendation
N = float(input("Enter Nitrogen content (N): "))
P = float(input("Enter Phosphorus content (P): "))
K = float(input("Enter Potassium content (K): "))
ph = float(input("Enter pH value: "))
temperature = float(input("Enter Temperature (°C): "))
humidity = float(input("Enter Humidity (%): "))

# Recommend the crop based on the input values
recommend_crop(N, P, K, ph, temperature, humidity)


Columns in the dataset: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'label'], dtype='object')
Accuracy: 94.09%
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.85      0.92        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.74      0.74      0.74        23
 kidneybeans       0.95      1.00      0.98        20
      lentil       0.52      1.00      0.69        11
       maize       1.00      0.95      0.98        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.79      0.88        24
    mungbean       1.00      1.00      1.00 

In [42]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Load the dataset
data = pd.read_csv("soil_data.csv")

# Check the columns in the dataset to ensure they match
print("Columns in the dataset:", data.columns)

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Encode the target variable (crop labels) to numeric values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the XGBoost model
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Function to recommend the best crop based on input soil characteristics
def recommend_crop(N, P, K, ph, temperature, moisture):
    input_data = pd.DataFrame([[N, P, K, ph, temperature, moisture]], columns=['N', 'P', 'K', 'ph', 'temperature', 'humidity'])
    input_data_scaled = scaler.transform(input_data)  # Apply scaling to the input data
    prediction = model.predict(input_data_scaled)  # Predict the crop
    predicted_crop = label_encoder.inverse_transform(prediction)  # Convert numeric prediction back to crop label
    print(f"Recommended crop for the given soil characteristics: {predicted_crop[0]}")

# Example input for recommendation
n = 83
p = 95
K = 50
temperature = 26.51
moisture = 77.79
ph = 5.50

# Recommend the crop based on the input values
recommend_crop(n, p, K, ph, temperature, moisture)


Columns in the dataset: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'label'], dtype='object')


Parameters: { "use_label_encoder" } are not used.



Accuracy: 96.59%
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      0.96      0.98        27
      coffee       1.00      1.00      1.00        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.83      0.87      0.85        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.79      1.00      0.88        11
       maize       1.00      0.95      0.98        21
       mango       1.00      1.00      1.00        19
   mothbeans       0.96      0.92      0.94        24
    mungbean       0.95      1.00      0.97        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1.

In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import lightgbm as lgb
import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning)  # Suppress UserWarnings (like LightGBM training logs)
warnings.filterwarnings("ignore", category=FutureWarning)  # Suppress FutureWarnings (like scikit-learn deprecation)

# Load the dataset from the CSV file
data = pd.read_csv("soil_data.csv")

# Check the structure of the data to ensure it's loaded properly
print(data.head())

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Encode the target variable (crop labels) to numeric values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the LightGBM model
model = lgb.LGBMClassifier(
    random_state=42,
    num_leaves=31,   # Controls the complexity of the model
    learning_rate=0.05,  # A common learning rate for better convergence
    n_estimators=100,  # Number of boosting iterations
    verbosity=-1  # Suppress LightGBM output to avoid log messages
)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Function to recommend the best crop based on input soil characteristics
def recommend_crop(N, P, K, ph, temperature, moisture):
    input_data = pd.DataFrame([[N, P, K, ph, temperature, moisture]], columns=['N', 'P', 'K', 'ph', 'temperature', 'humidity'])
    input_data_scaled = scaler.transform(input_data)  # Apply scaling to the input data
    prediction = model.predict(input_data_scaled)  # Predict the crop
    predicted_crop = label_encoder.inverse_transform(prediction)  # Convert numeric prediction back to crop label
    print(f"Recommended crop for the given soil characteristics: {predicted_crop[0]}")

# Example input for recommendation
n = 83
p = 95
K = 50
temperature = 26.51
moisture = 77.79
ph = 5.50

# Recommend the crop based on the input values
recommend_crop(n, p, K, ph, temperature, moisture)


    N   P   K  temperature   humidity        ph label
0  90  42  43    20.879744  82.002744  6.502985  rice
1  85  58  41    21.770462  80.319644  7.038096  rice
2  60  55  44    23.004459  82.320763  7.840207  rice
3  74  35  40    26.491096  80.158363  6.980401  rice
4  78  42  42    20.130175  81.604873  7.628473  rice
Accuracy: 96.59%
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      0.95      0.95        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      0.96      0.98        27
      coffee       1.00      1.00      1.00        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.84      0.91      0.88        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.73      1.00      0.

In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning)  # Suppress UserWarnings
warnings.filterwarnings("ignore", category=FutureWarning)  # Suppress FutureWarnings

# Load the dataset from the CSV file
data = pd.read_csv("soil_data.csv")

# Check the structure of the data to ensure it's loaded properly
print(data.head())

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Encode the target variable (crop labels) to numeric values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Random Forest Classifier model
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,  # Number of trees in the forest
    max_depth=None,  # No limit on the depth of trees
    min_samples_split=2,  # Minimum number of samples to split a node
    min_samples_leaf=1,  # Minimum number of samples to be at a leaf node
    n_jobs=-1  # Use all cores for training
)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Function to recommend the best crop based on input soil characteristics
def recommend_crop(N, P, K, ph, temperature, moisture):
    input_data = pd.DataFrame([[N, P, K, ph, temperature, moisture]], columns=['N', 'P', 'K', 'ph', 'temperature', 'humidity'])
    input_data_scaled = scaler.transform(input_data)  # Apply scaling to the input data
    prediction = model.predict(input_data_scaled)  # Predict the crop label (numeric value)
    predicted_crop = label_encoder.inverse_transform([prediction[0]])  # Convert numeric prediction back to crop label
    print(f"Recommended crop for the given soil characteristics: {predicted_crop[0]}")

# Example input for recommendation
n = 83
p = 95
K = 50
temperature = 26.51
moisture = 77.79
ph = 5.50

# Recommend the crop based on the input values
recommend_crop(n, p, K, ph, temperature, moisture)


    N   P   K  temperature   humidity        ph label
0  90  42  43    20.879744  82.002744  6.502985  rice
1  85  58  41    21.770462  80.319644  7.038096  rice
2  60  55  44    23.004459  82.320763  7.840207  rice
3  74  35  40    26.491096  80.158363  6.980401  rice
4  78  42  42    20.130175  81.604873  7.628473  rice
Accuracy: 97.50%

Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.88      0.91      0.89        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.73      1.00      0

In [48]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor
import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning)  # Suppress UserWarnings
warnings.filterwarnings("ignore", category=FutureWarning)  # Suppress FutureWarnings

# Load the dataset from the CSV file
data = pd.read_csv("soil_data.csv")

# Check the structure of the data to ensure it's loaded properly
print(data.head())

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Encode the target variable (crop labels) to numeric values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Gradient Boosting Regressor model
model = GradientBoostingRegressor(
    n_estimators=100,  # Number of boosting stages to perform
    learning_rate=0.1,  # The contribution of each tree to the final model
    max_depth=3,  # Maximum depth of the individual trees
    random_state=42
)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model performance
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

# Function to recommend the best crop based on input soil characteristics
def recommend_crop(N, P, K, ph, temperature, moisture):
    input_data = pd.DataFrame([[N, P, K, ph, temperature, moisture]], columns=['N', 'P', 'K', 'ph', 'temperature', 'humidity'])
    input_data_scaled = scaler.transform(input_data)  # Apply scaling to the input data
    prediction = model.predict(input_data_scaled)  # Predict the crop label (numeric value)
    predicted_crop = label_encoder.inverse_transform([int(round(prediction[0]))])  # Convert numeric prediction back to crop label
    print(f"Recommended crop for the given soil characteristics: {predicted_crop[0]}")

# Example input for recommendation
n = 83
p = 95
K = 50
temperature = 26.51
moisture = 77.79
ph = 5.50

# Recommend the crop based on the input values
recommend_crop(n, p, K, ph, temperature, moisture)


    N   P   K  temperature   humidity        ph label
0  90  42  43    20.879744  82.002744  6.502985  rice
1  85  58  41    21.770462  80.319644  7.038096  rice
2  60  55  44    23.004459  82.320763  7.840207  rice
3  74  35  40    26.491096  80.158363  6.980401  rice
4  78  42  42    20.130175  81.604873  7.628473  rice
Mean Absolute Error: 1.5948408437228512
R-squared: 0.8509045675986933
Recommended crop for the given soil characteristics: blackgram


In [49]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning)  # Suppress UserWarnings
warnings.filterwarnings("ignore", category=FutureWarning)  # Suppress FutureWarnings

# Load the dataset from the CSV file
data = pd.read_csv("soil_data.csv")

# Check the structure of the data to ensure it's loaded properly
print(data.head())

# Feature selection - assuming N, P, K, ph, temperature, humidity are the input features
X = data[['N', 'P', 'K', 'ph', 'temperature', 'humidity']]

# Target variable - using 'label' as the column indicating the suitable crop
y = data['label']

# Encode the target variable (crop labels) to numeric values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Feature scaling - Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Gradient Boosting Classifier model
model = GradientBoostingClassifier(
    n_estimators=100,  # Number of boosting stages to perform
    learning_rate=0.1,  # The contribution of each tree to the final model
    max_depth=3,  # Maximum depth of the individual trees
    random_state=42
)
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Classification report (precision, recall, f1-score)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Confusion Matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Function to recommend the best crop based on input soil characteristics
def recommend_crop(N, P, K, ph, temperature, moisture):
    input_data = pd.DataFrame([[N, P, K, ph, temperature, moisture]], columns=['N', 'P', 'K', 'ph', 'temperature', 'humidity'])
    input_data_scaled = scaler.transform(input_data)  # Apply scaling to the input data
    prediction = model.predict(input_data_scaled)  # Predict the crop label (numeric value)
    predicted_crop = label_encoder.inverse_transform([prediction[0]])  # Convert numeric prediction back to crop label
    print(f"Recommended crop for the given soil characteristics: {predicted_crop[0]}")

# Example input for recommendation
n = 83
p = 95
K = 50
temperature = 26.51
moisture = 77.79
ph = 5.50

# Recommend the crop based on the input values
recommend_crop(n, p, K, ph, temperature, moisture)


    N   P   K  temperature   humidity        ph label
0  90  42  43    20.879744  82.002744  6.502985  rice
1  85  58  41    21.770462  80.319644  7.038096  rice
2  60  55  44    23.004459  82.320763  7.840207  rice
3  74  35  40    26.491096  80.158363  6.980401  rice
4  78  42  42    20.130175  81.604873  7.628473  rice
Accuracy: 95.68%

Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      0.96      0.98        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.78      0.91      0.84        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.61      1.00      0

In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier

# Load the dataset from the CSV file
df = pd.read_csv('soil_data.csv')

# Check the first few rows of the dataset to understand its structure
print(df.head())

# Encode the target labels as integers
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Features and target
X = df.drop('label', axis=1)  # Features
y = df['label']  # Target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the CatBoost model
model = CatBoostClassifier(iterations=500, learning_rate=0.1, depth=6, random_state=42, cat_features=[])
model.fit(X_train_scaled, y_train)

# Accuracy on test set
accuracy = model.score(X_test_scaled, y_test) * 100
print(f"Model Accuracy: {accuracy:.2f}%")

# Function to predict crop based on user input
def recommend_crop(N, P, K, temperature, humidity, ph):
    # Prepare the input features in the same format as the training data
    input_data = pd.DataFrame([[N, P, K, temperature, humidity, ph]], columns=X.columns)

    # Scale the input features
    input_scaled = scaler.transform(input_data)

    # Predict the label
    predicted_label = model.predict(input_scaled)[0]

    # Convert the numeric label back to the crop name
    predicted_crop = label_encoder.inverse_transform([predicted_label])[0]

    return predicted_crop

# Example usage - replace with user input or interactive method
N = float(input("Enter Nitrogen (N) value: "))
P = float(input("Enter Phosphorus (P) value: "))
K = float(input("Enter Potassium (K) value: "))
temperature = float(input("Enter temperature value: "))
humidity = float(input("Enter humidity value: "))
ph = float(input("Enter pH value: "))

# Recommend crop
recommended_crop = recommend_crop(N, P, K, temperature, humidity, ph)
print(f"The recommended crop is: {recommended_crop}")


    N   P   K  temperature   humidity        ph label
0  90  42  43    20.879744  82.002744  6.502985  rice
1  85  58  41    21.770462  80.319644  7.038096  rice
2  60  55  44    23.004459  82.320763  7.840207  rice
3  74  35  40    26.491096  80.158363  6.980401  rice
4  78  42  42    20.130175  81.604873  7.628473  rice
0:	learn: 2.4979295	total: 67.6ms	remaining: 33.7s
1:	learn: 2.1828617	total: 98.2ms	remaining: 24.5s
2:	learn: 1.8875509	total: 133ms	remaining: 22s
3:	learn: 1.6451391	total: 169ms	remaining: 21s
4:	learn: 1.4695191	total: 202ms	remaining: 20s
5:	learn: 1.3318852	total: 272ms	remaining: 22.4s
6:	learn: 1.1874715	total: 303ms	remaining: 21.4s
7:	learn: 1.0800975	total: 346ms	remaining: 21.3s
8:	learn: 0.9916743	total: 393ms	remaining: 21.4s
9:	learn: 0.9134332	total: 431ms	remaining: 21.1s
10:	learn: 0.8467523	total: 485ms	remaining: 21.6s
11:	learn: 0.7802408	total: 526ms	remaining: 21.4s
12:	learn: 0.7323859	total: 563ms	remaining: 21.1s
13:	learn: 0.6839107	total: