In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Single seed shared by the split and the forest so a Restart & Run All
# reproduces the same metrics and the same saved model.
RANDOM_STATE = 42

data = pd.read_csv('Crop_recommendation.csv')

# Features and target variable
X = data[['N', 'P', 'K', 'soil_type']]
y = data['label']

# Preprocessing: one-hot encode the categorical soil type and pass the
# remaining (N, P, K) columns through unchanged.
# handle_unknown='ignore' encodes a soil type that was not seen during fit as
# an all-zero row instead of raising, so predicting on free-text user input
# cannot crash on an unfamiliar category.
preprocessor = ColumnTransformer(
    transformers=[
        ('soil', OneHotEncoder(handle_unknown='ignore'), ['soil_type'])
    ],
    remainder='passthrough'
)

# Full pipeline: preprocessing followed by the classifier, so the saved
# artifact accepts raw rows with the same four columns as X.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=RANDOM_STATE))
])

# Split the data (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Train the model
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)

# Print accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Print confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the fitted pipeline (preprocessing + classifier) to disk
joblib.dump(model, 'random_forest_model_with_soil_type.joblib')

# Helpers that collect one set of soil parameters from the user
def _read_float(prompt):
    """Prompt once and parse the reply as a float.

    Raises:
        ValueError: if the reply is not a valid number; the message names the
            prompt and echoes the rejected text.
    """
    raw = input(prompt)
    try:
        return float(raw)
    except ValueError:
        raise ValueError(
            f"Expected a number for {prompt.strip()!r}, got {raw!r}"
        ) from None


def get_user_input():
    """Interactively read N, P, K and soil type and return them as a DataFrame.

    Returns:
        pandas.DataFrame: a single row with columns 'N', 'P', 'K' (floats) and
        'soil_type' (string), in that order.

    Raises:
        ValueError: if any of the three numeric replies cannot be parsed.
    """
    print("Enter the following soil parameters:")
    nitrogen = _read_float("Nitrogen (N, kg/ha): ")
    phosphorus = _read_float("Phosphorus (P, kg/ha): ")
    potassium = _read_float("Potassium (K, kg/ha): ")
    # Strip surrounding whitespace so a stray space does not turn a valid
    # soil type into a different, unrecognised category string.
    soil_type = input("Soil type: ").strip()

    # Create a DataFrame for the input
    input_data = pd.DataFrame([[nitrogen, phosphorus, potassium, soil_type]],
                              columns=['N', 'P', 'K', 'soil_type'])
    return input_data

# Prompt for one set of soil parameters (returned as a single-row DataFrame)
user_input = get_user_input()

# Run the trained pipeline on that row; predict returns one label per row
predicted_crop_type = model.predict(user_input)

# Show the label predicted for the only row
print(f"The recommended crop type for the given parameters is: {predicted_crop_type[0]}")

Model Accuracy: 0.82
Confusion Matrix:
[[12  0  0  0  0  0  0 11  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 26  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 7  0  0  0  0  0  0  7  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 23  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  7 13  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  3  8  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 21  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0 11 13  0  0  0  0  0  0

Nitrogen (N, kg/ha):  10 
Phosphorus (P, kg/ha):  5
Potassium (K, kg/ha):  20
Soil type:  Loamy


The recommended crop type for the given parameters is: orange


In [2]:
import joblib

# Persist the fitted pipeline from the previous cell under a second file name
joblib.dump(value=model, filename='random_forest_model.joblib')

['random_forest_model.joblib']