In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the crop dataset and report how many values are missing in each column.
df = pd.read_csv("Crop_recommendation.csv")
missing_counts = df.isnull().sum()
print("Missing Values:\n", missing_counts)

# Discard incomplete rows before transforming any column.
df.dropna(inplace=True)

# Replace the values in 'label' with integer codes; the fitted encoder stays
# available as `label_encoder`.
label_encoder = LabelEncoder().fit(df['label'])
df['label'] = label_encoder.transform(df['label'])

# Standardise the numeric feature columns in place.
# NOTE(review): the scaler is fitted on every row (no train/test split has
# happened yet) and the later cells re-read the CSV rather than using `df` --
# confirm whether this preprocessing is meant to feed the model at all.
numerical_features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
scaler = StandardScaler().fit(df[numerical_features])
df[numerical_features] = scaler.transform(df[numerical_features])

print(df.head())


Missing Values:
 N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64
          N         P         K  temperature  humidity        ph  rainfall  \
0  1.068797 -0.344551 -0.101688    -0.935587  0.472666  0.043302  1.810361   
1  0.933329  0.140616 -0.141185    -0.759646  0.397051  0.734873  2.242058   
2  0.255986  0.049647 -0.081939    -0.515898  0.486954  1.771510  2.921066   
3  0.635298 -0.556811 -0.160933     0.172807  0.389805  0.660308  2.537048   
4  0.743673 -0.344551 -0.121436    -1.083647  0.454792  1.497868  2.898373   

   label  
0     20  
1     20  
2     20  
3     20  
4     20  


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Re-read the CSV so the split works on the file's original values rather
# than on the frame transformed in the previous cell.
data = pd.read_csv('Crop_recommendation.csv')

X = data.drop(columns='label')
y = data['label']

# First split: hold out 10% of all rows as the unseen test set, stratified by
# label so every class keeps the same share.
X_train_valid, X_unseen, y_train_valid, y_unseen = train_test_split(
    X, y, test_size=0.10, random_state=42, stratify=y
)

# Second split: 10% of the REMAINING 90% becomes the validation set.
# Overall that is roughly 81% train / 9% validation / 10% unseen test
# (not an exact 80/10/10 split).
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_valid,
    y_train_valid,
    test_size=0.10,
    random_state=42,
    stratify=y_train_valid,
)

# The three parts must partition the dataset exactly.
assert len(X_train) + len(X_valid) + len(X_unseen) == len(X)

print(f"Training Data: {X_train.shape}, Validation Data: {X_valid.shape}, Unseen Test Data: {X_unseen.shape}")


Training Data: (1782, 7), Validation Data: (198, 7), Unseen Test Data: (220, 7)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Fit a random forest on the training split (fit() returns the estimator).
model = RandomForestClassifier(
    n_estimators=50,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=1,
    random_state=42,
).fit(X_train, y_train)

# Score the fitted model on the validation split.
# confusion_matrix / classification_report come from the import in the
# previous cell.
y_valid_pred = model.predict(X_valid)
valid_accuracy = accuracy_score(y_valid, y_valid_pred)
conf_matrix = confusion_matrix(y_valid, y_valid_pred)
class_report = classification_report(y_valid, y_valid_pred)

print(f"Validation Accuracy: {valid_accuracy:.4f}")
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)

Validation Accuracy: 0.9949

Confusion Matrix:
 [[9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 8 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0]
 [0 0 0 0 0 

In [None]:
# Run the trained model on the held-out split that was not used for fitting
# or validation.
y_unseen_pred = model.predict(X_unseen)

# Compute all three metrics first, then print them in the usual order.
# Note that `conf_matrix` and `class_report` overwrite the validation results
# stored under the same names by the previous cell.
unseen_accuracy = accuracy_score(y_unseen, y_unseen_pred)
conf_matrix = confusion_matrix(y_unseen, y_unseen_pred)
class_report = classification_report(y_unseen, y_unseen_pred)

print(f"Unseen Test Accuracy: {unseen_accuracy:.4f}")
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)

Unseen Test Accuracy: 0.9909

Confusion Matrix:
 [[10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  9  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0 10  0  0  0  

In [None]:
import joblib

# Persist the fitted classifier to disk. As the cell's last expression, the
# list of written file names returned by joblib.dump is displayed.
joblib.dump(value=model, filename="crop_recommendation_model.joblib")


['crop_recommendation_model.joblib']

In [None]:
import numpy as np
import joblib

# Names of the inputs the user is asked for, in the order they are placed in
# the row passed to the model.
feature_names = ["N", "P", "K", "temperature", "humidity", "ph", "rainfall"]

# Restore the classifier from the file written by the save cell.
# NOTE: joblib files are pickle-based -- only load files from a trusted source.
model = joblib.load(filename="crop_recommendation_model.joblib")

def get_user_input(features=None):
    """Prompt for one numeric value per feature and return them as one row.

    Parameters
    ----------
    features : sequence of str, optional
        Names to prompt for, in order. When omitted, the notebook-level
        ``feature_names`` list is used.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(features))`` holding the entered values as
        floats, in prompt order.

    Notes
    -----
    An entry that cannot be parsed as a number, or that is NaN/infinite, is
    rejected with a message and the same feature is asked again, so a single
    typo no longer aborts the cell with ``ValueError``.
    """
    if features is None:
        features = feature_names

    user_data = []
    print("Enter the values for the following features:")
    for feature in features:
        # Keep asking for this feature until a usable number is entered.
        while True:
            raw = input(f"{feature}: ")
            try:
                value = float(raw)
            except ValueError:
                print(f"'{raw}' is not a number. Please enter a numeric value for {feature}.")
                continue
            if not np.isfinite(value):
                # float() accepts 'nan' and 'inf', which are not meaningful inputs here.
                print(f"'{raw}' is not a finite number. Please enter a numeric value for {feature}.")
                continue
            break
        user_data.append(value)

    # reshape(1, -1) turns the flat list into a single-row 2-D array.
    return np.array(user_data).reshape(1, -1)

# Imported here so this cell also runs on its own after a kernel restart;
# pandas is only needed for the one-row frame built below.
import pandas as pd

user_input = get_user_input()

# The model was fitted on a DataFrame, so pass the row with the same column
# names. A bare ndarray makes scikit-learn warn that X has no valid feature
# names.
user_frame = pd.DataFrame(user_input, columns=feature_names)

probabilities = model.predict_proba(user_frame)

crop_labels = model.classes_

# Minimum predicted probability a crop needs to be recommended.
threshold = 0.75

# Walk the classes from most to least probable and keep those at or above
# the threshold.
class_probabilities = probabilities[0]
filtered_crops = [
    (crop_labels[idx], class_probabilities[idx])
    for idx in np.argsort(class_probabilities)[::-1]
    if class_probabilities[idx] >= threshold
]

# Build the percentage text from `threshold` so the messages cannot drift
# out of sync with the value actually used for filtering.
threshold_text = f"{threshold * 100:g}%"

if filtered_crops:
    print(f"\nRecommended Crops (Above {threshold_text} Confidence):")
    for crop, confidence in filtered_crops:
        print(f"{crop}: {confidence * 100:.2f}% confidence")
else:
    print(f"\nNo crops found with confidence above {threshold_text}. Try adjusting your inputs.")


Enter the values for the following features:

Recommended Crops (Above 75% Confidence):
rice: 96.40% confidence


