In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

import joblib


In [6]:
data = pd.read_csv("Crop_recommendation.csv")
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [7]:
data.shape

(2200, 8)

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [9]:
data.isnull().sum()

Unnamed: 0,0
N,0
P,0
K,0
temperature,0
humidity,0
ph,0
rainfall,0
label,0


In [10]:
X = data.drop("label", axis=1)
y = data["label"]

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [12]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
print("Model training completed")

Model training completed


In [13]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy*100, "%")

Accuracy: 99.31818181818181 %


In [14]:
joblib.dump(model, "model.pkl")
print("Model saved successfully")

Model saved successfully


In [16]:
sample = pd.DataFrame(
    [[90, 42, 43, 21, 82, 6.5, 202]],
    columns=["N", "P", "K", "temperature", "humidity", "ph", "rainfall"]
)
prediction = model.predict(sample)
print("Input Values:")
print(sample)
print("\nRecommended Crop:", prediction[0])

Input Values:
    N   P   K  temperature  humidity   ph  rainfall
0  90  42  43           21        82  6.5       202

Recommended Crop: rice


In [17]:
samples = pd.DataFrame(
    [
        [90, 42, 43, 21, 82, 6.5, 202],
        [120, 40, 40, 28, 70, 6.8, 150],
        [70, 50, 45, 26, 65, 6.0, 80]
    ],
    columns=["N", "P", "K", "temperature", "humidity", "ph", "rainfall"]
)
predictions = model.predict(samples)
print("Input Values and Recommended Crops:\n")
for i in range(len(samples)):
    print(samples.iloc[i].to_dict())
    print("Recommended Crop:", predictions[i])
    print()

Input Values and Recommended Crops:

{'N': 90.0, 'P': 42.0, 'K': 43.0, 'temperature': 21.0, 'humidity': 82.0, 'ph': 6.5, 'rainfall': 202.0}
Recommended Crop: rice

{'N': 120.0, 'P': 40.0, 'K': 40.0, 'temperature': 28.0, 'humidity': 70.0, 'ph': 6.8, 'rainfall': 150.0}
Recommended Crop: coffee

{'N': 70.0, 'P': 50.0, 'K': 45.0, 'temperature': 26.0, 'humidity': 65.0, 'ph': 6.0, 'rainfall': 80.0}
Recommended Crop: maize

