In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load Crop Dataset
crop = pd.read_csv("Dataset/Crop_recommendation.csv")

In [4]:
# Display dataset info
print("Dataset Shape:", crop.shape)
crop.info()
print("Missing Values:", crop.isnull().sum())
print("Duplicate Rows:", crop.duplicated().sum())

Dataset Shape: (2200, 8)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB
Missing Values: N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64
Duplicate Rows: 0


In [5]:
# Display summary statistics
print("Summary Statistics:")
print(crop.describe())
print("Crop Label Distribution:")
print(crop['label'].value_counts())

Summary Statistics:
                 N            P            K  temperature     humidity  \
count  2200.000000  2200.000000  2200.000000  2200.000000  2200.000000   
mean     50.551818    53.362727    48.149091    25.616244    71.481779   
std      36.917334    32.985883    50.647931     5.063749    22.263812   
min       0.000000     5.000000     5.000000     8.825675    14.258040   
25%      21.000000    28.000000    20.000000    22.769375    60.261953   
50%      37.000000    51.000000    32.000000    25.598693    80.473146   
75%      84.250000    68.000000    49.000000    28.561654    89.948771   
max     140.000000   145.000000   205.000000    43.675493    99.981876   

                ph     rainfall  
count  2200.000000  2200.000000  
mean      6.469480   103.463655  
std       0.773938    54.958389  
min       3.504752    20.211267  
25%       5.971693    64.551686  
50%       6.425045    94.867624  
75%       6.923643   124.267508  
max       9.935091   298.560117  
Crop La

In [22]:
# Encode Crop Labels
crop_label_encoder = LabelEncoder()
crop['label'] = crop_label_encoder.fit_transform(crop['label'])

In [21]:
# Save Label Encoder
joblib.dump(crop_label_encoder, "crop_label_encoder.pkl")
print("Label Encoder saved successfully!")

Label Encoder saved successfully!


In [20]:
# Define Features and Target
X = crop.drop(columns=["label"])
y = crop["label"]


In [13]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [15]:
# Train Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [16]:
# Evaluate Model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(classification_report(y_test, y_pred))


Model Accuracy: 99.55%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20
           2       1.00      0.95      0.97        20
           3       1.00      1.00      1.00        20
           4       1.00      1.00      1.00        20
           5       1.00      1.00      1.00        20
           6       1.00      1.00      1.00        20
           7       1.00      1.00      1.00        20
           8       0.95      1.00      0.98        20
           9       1.00      1.00      1.00        20
          10       1.00      1.00      1.00        20
          11       0.95      1.00      0.98        20
          12       1.00      1.00      1.00        20
          13       1.00      1.00      1.00        20
          14       1.00      1.00      1.00        20
          15       1.00      1.00      1.00        20
          16       1.00      1.00      1.00        20
    

In [17]:
# Save Trained Model
joblib.dump(model, "Crops_Recommendation.pkl")
print("Crop Recommendation Model saved successfully!")

Crop Recommendation Model saved successfully!


In [18]:
# Function for Crop Prediction
def predict_crop(N, P, K, temperature, humidity, ph, rainfall):
    model = joblib.load("Crops_Recommendation.pkl")
    input_data = np.array([[N, P, K, temperature, humidity, ph, rainfall]])
    prediction = model.predict(input_data)[0]
    crop_encoder = joblib.load("crop_label_encoder.pkl")
    return crop_encoder.inverse_transform([prediction])[0]

In [19]:
# Example Usage
if __name__ == "__main__":
    crop_result = predict_crop(90, 42, 43, 20.5, 82.0, 6.5, 202.0)
    print(f"The recommended crop is: {crop_result}")

The recommended crop is: 20


