In [44]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
import requests
import folium
from folium import plugins
import pickle
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support,roc_curve, auc, accuracy_score


In [2]:
df = pd.read_csv('Crop_Recommendation.csv')
label_encoder = LabelEncoder()

df['Crop'] = label_encoder.fit_transform(df['Crop'])
df['Crop'].unique()


array([20, 11,  3,  9, 18, 13, 14,  2, 10, 19,  1, 12,  7, 21, 15,  0, 16,
       17,  4,  6,  8,  5])

In [32]:
features = np.array([ 1.34934948, -1.0960582, -0.02225388, 0.7692403, 1.05232434, -0.37619899, -1.38984486]).reshape(1, -1)
features

array([[ 1.34934948, -1.0960582 , -0.02225388,  0.7692403 ,  1.05232434,
        -0.37619899, -1.38984486]])

In [5]:
crop_mapping = dict(zip(range(len(label_encoder.classes_)), label_encoder.classes_))
print(crop_mapping) 


{0: 'Apple', 1: 'Banana', 2: 'Blackgram', 3: 'ChickPea', 4: 'Coconut', 5: 'Coffee', 6: 'Cotton', 7: 'Grapes', 8: 'Jute', 9: 'KidneyBeans', 10: 'Lentil', 11: 'Maize', 12: 'Mango', 13: 'MothBeans', 14: 'MungBean', 15: 'Muskmelon', 16: 'Orange', 17: 'Papaya', 18: 'PigeonPeas', 19: 'Pomegranate', 20: 'Rice', 21: 'Watermelon'}


In [None]:
df['Crop'].to_lis

0       20
1       20
2       20
3       20
4       20
        ..
2195     5
2196     5
2197     5
2198     5
2199     5
Name: Crop, Length: 2200, dtype: int64

In [7]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(['Crop'], axis = 1), df['Crop'], test_size = 0.2, random_state = 42)
print(X_train.shape)
print(y_train.shape)

(1760, 7)
(1760,)


In [8]:
std_scaler = StandardScaler()

X_train_scaled = std_scaler.fit_transform(X_train)
X_test_scaled = std_scaler.transform(X_test)

In [9]:
dt = DecisionTreeClassifier(random_state = 42)

params_dist = {
    'criterion' : ['gini', 'entropy'],
    'max_depth' : [2 * n for n in range(1,21)],
    'min_samples_leaf' : [n for n in range(1,11)]
}

random_search = RandomizedSearchCV(estimator = dt, param_distributions = params_dist,
                                  cv=5, n_jobs = -1, n_iter = 10, scoring = 'accuracy')
random_search.fit(X_train_scaled, y_train.values.ravel())

print(f"Best Parameters: {random_search.best_params_}")
print(f"Best Score: {random_search.best_score_}")

Best Parameters: {'min_samples_leaf': 1, 'max_depth': 10, 'criterion': 'gini'}
Best Score: 0.9863636363636363


In [35]:
model = random_search.best_estimator_

In [30]:
feature_order = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH_Value', 'Rainfall']
print(X_test_scaled[0])

[ 1.34934948 -1.0960582  -0.02225388  0.7692403   1.05232434 -0.37619899
 -1.38984486]


In [38]:
avg_features =  [ 1.34934948, -1.0960582, -0.02225388, 0.7692403, 1.05232434, -0.37619899, -1.38984486]

In [42]:
std_scaler.transform(pd.DataFrame([
            {
                'Nitrogen' : avg_features[0],
                'Phosphorus' : avg_features[1],
                'Potassium' : avg_features[2],
                'Temperature' : avg_features[3],
                'Humidity' : avg_features[4],
                'pH_Value' : avg_features[5],
                'Rainfall' : avg_features[6],
            }
]))

array([[-1.32315768, -1.6408129 , -0.94311023, -4.87566051, -3.2023103 ,
        -8.86176188, -1.89402903]])

In [37]:
model.predict(features)


array([15])

In [None]:
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)
    

In [None]:
def model_evaluation(yt, yp):
    results = {}
    results['accuracy'] = accuracy_score(yt, yp)
    
    precision, recall, fscore, _ = precision_recall_fscore_support(yt, yp, average = 'weighted')
    results['precision'] = precision
    results['recall'] = recall
    results['fscore'] = fscore
    
    metrics = list(results.keys())
    values = list(results.values())
    
    ax = sns.barplot(x = metrics, y = values, palette = 'viridis')
    plt.title('Model Evaluation Metrics')
    plt.ylim(0,1)
    plt.ylabel('Value')
    
    for i, v in enumerate(values):
        plt.text(i, v/2, f'{v: 0.2f}', ha = 'center', va = 'center', color = 'white',
                fontsize = 12)
    plt.show()

In [36]:
with open("crop_recommendation_model.pkl", "rb") as f:
    model = pickle.load(f)
    

In [None]:
key = '6fa7818ced9d4d988af164055240412'


In [None]:
model.predict()


In [13]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
import pickle

# Load dataset
df = pd.read_csv('Crop_Recommendation.csv')

# Label encoding for the target variable 'Crop'
label_encoder = LabelEncoder()
df['Crop'] = label_encoder.fit_transform(df['Crop'])

# Check unique values of 'Crop' after encoding
print(f"Encoded Crop classes: {df['Crop'].unique()}")

# Split the dataset into features and target variable
X = df.drop(['Crop'], axis=1)
y = df['Crop']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print shapes of training and test sets for validation
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

# Standardize the feature set using StandardScaler
std_scaler = StandardScaler()
X_train_scaled = std_scaler.fit_transform(X_train)
X_test_scaled = std_scaler.transform(X_test)

# Initialize DecisionTreeClassifier
dt = DecisionTreeClassifier(random_state=42)

# Define the hyperparameter grid for RandomizedSearchCV
params_dist = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [2 * n for n in range(1, 21)],
    'min_samples_leaf': [n for n in range(1, 11)]
}

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=dt, param_distributions=params_dist,
                                   cv=5, n_jobs=-1, n_iter=10, scoring='accuracy', random_state=42)

# Fit the RandomizedSearchCV to the scaled training data
random_search.fit(X_train_scaled, y_train)

# Output the best parameters and the best score
print(f"Best Parameters: {random_search.best_params_}")
print(f"Best Score: {random_search.best_score_}")

# Get the best estimator (model)
model = random_search.best_estimator_

# Save the trained model to a pickle file
with open('crop_recommendation_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Save the scaler to be used for future scaling during predictions
with open('std_scaler.pkl', 'wb') as file:
    pickle.dump(std_scaler, file)

print("Model and scaler have been saved successfully.")

Encoded Crop classes: [20 11  3  9 18 13 14  2 10 19  1 12  7 21 15  0 16 17  4  6  8  5]
X_train shape: (1760, 7)
y_train shape: (1760,)
Best Parameters: {'min_samples_leaf': 4, 'max_depth': 20, 'criterion': 'gini'}
Best Score: 0.9806818181818182
Model and scaler have been saved successfully.


0       20
1       20
2       20
3       20
4       20
        ..
2195     5
2196     5
2197     5
2198     5
2199     5
Name: Crop, Length: 2200, dtype: int64