<a href="https://colab.research.google.com/github/ManviNarang01/AgroInsight/blob/main/CropRecommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

SVM Algorithm with GridSearchCV
Accuracy: 97.95%

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [24]:
# Read the crop recommendation CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='Crop_recommendation.csv')

In [25]:
# Data preprocessing
# Target: the 'label' column; features: every remaining column
y = data['label']
X = data.drop('label', axis='columns')

# Turn the label values into integer class ids; `le` is kept so that
# predictions can be mapped back with inverse_transform later on
le = LabelEncoder()
y = le.fit_transform(y)

In [59]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Preprocessing: apply StandardScaler to every feature column
scaling_step = ('numeric', StandardScaler(), X.columns)
preprocessor = ColumnTransformer(transformers=[scaling_step])

In [58]:
# SVM pipeline: scale the features first, then classify.
# The step names are what the 'classifier__*' keys of the search grid refer to.
svc_step = SVC(random_state=42)
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', svc_step),
])

In [28]:
# Candidate hyperparameter values; the 'classifier__' prefix routes each
# setting to the pipeline step named 'classifier'
param_grid = dict(
    classifier__C=[0.1, 1, 10],
    classifier__kernel=['linear', 'rbf', 'poly'],
    classifier__gamma=['scale', 'auto'],
)

In [29]:
# Search every combination in param_grid, scoring each by accuracy
# with 5-fold cross-validation on the training split
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

# Keep the best-scoring pipeline for evaluation and prediction
best_model = grid_search.best_estimator_

In [30]:
# best_model is grid_search.best_estimator_, which GridSearchCV has already
# refit on the full training set (refit=True is the default), so it is used
# as-is instead of being fitted a second time.

# Making predictions on the test set
y_pred = best_model.predict(X_test)

# Evaluating the model using accuracy and classification report
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
# Rows of the report are the integer class ids produced by the LabelEncoder
print(classification_report(y_test, y_pred))

Accuracy: 0.9795454545454545
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       0.95      0.95      0.95        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       1.00      0.94      0.97        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.85      1.00      0.92        23
           9       0.91      1.00      0.95        20
          10       0.85      1.00      0.92        11
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        19
          13       1.00      0.96      0.98        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      

In [31]:
# Print the support vectors and coefficients of the tuned SVC,
# but only when the selected kernel is linear (otherwise nothing is printed)
fitted_svc = best_model.named_steps['classifier']
if fitted_svc.kernel == 'linear':
    support_vectors, coefficients = fitted_svc.support_vectors_, fitted_svc.coef_
    print('Support Vectors:', support_vectors, sep='\n')
    print('Coefficients:', coefficients, sep='\n')


In [32]:
# A single hand-entered sample to score; edit the values to try other conditions
sample_values = {
    'N': 50,
    'P': 20,
    'K': 0,
    'temperature': 0,
    'humidity': 0,
    'ph': 0,
    'rainfall': 100,
}
new_data = pd.DataFrame([sample_values])

# Predict the encoded class, then map it back to the original label
encoded_prediction = best_model.predict(new_data)
predicted_crop = le.inverse_transform(encoded_prediction)
print(f'Recommended Crop: {predicted_crop[0]}')


Recommended Crop: mothbeans


Decision Tree Classifier
Accuracy: 98.64%

In [48]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline

In [49]:
# Read the dataset from the CSV file in the working directory
dataset_path = 'Crop_recommendation.csv'
data = pd.read_csv(dataset_path)

In [50]:
# Data preprocessing
# X holds every column except 'label'; y is the 'label' column itself
X, y = data.drop(columns=['label']), data['label']

In [51]:
# Encoding the categorical target variable into numerical labels using LabelEncoder.
# The labels are read from data['label'] rather than from y so that this cell
# is safe to re-run: encoding y in place would, on a second run, fit the
# encoder on integers and le.inverse_transform could no longer return crop names.
le = LabelEncoder()
y = le.fit_transform(data['label'])


In [52]:
# 80/20 train/test split with a fixed seed
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Single-step pipeline wrapping a seeded decision tree
tree_step = DecisionTreeClassifier(random_state=42)
model = make_pipeline(tree_step)


In [53]:
# Fit the pipeline on the training split (the fitted pipeline is the cell output)
model.fit(X=X_train, y=y_train)

In [54]:
# Predicted class ids for the held-out rows
y_pred = model.predict(X=X_test)

In [55]:
# Overall accuracy first, then the per-class precision / recall / F1 table
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

Accuracy: 0.9863636363636363
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       0.95      1.00      0.98        20
           3       1.00      1.00      1.00        26
           4       0.96      1.00      0.98        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      0.96      0.94        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      0.95      0.98        21
          12       1.00      1.00      1.00        19
          13       1.00      0.92      0.96        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      

In [56]:
# A single hand-entered sample to score; edit feature_values to try other conditions
feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
feature_values = [50, 25, 0, 0, 0, 0, 100]
new_data = pd.DataFrame([feature_values], columns=feature_names)

In [57]:
# Predict the encoded class for the sample and map it back to the original label
encoded_prediction = model.predict(new_data)
predicted_crop = le.inverse_transform(encoded_prediction)
print(f'Recommended Crop: {predicted_crop[0]}')

Recommended Crop: kidneybeans


Decision Tree + Hyperparameter tuning using GridSearchCV and feature scaling with StandardScaler
Accuracy: 98.64%

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [45]:
# Read the dataset
data = pd.read_csv(filepath_or_buffer='Crop_recommendation.csv')

# Data preprocessing
# The 'label' column is the target; all other columns are the features
y = data['label']
X = data.drop('label', axis='columns')

In [44]:
# Encoding the target labels as integers.
# The labels are taken from data['label'] rather than from y so that re-running
# this cell never fits the encoder on already-encoded integers, which would
# stop le.inverse_transform from returning the original crop names.
le = LabelEncoder()
y = le.fit_transform(data['label'])

# Splitting the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
# Preprocessing: apply StandardScaler to every feature column
scaling_step = ('numeric', StandardScaler(), X.columns)
preprocessor = ColumnTransformer(transformers=[scaling_step])

# Pipeline to be tuned: scaling followed by a seeded decision tree.
# The step names are what the 'classifier__*' grid keys refer to.
tree_step = DecisionTreeClassifier(random_state=42)
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', tree_step),
])

In [17]:
# Candidate tree settings; the 'classifier__' prefix routes each one
# to the pipeline step named 'classifier'
param_grid = dict(
    classifier__max_depth=[None, 5, 10, 15],
    classifier__min_samples_split=[2, 5, 10],
    classifier__min_samples_leaf=[1, 2, 4],
)

# Try every combination, scored by accuracy with 5-fold cross-validation
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

In [18]:
# Getting the best model from hyperparameter tuning.
# GridSearchCV (refit=True by default) has already retrained this pipeline on
# the full training set, so fitting it again would only repeat that work.
best_model = grid_search.best_estimator_

# Show the fitted pipeline as the cell output
best_model

In [19]:
# Predicted class ids for the held-out rows
y_pred = best_model.predict(X=X_test)

# Overall accuracy, followed by the per-class precision / recall / F1 table
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

Accuracy: 0.9863636363636363
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       0.95      1.00      0.98        20
           3       1.00      1.00      1.00        26
           4       0.96      1.00      0.98        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      0.96      0.94        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      0.95      0.98        21
          12       1.00      1.00      1.00        19
          13       1.00      0.92      0.96        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      

In [None]:
# Read the importances from the fitted tree and list them next to the
# input column names, in column order
fitted_tree = best_model.named_steps['classifier']
feature_importances = fitted_tree.feature_importances_
named_importances = list(zip(X.columns, feature_importances))
print('Feature Importances:')
for feature, importance in named_importances:
    print(f'{feature}: {importance}')

In [21]:
# Dump the fitted tree as text rules (export_text cuts off branches deeper than
# its default max_depth). The tree sits behind the StandardScaler step, so the
# thresholds printed here are in standardized units, not raw measurements.
fitted_tree = best_model.named_steps['classifier']
column_names = list(X.columns)
tree_rules = export_text(fitted_tree, feature_names=column_names)
print(f'Decision Tree Rules:\n{tree_rules}')

Decision Tree Rules:
|--- rainfall <= -1.32
|   |--- class: 15
|--- rainfall >  -1.32
|   |--- K <= 1.80
|   |   |--- humidity <= 0.09
|   |   |   |--- humidity <= -1.99
|   |   |   |   |--- K <= 0.04
|   |   |   |   |   |--- class: 9
|   |   |   |   |--- K >  0.04
|   |   |   |   |   |--- class: 3
|   |   |   |--- humidity >  -1.99
|   |   |   |   |--- N <= 0.24
|   |   |   |   |   |--- rainfall <= -0.37
|   |   |   |   |   |   |--- rainfall <= -0.78
|   |   |   |   |   |   |   |--- humidity <= -0.52
|   |   |   |   |   |   |   |   |--- class: 13
|   |   |   |   |   |   |   |--- humidity >  -0.52
|   |   |   |   |   |   |   |   |--- P <= -0.04
|   |   |   |   |   |   |   |   |   |--- class: 13
|   |   |   |   |   |   |   |   |--- P >  -0.04
|   |   |   |   |   |   |   |   |   |--- class: 10
|   |   |   |   |   |   |--- rainfall >  -0.78
|   |   |   |   |   |   |   |--- P <= 0.12
|   |   |   |   |   |   |   |   |--- humidity <= -0.35
|   |   |   |   |   |   |   |   |   |--- N <= -0.06


In [22]:
# Making predictions for new data
new_data = pd.DataFrame({
    'N': [50],
    'P': [25],
    'K': [0],
    'temperature': [0],
    'humidity': [0],
    'ph': [0],
    'rainfall': [100]
})

# Predicting the final crop
predicted_crop = le.inverse_transform(best_model.predict(new_data))
print(f'Recommended Crop: {predicted_crop[0]}')

Recommended Crop: kidneybeans


Random Forest
Accuracy: 99.32%

In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline

In [34]:
# Read the dataset from the CSV file in the working directory
dataset_path = 'Crop_recommendation.csv'
data = pd.read_csv(dataset_path)


In [35]:
# Split the frame into features (everything but the target column) and target
target_column = 'label'
X = data.drop(columns=[target_column])
y = data[target_column]

# Integer-encode the target; `le` is kept to decode predictions later
le = LabelEncoder()
y = le.fit_transform(y)


In [36]:
# 80/20 train/test split with a fixed seed
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [37]:
# Single-step pipeline wrapping a seeded random forest of 100 trees
forest_step = RandomForestClassifier(n_estimators=100, random_state=42)
model = make_pipeline(forest_step)

In [38]:
# Fit the pipeline on the training split (the fitted pipeline is the cell output)
model.fit(X=X_train, y=y_train)

In [39]:
# Predicted class ids for the held-out rows
y_pred = model.predict(X=X_test)

In [42]:
# Evaluating the model using accuracy and the per-class classification report
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

Accuracy: 0.9931818181818182
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      1.00      0.96        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        19
          13       1.00      0.96      0.98        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      

In [43]:
# Making predictions for new changeable data
new_data = pd.DataFrame({
    'N': [50],
    'P': [25],
    'K': [0],
    'temperature': [0],
    'humidity': [0],
    'ph': [0],
    'rainfall': [0]
})

# Predicitng the final crop
predicted_crop = le.inverse_transform(model.predict(new_data))
print(f'Recommended Crop: {predicted_crop[0]}')

Recommended Crop: muskmelon
