In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler

# Read the raw crop records from disk.
data = pd.read_csv("Crop_Data.csv")

# Integer-encode every categorical column in place. Each column keeps its own
# fitted encoder so encoded values can be mapped back to the original strings
# (and new inputs can be encoded consistently) later in the notebook.
label_encoder_season = LabelEncoder()
label_encoder_label = LabelEncoder()
label_encoder_country = LabelEncoder()
label_encoder_harvest_season = LabelEncoder()
for column_name, column_encoder in (
    ('season', label_encoder_season),
    ('label', label_encoder_label),
    ('Country', label_encoder_country),
    ('harvest_season', label_encoder_harvest_season),
):
    data[column_name] = column_encoder.fit_transform(data[column_name])

# Two targets are predicted from the same feature matrix: the planting season
# and the harvest season. Every remaining column (including the encoded
# 'label' and 'Country') is used as a feature.
y_plant = data['season']
y_harvest = data['harvest_season']
X = data.drop(columns=['season', 'harvest_season'])

# A single call splits the features and both targets together, so the
# train/test rows stay aligned across X, y_plant and y_harvest.
(
    X_train, X_test,
    y_plant_train, y_plant_test,
    y_harvest_train, y_harvest_test,
) = train_test_split(X, y_plant, y_harvest, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows. From here on X_train / X_test are the scaler's output rather
# than the original DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from joblib import dump
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Train a random-forest classifier that predicts the encoded planting season.
model_plant = RandomForestClassifier(n_estimators=100, random_state=42)
model_plant.fit(X_train, y_plant_train)

# Score the planting-season model on the held-out test rows.
y_pred_plant = model_plant.predict(X_test)
accuracy_plant = accuracy_score(y_plant_test, y_pred_plant)

# Train a second classifier, on the same features, for the harvest season.
model_harvest = RandomForestClassifier(n_estimators=100, random_state=42)
model_harvest.fit(X_train, y_harvest_train)

# Score the harvest-season model on the held-out test rows.
# The variable keeps its original name, but it holds the harvest-season accuracy.
y_pred_harvest = model_harvest.predict(X_test)
accuracy_season = accuracy_score(y_harvest_test, y_pred_harvest)

print(f"Model Accuracy for crop pred: {accuracy_plant}")
print(f"Model Accuracy for harvest pred: {accuracy_season}")

# Persist each model together with the encoders needed to use it.
# The filename variables are passed to dump() directly so the "Model saved as"
# messages below always name the file that was actually written.
plant_model_filename = 'plant_season_prediction_model.joblib'
harvest_model_filename = 'harvest_prediction_model.joblib'
dump(
    {
        'model': model_plant,
        'label_encoder_season': label_encoder_season,
        'label_encoder_label': label_encoder_label,
        'label_encoder_country': label_encoder_country,
    },
    plant_model_filename,
)
dump(
    {
        'model': model_harvest,
        'label_encoder_season': label_encoder_season,
        'label_encoder_harvest_season': label_encoder_harvest_season,
        # The harvest model is trained on the encoded 'label' feature too, so
        # its bundle needs the crop-label encoder to be usable on its own.
        'label_encoder_label': label_encoder_label,
        'label_encoder_country': label_encoder_country,
    },
    harvest_model_filename,
)
# The fitted scaler is shared by both models and stored separately.
dump(scaler, 'standard_scaler.joblib')

print(f"Model saved as {plant_model_filename}")
print(f"Model saved as {harvest_model_filename}")


Model Accuracy for crop pred: 0.8678571428571429
Model Accuracy for harvest pred: 0.2571428571428571
Model saved as plant_season_prediction_model.joblib
Model saved as harvest_prediction_model.joblib


In [5]:
# Example query: growing conditions plus the crop and country of interest.
sample_input = dict(
    temperature=28.0,
    humidity=85.0,
    ph=9.0,
    water_availability=250.0,
    label='rice',
    country='South Africa',
)

# Encode the categorical inputs with the encoders fitted earlier in this
# kernel session (they are taken from memory here, not reloaded from disk).
label_encoded = label_encoder_label.transform([sample_input['label']])[0]
country_encoded = label_encoder_country.transform([sample_input['country']])[0]

# Assemble one feature row and scale it with the training scaler.
# NOTE(review): the order below is assumed to match the column order of the
# training features (the CSV columns minus 'season' and 'harvest_season');
# confirm against Crop_Data.csv.
numeric_keys = ('temperature', 'humidity', 'ph', 'water_availability')
feature_row = [sample_input[key] for key in numeric_keys]
feature_row += [label_encoded, country_encoded]
sample_input_scaled = scaler.transform([feature_row])

# Planting season: predict the encoded class, then decode it back to its name.
# (Despite its name, crop_label_prediction holds the encoded planting season.)
crop_label_prediction = model_plant.predict(sample_input_scaled)[0]
predicted_plant_season = label_encoder_season.inverse_transform([crop_label_prediction])[0]

# Harvest season: same procedure with the harvest model and its encoder.
harvest_season_prediction = model_harvest.predict(sample_input_scaled)[0]
predicted_harvest_season = label_encoder_harvest_season.inverse_transform([harvest_season_prediction])[0]

print(f"The predicted best season to plant {sample_input['label']} based on the information provided is: {predicted_plant_season}")
print(f"The predicted best harvest season that will result in optimum yield is: {predicted_harvest_season}")


The predicted best season to plant rice based on the information provided is: rainy
The predicted best harvest season that will result in optimum yield is: spring




In [2]:
from sklearn.ensemble import RandomForestClassifier

# Build a random forest with a fixed seed and fit it in one step
# (fit() returns the estimator itself).
# NOTE(review): y_train is not defined by any cell visible in this notebook;
# it appears to come from earlier kernel state. Confirm which target it holds.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predicted classes for the held-out test rows.
y_pred = model.predict(X_test)


In [1]:
pip show scikit-learn 

Name: scikit-learn
Version: 1.3.2
Summary: A set of python modules for machine learning and data mining
Home-page: http://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: c:\users\user\appdata\local\packages\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\localcache\local-packages\python310\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [3]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fraction of test rows whose predicted class matches the true class.
# NOTE(review): y_test is not defined by any cell visible in this notebook;
# it appears to come from earlier kernel state.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print each remaining summary under its own heading: first the per-class
# precision/recall/F1 table, then the confusion matrix.
for heading, summarize in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(summarize(y_test, y_pred))


Accuracy: 0.28

Classification Report:
              precision    recall  f1-score   support

       rainy       0.28      0.26      0.27        73
      spring       0.27      0.32      0.29        68
      summer       0.32      0.26      0.29        72
      winter       0.26      0.27      0.26        67

    accuracy                           0.28       280
   macro avg       0.28      0.28      0.28       280
weighted avg       0.28      0.28      0.28       280


Confusion Matrix:
[[19 22 13 19]
 [16 22 17 13]
 [15 18 19 20]
 [19 20 10 18]]


In [4]:
# Example: GridSearchCV for hyperparameter tuning
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters: three forest sizes x three depth limits.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
}

# Score every combination with 5-fold cross-validation on the training split.
# NOTE(review): y_train / y_test are not defined by any cell visible in this
# notebook; they appear to come from earlier kernel state.
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# re-trained on the whole of X_train with the winning parameters. Calling
# fit() on it again would only repeat that work.
best_model = grid_search.best_estimator_

# Evaluate the best model on the held-out test rows.
y_pred_best = best_model.predict(X_test)
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Accuracy with Best Model: {accuracy_best:.2f}")


Best Hyperparameters: {'max_depth': 20, 'n_estimators': 50}
Accuracy with Best Model: 0.25
