In [1]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, r2_score, mean_absolute_error
from common.response import failure_response, success_response
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import warnings


# Suppress all warnings for the rest of the session so the cell output stays
# compact.
warnings.filterwarnings(action='ignore')

# ---------------------------------------------------------------------------
# Load the yield dataset
# ---------------------------------------------------------------------------
df = pd.read_csv('yield_prediction_dataset.csv')
print(f"Dataset shape: {df.shape}")

# ---------------------------------------------------------------------------
# Encode the categorical feature columns as integers
# ---------------------------------------------------------------------------
# One LabelEncoder is fitted per column and kept in `encoders`, keyed by the
# column name, so the same string -> integer mapping can be reapplied later.
categorical_columns = [
    'Soil_Characteristics',
    'Nutrition_Value',
    'Crop_Variety',
    'Pest_and_Diseases',
    'Fertilizers',
    'Fertilizer_Usage',
]

encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    encoders[col] = encoder

# ---------------------------------------------------------------------------
# Features / target and hold-out split
# ---------------------------------------------------------------------------
y = df['Yield_Tons']
X = df.drop(columns='Yield_Tons')

# 10% of the rows are held out for evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

# ---------------------------------------------------------------------------
# Fit the random forest regressor and score it on the hold-out rows
# ---------------------------------------------------------------------------
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred = rf_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from MSE rather than recomputed
mae = mean_absolute_error(y_test, y_pred)

print(f"\nModel Performance Metrics:")
for label, value in (
    ("R² Score", r2),
    ("Mean Squared Error", mse),
    ("Root Mean Squared Error", rmse),
    ("Mean Absolute Error", mae),
):
    print(f"{label}: {value:.4f}")

# ---------------------------------------------------------------------------
# Feature importances, most influential first
# ---------------------------------------------------------------------------
importance_table = pd.DataFrame({
    'feature': X.columns,
    'importance': rf_model.feature_importances_,
})
feature_importance = importance_table.sort_values(by='importance', ascending=False)

print(f"\nFeature Importance:")
print(feature_importance)


Dataset shape: (1000, 9)
Training set size: 900
Testing set size: 100

Model Performance Metrics:
R² Score: 0.8935
Mean Squared Error: 6.0001
Root Mean Squared Error: 2.4495
Mean Absolute Error: 1.5063

Feature Importance:
                feature  importance
4           Fertilizers    0.352155
1       Nutrition_Value    0.220870
2          Crop_Variety    0.143980
3     Pest_and_Diseases    0.135384
6       Farm_Size_Acres    0.048167
7              pH_Level    0.041274
5      Fertilizer_Usage    0.030458
0  Soil_Characteristics    0.027711


In [2]:
import pickle
# Persist the fitted yield regressor. The file still contains only the bare
# model object, so any existing loader of 'yield_prediction.pkl' is unaffected.
with open('yield_prediction.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)

# The regressor was trained on label-encoded categorical columns, so the fitted
# encoders (dict: column name -> LabelEncoder) are needed to map raw category
# strings to the same integer codes at prediction time. They are written to a
# separate file so the model pickle's layout does not change.
with open('yield_prediction_encoders.pkl', 'wb') as encoders_file:
    pickle.dump(encoders, encoders_file)

In [14]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

# Train a crop-suggestion classifier and save it together with the metadata
# (feature column order, known soil types) needed to rebuild the same feature
# matrix at prediction time.
df = pd.read_csv('crop_prediction_dataset.csv')
print(f"Crop suggestion dataset shape: {df.shape}")

# One-hot encode the soil type. pandas names the indicator columns
# '<column>_<value>', i.e. every generated column starts with SOIL_PREFIX.
SOIL_PREFIX = 'Soil_Type_'
df_encoded = pd.get_dummies(df, columns=['Soil_Type'])

X = df_encoded.drop('Crop', axis=1)
y = df_encoded['Crop']

# Column order the model is trained with; saved so callers can align inputs.
feature_columns = X.columns.tolist()

# A stratified split is only possible when every class has at least two rows
# and both resulting splits are at least as large as the number of classes;
# otherwise train_test_split raises ValueError instead of reaching the
# "dataset too small" fallback. The split sizes are computed the same way
# scikit-learn does for a float test_size (ceil for test, remainder for train).
TEST_FRACTION = 0.3
n_samples = len(df_encoded)
n_test = int(np.ceil(TEST_FRACTION * n_samples))
n_train = n_samples - n_test
class_counts = y.value_counts()
n_classes = len(class_counts)
can_stratify = (
    n_samples > 4
    and class_counts.min() >= 2
    and n_train >= n_classes
    and n_test >= n_classes
)

model = RandomForestClassifier(n_estimators=100, random_state=42)

if can_stratify:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_FRACTION, random_state=42, stratify=y
    )

    print(f"Training set size: {X_train.shape[0]}")
    print(f"Testing set size: {X_test.shape[0]}")

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nCrop Suggestion Model Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

    # Skip the per-class report when the test split contains a single class.
    if len(np.unique(y_test)) > 1:
        print(f"\nClassification Report:")
        print(classification_report(y_test, y_pred))

else:
    # No evaluation is possible here: the model is fitted on every row.
    model.fit(X, y)
    print("Dataset too small for train-test split. Using full dataset for training.")

# NOTE(review): when the split branch runs, the model saved below is the one
# fitted on the training split only, not on the full dataset.
model_data = {
    'model': model,
    'feature_columns': feature_columns,
    # Strip only the leading prefix; str.replace would also remove any later
    # occurrence of the same text inside a soil type name.
    'soil_types': [
        col[len(SOIL_PREFIX):]
        for col in feature_columns
        if col.startswith(SOIL_PREFIX)
    ],
}

with open('crop_suggestion.pkl', 'wb') as file:
    pickle.dump(model_data, file)



Crop suggestion dataset shape: (1000, 5)
Training set size: 700
Testing set size: 300

Crop Suggestion Model Accuracy: 0.7200 (72.00%)

Classification Report:
              precision    recall  f1-score   support

      Barley       0.89      0.94      0.91        50
        Corn       0.53      0.60      0.56        50
       Maize       0.65      0.52      0.58        50
        Rice       1.00      0.94      0.97        50
     Soybean       0.52      0.54      0.53        50
       Wheat       0.76      0.78      0.77        50

    accuracy                           0.72       300
   macro avg       0.72      0.72      0.72       300
weighted avg       0.72      0.72      0.72       300

