In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Read the raw crop / fertilizer table from disk.
df = pd.read_csv("crop_yield_fertilizer.csv")

# One-hot encode the 'Crop' column. drop_first=True omits the indicator for the
# first crop category, so that crop is represented by all remaining Crop_*
# indicator columns being 0.
df = pd.get_dummies(df, columns=['Crop'], drop_first=True)

# Targets: a single yield column and the three fertilizer nutrient columns.
y_yield = df['Yield (tons/ha)']  # Yield target variable
y_fertilizer = df[['Nitrogen (N)', 'Phosphorus (P)', 'Potassium (K)']]  # Fertilizer target variables

# Features: every column that is not one of the targets above.
X = df.drop(columns=[y_yield.name, *y_fertilizer.columns])

# Hold out 20% of the rows for testing. Both splits are given the same X and
# the same seed, so they are expected to select the same rows for each target.
_split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train_yield, y_test_yield = train_test_split(
    X, y_yield, **_split_options
)
X_train_fert, X_test_fert, y_train_fert, y_test_fert = train_test_split(
    X, y_fertilizer, **_split_options
)

# Yield model: fit a 100-tree random forest, predict on the held-out rows,
# then score the predictions.
yield_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train_yield
)
y_pred_yield = yield_model.predict(X_test)
mae_yield = mean_absolute_error(y_test_yield, y_pred_yield)
r2_yield = r2_score(y_test_yield, y_pred_yield)

# Fertilizer model: same recipe, but the target has three columns (N, P, K),
# so a single forest predicts all three at once.
fertilizer_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_fert, y_train_fert
)
y_pred_fertilizer = fertilizer_model.predict(X_test_fert)
mae_fertilizer = mean_absolute_error(y_test_fert, y_pred_fertilizer)
r2_fertilizer = r2_score(y_test_fert, y_pred_fertilizer)

# Report the held-out metrics for both models.
print(f"Yield Prediction - Mean Absolute Error: {mae_yield:.2f}")
print(f"Yield Prediction - R-squared Score: {r2_yield:.2f}")
print(f"Fertilizer Prediction - Mean Absolute Error: {mae_fertilizer:.2f}")
print(f"Fertilizer Prediction - R-squared Score: {r2_fertilizer:.2f}")

# Function to predict crop yield and fertilizer needs based on new input
def predict_yield_fertilizer(input_data):
    """Predict crop yield and fertilizer needs for a single observation.

    Args:
        input_data: Feature values for one sample, keyed by feature name
            (e.g. a dict). The values are aligned to the training feature
            columns ``X.columns``: training columns missing from
            ``input_data`` are filled with 0, and keys that are not training
            columns are silently discarded.

    Returns:
        A tuple ``(predicted_yield, predicted_fertilizer)`` holding the first
        (and only) prediction of ``yield_model`` and the first row predicted
        by ``fertilizer_model``.
    """
    # Wrapping the mapping in a list yields a one-row frame; reindexing puts
    # the columns in the same order the models were trained on.
    sample = pd.DataFrame([input_data]).reindex(columns=X.columns, fill_value=0)

    yield_estimate = yield_model.predict(sample)[0]
    fertilizer_estimate = fertilizer_model.predict(sample)[0]
    return yield_estimate, fertilizer_estimate

# Example input for prediction: the numeric conditions for one field.
example_input = {
    'Temperature (°C)': 28.0,
    'Rainfall (mm)': 3000,
    'Soil pH': 7.5,
}
# Crop indicator column: set 1 for the selected crop, 0 for others.
example_input['Crop_Rice'] = 1

# Run both models on the example and show the results.
predicted_yield, predicted_fertilizer = predict_yield_fertilizer(example_input)
print(f"Predicted Yield: {predicted_yield:.2f} tons/ha")
print(f"Predicted Fertilizer Needs (N, P, K): {predicted_fertilizer}")


Yield Prediction - Mean Absolute Error: 1.44
Yield Prediction - R-squared Score: -0.09
Fertilizer Prediction - Mean Absolute Error: 12.50
Fertilizer Prediction - R-squared Score: -0.14
Predicted Yield: 3.23 tons/ha
Predicted Fertilizer Needs (N, P, K): [55.85 52.9  33.77]
