In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# ---------------------------------------------------------------
# Data
# ---------------------------------------------------------------
# The CSV is read through a relative path, so 'crop_yield_50000.csv'
# has to be in the current working directory.
df = pd.read_csv('crop_yield_50000.csv')

# Column groups by type. Only `categorical_features` is consumed by the
# preprocessor below; every other column (including the numeric ones
# listed here) reaches the model untouched via remainder='passthrough'.
categorical_features = ['Region', 'Soil_Type', 'Crop', 'Weather_Condition']
numerical_features = ['Rainfall_mm', 'Temperature_Celsius', 'Days_to_Harvest']

# Features = every column except the target, with the two usage flags
# cast to integers in the same chain.
target_column = 'Yield_tons_per_hectare'
X = (
    df.drop(columns=target_column)
      .astype({'Fertilizer_Used': int, 'Irrigation_Used': int})
)
y = df[target_column]

# ---------------------------------------------------------------
# Model
# ---------------------------------------------------------------
# One-hot encode the categorical columns; categories not seen during
# fit are ignored at transform time instead of raising.
one_hot = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    [('cat', one_hot, categorical_features)],
    remainder='passthrough',
)

# Preprocessing and the random forest are bundled into one pipeline so
# that fit/predict always apply the same encoding.
seed = 42
forest = RandomForestRegressor(n_estimators=100, random_state=seed, n_jobs=-1)
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', forest),
])

# ---------------------------------------------------------------
# Train / evaluate
# ---------------------------------------------------------------
# 80/20 hold-out split, seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

print("\nTraining the model...")
model.fit(X_train, y_train)
print("Model training completed.")

# Score the held-out 20%.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("\n--- Model Performance ---")
print(f"R-squared (R²) Score: {r2:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")




Training the model...
Model training completed.

--- Model Performance ---
R-squared (R²) Score: 0.9093
Root Mean Squared Error (RMSE): 0.5082


In [2]:
import joblib
import pandas as pd

# ==============================
# Mapping dictionaries
# ==============================
# Menu number -> category label that is sent to the model.
# NOTE(review): confirm these labels match the categories present in the
# training data. If the loaded pipeline encodes with
# OneHotEncoder(handle_unknown='ignore'), an unseen label would be silently
# encoded as all zeros rather than raising.
region_map = {1: "North", 2: "South", 3: "East", 4: "West"}
soil_map = {1: "Loamy", 2: "Sandy", 3: "Clay", 4: "Black"}
crop_map = {1: "Wheat", 2: "Rice", 3: "Maize", 4: "Sugarcane"}
weather_map = {1: "Sunny", 2: "Rainy", 3: "Cloudy", 4: "Stormy"}


# ==============================
# Input helpers
# ==============================
def _ask_choice(title, options):
    """Print a numbered menu and return the label the user picks.

    Args:
        title: Heading printed above the menu (e.g. "Select Region:").
        options: Dict mapping integer menu numbers to labels.

    Returns:
        The label from ``options`` for the number the user entered.
        Re-prompts until the entry is an integer that is a key of ``options``.
    """
    print(f"\n{title}")
    for key, label in options.items():
        print(f"{key}. {label}")
    valid = ", ".join(str(key) for key in options)
    while True:
        raw = input("Enter choice: ")
        try:
            return options[int(raw)]
        except (ValueError, KeyError):
            # ValueError: not an integer; KeyError: integer not on the menu.
            print(f"Invalid choice {raw!r}. Please enter one of: {valid}.")


def _ask_number(prompt, cast, low, high):
    """Read a finite number, warning if it is outside the suggested range.

    Args:
        prompt: Text shown to the user.
        cast: ``int`` or ``float``; applied to the raw text.
        low: Lower end of the range advertised in the prompt.
        high: Upper end of the range advertised in the prompt.

    Returns:
        The parsed value. Out-of-range values are accepted with a warning
        because the range in the prompt is guidance only; unparseable or
        non-finite entries are re-prompted.
    """
    while True:
        raw = input(prompt)
        try:
            value = cast(raw)
        except ValueError:
            print(f"Invalid number {raw!r}. Please try again.")
            continue
        # float() accepts "nan" and "inf"; this comparison is False for both.
        if not float("-inf") < value < float("inf"):
            print(f"Invalid number {raw!r}. Please try again.")
            continue
        if not low <= value <= high:
            print(f"⚠️ {value} is outside the suggested range "
                  f"({low} – {high}); the prediction may be less reliable.")
        return value


def _ask_flag(prompt):
    """Read a yes/no flag and return it as 1 or 0.

    Re-prompts until the entry parses as the integer 0 or 1, so the value
    sent to the model always agrees with the Yes/No shown in the report.
    """
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            value = None
        if value in (0, 1):
            return value
        print("Please enter 1 for Yes or 0 for No.")


# ==============================
# Load best model
# ==============================
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load a model file from a trusted source. The file is expected in
# the working directory and is not created by this cell.
model = joblib.load("best_crop_yield_model.pkl")

# ==============================
# Take user input
# ==============================
print("\n🌾 Crop Yield Prediction System 🌾")

# Category inputs
region = _ask_choice("Select Region:", region_map)
soil = _ask_choice("Select Soil Type:", soil_map)
crop = _ask_choice("Select Crop:", crop_map)
weather = _ask_choice("Select Weather Condition:", weather_map)

# Numerical inputs (with ranges for guidance)
rainfall = _ask_number("\nEnter Rainfall (mm) (100 – 1000): ", float, 100, 1000)
temp = _ask_number("Enter Temperature (°C) (10 – 45): ", float, 10, 45)
days = _ask_number("Enter Days to Harvest (60 – 200): ", int, 60, 200)

# Boolean inputs
fertilizer = _ask_flag("Fertilizer Used? (1=Yes, 0=No): ")
irrigation = _ask_flag("Irrigation Used? (1=Yes, 0=No): ")

# ==============================
# Prepare DataFrame
# ==============================
# Single-row frame whose column names are the feature names given to the model.
sample = pd.DataFrame([{
    "Region": region,
    "Soil_Type": soil,
    "Crop": crop,
    "Weather_Condition": weather,
    "Rainfall_mm": rainfall,
    "Temperature_Celsius": temp,
    "Days_to_Harvest": days,
    "Fertilizer_Used": fertilizer,
    "Irrigation_Used": irrigation
}])

# ==============================
# Predict
# ==============================
# predict() returns one value per row; the frame has exactly one row.
pred_yield = model.predict(sample)[0]

# Range (±10%)
# This is a fixed heuristic band around the point estimate, not a statistical
# interval. Sorting keeps lower <= upper even if the prediction is negative.
lower, upper = sorted((pred_yield * 0.9, pred_yield * 1.1))

# ==============================
# Beautiful Output
# ==============================
print("\n📊 Prediction Report")
print("=" * 40)
print(f"🌱 Crop Selected      : {crop}")
print(f"🌍 Region             : {region}")
print(f"🪨 Soil Type          : {soil}")
print(f"☁️ Weather Condition  : {weather}")
print(f"💧 Rainfall Entered   : {rainfall} mm")
print(f"🌡️ Temperature Given  : {temp} °C")
print(f"⏳ Days to Harvest    : {days}")
print(f"🧪 Fertilizer Used    : {'Yes' if fertilizer == 1 else 'No'}")
print(f"🚰 Irrigation Used    : {'Yes' if irrigation == 1 else 'No'}")

print("\n✅ Predicted Yield    : {:.2f} tons/hectare".format(pred_yield))
print("📈 Expected Range     : {:.2f} - {:.2f} tons/hectare".format(lower, upper))

print(f"\n🔎 Summary: For {crop} grown in {region} region with {soil} soil "
      f"and {weather.lower()} conditions, the estimated yield is around "
      f"{pred_yield:.2f} tons per hectare. Considering environmental "
      f"and farming variations, the realistic yield may fall between "
      f"{lower:.2f} and {upper:.2f} tons per hectare.")
print("=" * 40)


🌾 Crop Yield Prediction System 🌾

Select Region:
1. North
2. South
3. East
4. West

Select Soil Type:
1. Loamy
2. Sandy
3. Clay
4. Black

Select Crop:
1. Wheat
2. Rice
3. Maize
4. Sugarcane

Select Weather Condition:
1. Sunny
2. Rainy
3. Cloudy
4. Stormy

📊 Prediction Report
🌱 Crop Selected      : Rice
🌍 Region             : North
🪨 Soil Type          : Sandy
☁️ Weather Condition  : Rainy
💧 Rainfall Entered   : 500.0 mm
🌡️ Temperature Given  : 36.0 °C
⏳ Days to Harvest    : 90
🧪 Fertilizer Used    : Yes
🚰 Irrigation Used    : Yes

✅ Predicted Yield    : 5.90 tons/hectare
📈 Expected Range     : 5.31 - 6.49 tons/hectare

🔎 Summary: For Rice grown in North region with Sandy soil and rainy conditions, the estimated yield is around 5.90 tons per hectare. Considering environmental and farming variations, the realistic yield may fall between 5.31 and 6.49 tons per hectare.
