In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pickle as pkl

# Train a crop-yield regression pipeline and persist it to disk.
#
# Steps: read the cleaned CSV, separate features from the "Crop_Yield"
# target, scale numeric columns and one-hot encode object (string) columns,
# fit a random forest on every row, then pickle the fitted pipeline.
# NOTE: the model is fit on the full dataset; no hold-out evaluation is
# performed in this cell.

# Load dataset
df = pd.read_csv("Crop_Yield_and_Disease_Cleaned.csv")

# Prepare features and target. The disease columns are excluded from the
# feature matrix along with the target itself.
X = df.drop(columns=["Crop_Yield", "Disease", "Disease_Type"])
y = df["Crop_Yield"]

# Identify categorical and numeric columns by dtype.
# NOTE(review): columns that are neither object nor numeric (e.g. bool,
# datetime, category) end up in neither list and are dropped by
# ColumnTransformer's default remainder="drop" — confirm none exist.
cat_cols = X.select_dtypes(include='object').columns.tolist()
num_cols = X.select_dtypes(include='number').columns.tolist()

# Preprocessing: standardize numeric features; one-hot encode categoricals.
# handle_unknown='ignore' encodes categories unseen during fit as all-zeros
# at predict time instead of raising.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), num_cols),
    ("cat", OneHotEncoder(handle_unknown='ignore'), cat_cols)
])

# Pipeline: preprocessing followed by the regressor, so the saved artifact
# accepts raw feature frames. random_state is fixed for reproducibility.
pipe = Pipeline([
    ("preprocess", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train model
pipe.fit(X, y)

# Save model to pickle file. The context manager guarantees the handle is
# flushed and closed even if pickling raises (the previous bare open() call
# left the file object to be closed by garbage collection).
with open("crop_yield_diseases.pkl", "wb") as model_file:
    pkl.dump(pipe, model_file)

print("✅ Model pipeline saved as crop_yield_diseases.pkl")


✅ Model pipeline saved as crop_yield_diseases.pkl
