In [None]:
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset (replace with the correct file path for your environment)
df = pd.read_csv('/content/crop_yield.csv')

# Preprocess data: integer-encode the categorical features (Crop, State, Season).
# Each column gets its OWN fitted LabelEncoder, kept in `encoders`. Re-fitting a
# single encoder on every column would discard the Crop and State mappings, so
# predictions could not be decoded back to names and new inputs could not be
# encoded consistently at inference time.
encoders = {}
for column in ('Crop', 'State', 'Season'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward compatibility: `label_encoder` previously ended up fitted on 'Season'
# (the last column encoded), so keep that name bound to the same mapping.
label_encoder = encoders['Season']

# --- 1. Seasonal Crop Yield Comparison (Classification) ---
# Despite the section title, the target here is the encoded 'Crop' label: the
# classifier predicts which crop a row belongs to from its area, production,
# rainfall, fertilizer, pesticide and encoded season.
X_seasonal = df[['Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Season']]
y_seasonal = df['Crop']

# Hold out 20% of the rows for evaluation; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(X_seasonal, y_seasonal, test_size=0.2, random_state=42)

# Train RandomForestClassifier. The forest is seeded as well: a seeded split
# with an unseeded estimator still gives a different model and score each run.
seasonal_model = RandomForestClassifier(random_state=42)
seasonal_model.fit(X_train, y_train)

# Evaluate on the held-out rows (accuracy_score must be imported from
# sklearn.metrics, otherwise this line raises NameError)
seasonal_preds = seasonal_model.predict(X_test)
seasonal_accuracy = accuracy_score(y_test, seasonal_preds)
print(f"Seasonal Crop Yield Comparison Accuracy: {seasonal_accuracy}")

# --- 2. Rainfall Impact on Yield Prediction (Regression) ---
# Target: 'Yield', predicted from rainfall, area, fertilizer, pesticide and the
# encoded crop.
X_rainfall = df[['Annual_Rainfall', 'Area', 'Fertilizer', 'Pesticide', 'Crop']]
y_rainfall = df['Yield']

# Hold out 20% of the rows for evaluation; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(X_rainfall, y_rainfall, test_size=0.2, random_state=42)

# Train RandomForestRegressor. Seeded so that the fitted model and the metrics
# below are reproducible, matching the seeded train/test split.
rainfall_model = RandomForestRegressor(random_state=42)
rainfall_model.fit(X_train, y_train)

# Evaluate on the held-out rows: MSE is in squared target units, R2 is unitless
rainfall_preds = rainfall_model.predict(X_test)
rainfall_mse = mean_squared_error(y_test, rainfall_preds)
rainfall_r2 = r2_score(y_test, rainfall_preds)
print(f"Rainfall Impact on Yield Prediction MSE: {rainfall_mse}")
print(f"Rainfall Impact on Yield Prediction R2: {rainfall_r2}")

# --- 3. Optimal Fertilizer and Pesticide Requirement Prediction (Regression) ---
# Target: 'Fertilizer' only. No pesticide model is trained in this section even
# though the heading mentions it.
X_fertilizer = df[['Crop', 'Area', 'Production', 'Annual_Rainfall', 'Season']]
y_fertilizer = df['Fertilizer']

# Hold out 20% of the rows for evaluation; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(X_fertilizer, y_fertilizer, test_size=0.2, random_state=42)

# Train RandomForestRegressor. Seeded so that the fitted model and the metrics
# below are reproducible, matching the seeded train/test split.
fertilizer_model = RandomForestRegressor(random_state=42)
fertilizer_model.fit(X_train, y_train)

# Evaluate on the held-out rows. MSE is in squared target units, so a very
# large value reflects the scale of 'Fertilizer'; read it together with R2.
fertilizer_preds = fertilizer_model.predict(X_test)
fertilizer_mse = mean_squared_error(y_test, fertilizer_preds)
fertilizer_r2 = r2_score(y_test, fertilizer_preds)
print(f"Optimal Fertilizer Prediction MSE: {fertilizer_mse}")
print(f"Optimal Fertilizer Prediction R2: {fertilizer_r2}")

# --- 4. State-based Yield Prediction (Regression) ---
# Target: 'Yield', predicted from the encoded state plus area, production,
# rainfall, fertilizer, pesticide and the encoded season.
# NOTE(review): if 'Yield' in this dataset is derived from Production / Area,
# these two features leak the target and the R2 below is optimistic — confirm
# against the dataset definition before relying on this model.
X_state = df[['State', 'Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Season']]
y_state = df['Yield']

# Hold out 20% of the rows for evaluation; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(X_state, y_state, test_size=0.2, random_state=42)

# Train RandomForestRegressor. Seeded so that the fitted model and the metrics
# below are reproducible, matching the seeded train/test split.
state_model = RandomForestRegressor(random_state=42)
state_model.fit(X_train, y_train)

# Evaluate on the held-out rows: MSE is in squared target units, R2 is unitless
state_preds = state_model.predict(X_test)
state_mse = mean_squared_error(y_test, state_preds)
state_r2 = r2_score(y_test, state_preds)
print(f"State-based Yield Prediction MSE: {state_mse}")
print(f"State-based Yield Prediction R2: {state_r2}")

# --- 5. Save All Models in One .pkl File ---
# Bundle the four fitted estimators under descriptive keys so a consumer can
# restore all of them with a single pickle load.
models = dict(
    seasonal_classifier=seasonal_model,
    rainfall_regressor=rainfall_model,
    fertilizer_regressor=fertilizer_model,
    state_regressor=state_model,
)

# Serialize the bundle and write it to disk in binary mode.
# Only unpickle this file from a trusted source: loading a pickle can execute
# arbitrary code.
with open('all_models.pkl', 'wb') as f:
    f.write(pickle.dumps(models))

print("All models have been saved in 'all_models.pkl'")


Seasonal Crop Yield Comparison Accuracy: 0.5165058405281869
Rainfall Impact on Yield Prediction MSE: 25924.087123958478
Rainfall Impact on Yield Prediction R2: 0.967645035825908
Optimal Fertilizer Prediction MSE: 189100716649262.25
Optimal Fertilizer Prediction R2: 0.974364634501997
State-based Yield Prediction MSE: 74883.98117000247
State-based Yield Prediction R2: 0.9065398709553922
All models have been saved in 'all_models.pkl'
