In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# Assuming the dataset and preprocessing steps are defined here as in the initial code snippet

# Load the dataset
df = pd.read_csv('Crop_Yield_District_wise_Dataset.csv')

# Define features and target variables
features = ['N', 'P', 'K', 'pH', 'Humidity', 'Temperature', 'Rainfall', 'District', 'SoilType']
X = df[features]
y_crop = df['CropName']
y_yield = df['CropYield']

# Preprocessing and model definitions as in the initial code snippet


# Preprocessing pipeline for numeric and categorical features
numeric_features = ['N', 'P', 'K', 'pH', 'Humidity', 'Temperature', 'Rainfall']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())])

categorical_features = ['District', 'SoilType']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# Crop prediction and yield prediction models
model_crop = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

model_yield = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Splitting the dataset into training and testing sets
X_train, X_test, y_train_crop, y_test_crop = train_test_split(X, y_crop, test_size=0.2, random_state=42)
X_train_yield, X_test_yield, y_train_yield, y_test_yield = train_test_split(X, y_yield, test_size=0.2, random_state=42)

# Training the models
model_crop.fit(X_train, y_train_crop)
model_yield.fit(X_train_yield, y_train_yield)

# Define a function to adjust input data based on crop type for yield prediction
def adjust_input_for_crop(input_data, crop):
    adjusted_input = input_data.copy()
    # Hypothetical adjustments for demonstration
    if crop == "CropA":
        adjusted_input['Rainfall'] *= 1.05
        adjusted_input['Temperature'] *= 0.95
    elif crop == "CropB":
        adjusted_input['Rainfall'] *= 0.95
        adjusted_input['Temperature'] *= 1.05
    # Add more elif blocks for other crops with specific adjustments
    return adjusted_input

# Function to predict yield for a given crop by adjusting input data
def predict_yield_for_crop(input_data, crop):
    adjusted_input = adjust_input_for_crop(input_data, crop)
    sample_input = pd.DataFrame([adjusted_input])
    predicted_yield = model_yield.predict(sample_input)[0]
    return predicted_yield

# Update the prediction functions to use predict_yield_for_crop

def predict_best_and_second_best_crops_with_yields(input_data):
    sample_input = pd.DataFrame([input_data])
    # Get probabilities of each crop
    probabilities = model_crop.predict_proba(sample_input)[0]
    crops = model_crop.classes_
    
    # Get indices of the top two crops
    top_two_indices = np.argsort(probabilities)[-2:]
    top_crops = crops[top_two_indices]
    
    # Predict yields for the top two crops
    yields = [predict_yield_for_crop(input_data, crop) for crop in top_crops]
    
    return list(zip(top_crops, yields))

# Example input data
input_data = {
    'N': 60, 'P': 29, 'K': 44, 'pH': 5.7, 'Humidity': 82, 'Temperature': 25, 'Rainfall': 1750,
    'District': 'Malda', 'SoilType': 'Red & Laterite Soil'
}

# Making predictions for the best and second-best fit crops and their yields
predictions = predict_best_and_second_best_crops_with_yields(input_data)
for i, (crop, yield_pred) in enumerate(predictions, 1):
    print(f"Top {i} Crop: {crop}, Predicted Yield: {yield_pred}")

Top 1 Crop: Potatoes, Predicted Yield: 1272.4
Top 2 Crop: Sorghum (Jowar), Predicted Yield: 1272.4


In [2]:
from sklearn.metrics import accuracy_score, r2_score

# Predicting the crops on the test set
y_pred_crop = model_crop.predict(X_test)

# Calculating the accuracy for the crop prediction model
accuracy_crop = accuracy_score(y_test_crop, y_pred_crop)

# Predicting the yields on the test set
y_pred_yield = model_yield.predict(X_test_yield)

# Calculating the R-squared score for the yield prediction model
r2_yield = r2_score(y_test_yield, y_pred_yield)

# Printing the results
print(f"Crop Prediction Model Accuracy: {accuracy_crop}")
print(f"Yield Prediction Model R-squared Score: {r2_yield}")

Crop Prediction Model Accuracy: 0.5951219512195122
Yield Prediction Model R-squared Score: 0.43345045959789175
