In [25]:
import pandas as pd

In [28]:
# Path of the state / capital-district crop dataset, relative to the notebook
CROP_DATA_CSV = 'state_capital_crop_data.csv'
district_data = pd.read_csv(CROP_DATA_CSV)

# Last expression of the cell: display the column index of the loaded frame
district_data.columns

Index(['state', 'capital_district', 'N', 'P', 'K', 'temperature', 'humidity',
       'ph', 'rainfall', 'label_number'],
      dtype='object')

* ### Prediction with user input of State and District

##### Save each model's name and its predicted label in a dictionary

In [49]:
import os
import joblib
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Work on a copy so the raw `district_data` frame is never modified
df = district_data.copy()

# Numeric feature columns handed to the scaler and to every model
feature_cols = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

# Coerce features to numeric; values that cannot be parsed become NaN
df[feature_cols] = df[feature_cols].apply(pd.to_numeric, errors='coerce')

# Drop rows with missing feature values (safe: `df` is a private copy)
df.dropna(subset=feature_cols, inplace=True)

# Load the pre-saved scaler.
# NOTE: joblib.load unpickles arbitrary Python objects - only load files you trust.
scaler = joblib.load('Scaler/scaler.pkl')

# Scale only the numeric features; the state/district columns stay readable for lookup
df[feature_cols] = scaler.transform(df[feature_cols])

# Take user input (surrounding whitespace is removed so " Kolkata " still matches)
print("\n🌱 Crop Recommendation by State and District 🌱")
state = input("Enter State name: ").strip()
district = input("Enter District name: ").strip()

# Case- and whitespace-insensitive match against the scaled dataframe
state_match = df['state'].str.strip().str.lower() == state.lower()
district_match = df['capital_district'].str.strip().str.lower() == district.lower()
input_row = df[state_match & district_match]

# Always start from an empty dictionary: a failed lookup must not raise NameError
# below, nor silently reuse predictions left over from a previous run of this cell.
predictions_dict = {}

if input_row.empty:
    print("❌ No data found for the given State and District.")
else:
    # Scaled feature values of the matching row(s)
    input_features = input_row[feature_cols].values

    print("\nPredictions from all models (as numeric labels):\n")

    # Predict with every pickled model in Saved_models.
    # sorted() makes the printed/dictionary order reproducible (os.listdir order is arbitrary).
    model_folder = 'Saved_models'
    for file_name in sorted(os.listdir(model_folder)):
        # splitext strips only the trailing extension, unlike str.replace
        model_name, extension = os.path.splitext(file_name)
        if extension != '.pkl':
            continue

        # NOTE: same pickle caveat as for the scaler - trusted files only
        model = joblib.load(os.path.join(model_folder, file_name))

        # If several rows matched, only the first row's prediction is kept
        prediction = model.predict(input_features)[0]
        predictions_dict[model_name] = prediction

        print(f"{model_name:<25}: {prediction}")

print("\n📦 All Predictions Dictionary:")
print(predictions_dict)



🌱 Crop Recommendation by State and District 🌱


Enter State name:  West Bengal
Enter District name:  Kolkata



Predictions from all models (as numeric labels):

Logistic Regression      : 16
Naive Bayes              : 16
Random Forest            : 16
SVC                      : 16
XGBoost                  : 4

📦 All Predictions Dictionary:
{'Logistic Regression': 16, 'Naive Bayes': 16, 'Random Forest': 16, 'SVC': 16, 'XGBoost': 4}


##### The label predicted most often is treated as the final prediction

In [50]:
from collections import Counter

# One vote per model: the label each model predicted
predictions_list = list(predictions_dict.values())

if not predictions_list:
    # Nothing to vote on (e.g. the State/District lookup found no row);
    # Counter.most_common(1)[0] would raise IndexError on an empty list.
    print("❌ No predictions available. Run the prediction cell with a valid State and District first.")
else:
    # Count votes per label
    vote_counts = Counter(predictions_list)

    # Most voted label; on a tie, most_common keeps the label that was encountered first
    most_common_label, vote_count = vote_counts.most_common(1)[0]

    # Load label mapping from CSV (columns used: 'Number' = label id, 'Name' = crop name)
    label_map = pd.read_csv("Label_numbers.csv")
    label_to_crop = dict(zip(label_map['Number'], label_map['Name']))

    # Translate the winning label into a crop name, with a fallback for unmapped labels
    final_crop = label_to_crop.get(most_common_label, "Unknown Crop")

    # Print result
    print(f"✅ Final Recommended Crop: {final_crop}")
    print(f"Label Number: {most_common_label} | Votes: {vote_count}")


✅ Final Recommended Crop: orange
Label Number: 16 | Votes: 4
