In [1]:
import json, ast, pickle
import numpy as np
import pandas as pd
import os

# --- Step 1: Define necessary classes and helpers ---
# These are needed to handle data in the same way the model was trained.

# Custom Standard Scaler for numeric features
class StandardScalerLite:
    """Minimal standard scaler: subtract the column mean, divide by the column std.

    Inputs are converted with ``np.asarray(..., dtype=float)``, so lists,
    ndarrays and DataFrames are all accepted and the result is always an
    ndarray.
    """

    def __init__(self):
        # Per-column statistics; both stay None until fit() is called
        # (or until they are restored from a saved object).
        self.mean_ = None
        self.scale_ = None

    def fit(self, X):
        """Learn the per-column mean and population std (ddof=0) of ``X``.

        Returns ``self`` so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        self.mean_ = np.mean(X, axis=0)
        std = np.std(X, axis=0)
        # Constant columns have std 0; use 1.0 instead to avoid division by
        # zero. np.where (rather than in-place item assignment) also works
        # when std is a scalar, i.e. when X is 1-D.
        self.scale_ = np.where(std == 0, 1.0, std)
        return self

    def transform(self, X):
        """Return ``(X - mean_) / scale_`` as a float ndarray."""
        X = np.asarray(X, dtype=float)
        return (X - self.mean_) / self.scale_

# Custom One-Hot Encoder
class OneHotEncoderLite:
    """Minimal one-hot encoder for string-like categorical DataFrame columns.

    Missing values are encoded as the literal category ``"NA_VALUE"``.
    Categories not seen during ``fit`` produce an all-zero slice for that
    column at transform time.
    """

    def __init__(self):
        self.cat_maps = {}          # column -> {category string: offset within the column's slice}
        self.col_index_ranges = {}  # column -> (start, end) slice in the output matrix
        self.n_features_ = 0        # total width of the encoded output

    def fit(self, frame, cat_cols):
        """Learn the sorted category vocabulary of each column in ``cat_cols``.

        Any previously fitted state is discarded. Returns ``self``.
        """
        # Reset first so a re-fit on different columns leaves no stale entries
        # that transform() would later try to read from the frame.
        self.cat_maps = {}
        self.col_index_ranges = {}
        start = 0
        for col in cat_cols:
            labels = sorted(set(frame[col].fillna("NA_VALUE").astype(str)))
            self.cat_maps[col] = {label: i for i, label in enumerate(labels)}
            end = start + len(labels)
            self.col_index_ranges[col] = (start, end)
            start = end
        self.n_features_ = start
        return self

    def transform(self, frame):
        """Return a ``(len(frame), n_features_)`` float32 one-hot matrix."""
        out = np.zeros((len(frame), self.n_features_), dtype=np.float32)
        for col, mapping in self.cat_maps.items():
            start, _ = self.col_index_ranges[col]
            labels = frame[col].fillna("NA_VALUE").astype(str)
            # Look each label up with plain dict access instead of Series.map:
            # map() can coerce a mix of integer hits and None misses into a
            # float column with NaN, which is neither None nor a valid index.
            for row, label in enumerate(labels):
                offset = mapping.get(label)
                if offset is not None:
                    out[row, start + offset] = 1.0
        return out

# Custom Label Encoder
class LabelEncoderLite:
    """Map class labels to consecutive integer ids (in sorted label order) and back."""

    def __init__(self):
        self.class_to_id = {}   # label -> integer id
        self.id_to_class = []   # integer id -> label

    def fit(self, y):
        """Build both lookup tables from the distinct labels in ``y``; returns ``self``."""
        classes = sorted(set(y))
        self.class_to_id = dict(zip(classes, range(len(classes))))
        self.id_to_class = classes
        return self

    def transform(self, y):
        """Encode labels as an int64 array; an unknown label raises ``KeyError``."""
        lookup = self.class_to_id
        ids = []
        for label in y:
            ids.append(lookup[label])
        return np.array(ids, dtype=np.int64)

    def inverse_transform(self, ids):
        """Decode integer ids back into a list of labels."""
        classes = self.id_to_class
        return [classes[i] for i in ids]

# Activation and Loss functions (needed by the MLP class)
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)
def softmax(x):
    """Softmax along axis 1 of ``x`` (each row of a 2-D input sums to 1)."""
    # Subtracting the row maximum keeps exp() from overflowing on large scores.
    row_max = np.max(x, axis=1, keepdims=True)
    weights = np.exp(x - row_max)
    totals = np.sum(weights, axis=1, keepdims=True)
    return weights / totals

# MLP class definition (without the fit method)
class SimpleMLP:
    """Inference-only MLP with one hidden layer: ReLU hidden units, softmax output."""

    def __init__(self, input_dim, hidden_dim, output_dim, W1, b1, W2, b2):
        # Layer sizes are stored as given; they are not checked against the weights.
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        # Pre-trained parameters for the hidden (W1, b1) and output (W2, b2) layers.
        self.W1 = W1
        self.b1 = b1
        self.W2 = W2
        self.b2 = b2

    def _forward(self, X):
        """Run one forward pass, caching every intermediate on the instance."""
        hidden_pre = np.dot(X, self.W1) + self.b1
        self.z1 = hidden_pre
        hidden_act = relu(hidden_pre)
        self.a1 = hidden_act
        output_pre = np.dot(hidden_act, self.W2) + self.b2
        self.z2 = output_pre
        self.a2 = softmax(output_pre)
        return self.a2

    def predict_proba(self, X):
        """Return the softmax output of the forward pass for ``X``."""
        return self._forward(X)

    def predict(self, X):
        """Return, per row, the index of the highest-probability class."""
        probabilities = self.predict_proba(X)
        return np.argmax(probabilities, axis=1)

# Helper functions from the previous script
def try_parse_list(x):
    """Best-effort conversion of ``x`` into a list of stripped strings.

    Accepts an actual list, a string holding a Python list literal, or a
    loosely formatted comma-separated string (brackets and quotes are
    dropped). Missing or blank input yields ``[]``.
    """
    # Handle real lists before calling pd.isna(): on a list, pd.isna() returns
    # an element-wise array, and using that array in `if` raises ValueError.
    if isinstance(x, list):
        return [str(i).strip() for i in x]
    if pd.isna(x):
        return []
    s = str(x).strip()
    if not s:
        return []
    try:
        v = ast.literal_eval(s)
    except Exception:
        # Deliberate best effort: anything that is not a valid literal falls
        # through to the plain comma-split parser below.
        v = None
    if isinstance(v, list):
        return [str(i).strip() for i in v]
    for ch in "[]\"'":
        s = s.replace(ch, "")
    return [t.strip() for t in s.split(",") if t.strip()]

def try_parse_json(x):
    """Parse ``x`` as JSON, falling back to a Python literal, else ``{}``.

    Missing or blank input also yields ``{}``. The parsed value is returned
    as-is, so it is not necessarily a dict.
    """
    if pd.isna(x):
        return {}
    text = str(x).strip()
    if not text:
        return {}
    # Try strict JSON first, then Python literal syntax (e.g. single quotes).
    for parse in (json.loads, ast.literal_eval):
        try:
            return parse(text)
        except Exception:
            continue
    return {}
            
def nanmean_safe(series, fallback=None):
    """Mean of ``series`` after numeric coercion, or ``fallback`` if that mean is NaN.

    Values that cannot be converted to numbers are coerced to NaN before
    the mean is taken.
    """
    numeric = pd.to_numeric(series, errors="coerce")
    average = numeric.mean()
    return fallback if pd.isna(average) else average

# --- Step 2: Load the saved model and encoders ---
# Ensure these files (from the previous script) are in the same directory.
# Defines the module-level names used by the prediction helpers below:
# ohe, le_y, num_cols, cat_cols, scaler, W1, b1, W2, b2 and model.
try:
    # SECURITY: pickle.load can execute arbitrary code while loading.
    # Only open an 'encoders.pkl' that you produced yourself or fully trust.
    # NOTE(review): presumably the pickled objects are instances of the *Lite
    # classes defined above, so those definitions must exist before loading —
    # confirm against the training script.
    with open("encoders.pkl", "rb") as f:
        data = pickle.load(f)
        ohe = data["ohe"]            # fitted one-hot encoder
        le_y = data["le_y"]          # fitted label encoder for the target
        num_cols = data["num_cols"]  # numeric feature column names
        cat_cols = data["cat_cols"]  # categorical feature column names
        scaler = data["scaler"]      # fitted numeric scaler
    
    # `data` is rebound here from the pickle payload to the .npz archive handle.
    with np.load("croprecommender_mlp.npz") as data:
        W1, b1, W2, b2 = data['W1'], data['b1'], data['W2'], data['b2']

    # Create the model instance with the loaded weights
    # (layer sizes are read off the weight matrix shapes).
    model = SimpleMLP(W1.shape[0], W1.shape[1], W2.shape[1], W1, b1, W2, b2)
    print("✅ Model and encoders loaded successfully.")

except FileNotFoundError as e:
    # Print a hint about the expected files, then re-raise so execution stops.
    print(f"Error: {e}. Please ensure 'encoders.pkl' and 'croprecommender_mlp.npz' are in the same directory as this script.")
    raise e

# --- Step 3: Define the prediction pipeline helper functions ---
# Build the lookup tables used to impute missing numeric inputs:
#   group_means     - per-(District, Season) column means from the CSV
#   season_defaults - hard-coded climate values per season
CSV_PATH = "apcrop_dataset_realistic.csv"
if os.path.exists(CSV_PATH):
    df = pd.read_csv(CSV_PATH)
    # Columns averaged per group. This list is hard-coded here and is
    # independent of the `num_cols` list loaded from encoders.pkl.
    num_cols_all = ["Soil_pH","Organic_Carbon_pct","Soil_N_kg_ha","Soil_P_kg_ha","Soil_K_kg_ha",
            "Avg_Temp_C","Seasonal_Rainfall_mm","Avg_Humidity_pct","Market_Price_Index"]
    # Coerce each column to numeric (unparseable values become NaN) before averaging.
    group_means = df.groupby(["District","Season"])[num_cols_all].agg(lambda s: pd.to_numeric(s, errors="coerce").mean()).reset_index()
else:
    print(f"Warning: '{CSV_PATH}' not found. Imputation will rely on season defaults only.")
    # Empty frame with the expected columns, so group lookups simply find no match.
    # Note that `df` is not defined in this branch.
    group_means = pd.DataFrame(columns=["District", "Season"] + num_cols)

# Fallback climate values per season, used when no group mean is available.
season_defaults = {
    "Kharif": {"Avg_Temp_C": 29.0, "Seasonal_Rainfall_mm": 600.0, "Avg_Humidity_pct": 78.0},
    "Rabi":   {"Avg_Temp_C": 23.0, "Seasonal_Rainfall_mm": 200.0, "Avg_Humidity_pct": 65.0},
    "Zaid":   {"Avg_Temp_C": 31.0, "Seasonal_Rainfall_mm": 120.0, "Avg_Humidity_pct": 70.0},
}
def build_feature_row(input_dict, ohe, num_cols, cat_cols, scaler, group_means, season_defaults):
    """Turn one raw input record into a ``(1, n_features)`` model input row.

    Missing numeric values (``None``, NaN or a blank string) are imputed in
    this order: the (District, Season) mean from ``group_means``, then the
    season default from ``season_defaults``, then the column mean of the
    module-level ``df`` if it exists, else ``0.0``.

    Args:
        input_dict: Raw feature values keyed by column name.
        ohe: Fitted encoder; ``transform(frame)`` returns the one-hot block.
        num_cols: Numeric feature column names, in model order.
        cat_cols: Categorical feature column names.
        scaler: Fitted scaler; ``transform(array)`` returns the scaled block.
        group_means: DataFrame with "District" and "Season" columns plus
            per-group numeric means.
        season_defaults: ``{season: {column: default value}}``.

    Returns:
        The scaled numeric block followed by the one-hot block, joined along axis 1.
    """
    row = {c: input_dict.get(c, None) for c in num_cols}
    row.update({c: input_dict.get(c, "NA_VALUE") for c in cat_cols})

    tmp = pd.DataFrame([row])
    # `row` only holds District/Season when they are listed in cat_cols.
    # Otherwise read them from the raw input, so the imputation lookup still
    # uses the caller's values instead of always using the constants.
    district = row.get("District", input_dict.get("District", "NA_VALUE"))
    season = row.get("Season", input_dict.get("Season", "Kharif"))
    tmp["District"] = district
    tmp["Season"] = season

    g = group_means[(group_means["District"] == district) & (group_means["Season"] == season)]
    group_row = None if g.empty else g.iloc[0]
    season_row = season_defaults.get(season, {})

    for col in num_cols:
        val = tmp.at[0, col]
        # NaN counts as missing too; otherwise it would flow straight into
        # the scaler and the model.
        if isinstance(val, str):
            missing = not val.strip()
        else:
            missing = bool(pd.isna(val))
        if not missing:
            continue

        dv = None
        if group_row is not None:
            # .get() yields None when group_means has no such column.
            dv = group_row.get(col)
        if (dv is None or pd.isna(dv)) and col in season_row:
            dv = season_row[col]
        if dv is None or pd.isna(dv):
            # Fallback to a reasonable default if all else fails
            dv = df[col].mean() if 'df' in globals() else 0.0
        tmp.at[0, col] = dv

    x_num_raw = tmp[num_cols].astype(float).values.astype(np.float32)
    x_num = scaler.transform(x_num_raw)
    x_cat = ohe.transform(tmp[cat_cols])
    x = np.concatenate([x_num, x_cat], axis=1)
    return x

def predict_topN(input_dict, N=3):
    """Return the ``N`` most probable crops for one input record.

    Uses the module-level model, encoders and imputation tables.

    Returns:
        A pair ``(ranked, tips)``: ``ranked`` is a list of
        ``(crop, probability)`` tuples, most probable first; ``tips`` maps
        each of those crops to its list of irrigation tips, or to a generic
        one-item list when the crop has no entry.
    """
    IRRIGATION_TIPS = {
        "Paddy": ["Daily flooding, maintain 5–10 cm standing water.", "Ensure water at tillering and flowering.", "Drain completely 7–10 days before harvest."],
        "Maize": ["Irrigate weekly; adjust to rainfall & soil.", "Critical: tasseling, silking, grain filling.", "Avoid stress in reproductive phase."],
        "Groundnut": ["Irrigate ~every 10 days.", "Critical: flowering & pegging stages.", "Keep moisture during pod development."],
        "Wheat": ["Irrigate at CRI, jointing, flowering.", "CRI (≈21 DAS) is most critical.", "Avoid waterlogging."],
        "Bengal Gram": ["Minimal irrigation; drought-tolerant.", "One light irrigation at flowering if very dry.", "Avoid heavy irrigation."],
        "Sunflower": ["Irrigate ~every 12 days.", "Critical: bud, flowering, seed filling.", "Drip works very well."],
        "Castor": ["Irrigate ~every 15 days.", "Provide at branching & spike initiation.", "Too much water → foliage, fewer seeds."],
        "Bajra": ["Prefer rainfed; drought-tolerant.", "If needed, irrigate at flowering & grain filling.", "Avoid over-irrigation (lodging risk)."],
        "Linseed": ["Light irrigation at branching & flowering.", "Never waterlog; highly susceptible.", "1–2 irrigations often enough."],
        "Mustard": ["Irrigate at branching & pod filling.", "First irrigation 30–35 DAS.", "Avoid watering during flowering."],
        "Watermelon": ["Irrigate ~every 7 days; uniform moisture.", "Avoid stress at flowering & fruit set.", "Reduce water near maturity for sweetness."],
        "Muskmelon": ["Irrigate ~every 7 days, avoid waterlogging.", "Consistent water for fruit growth & quality.", "Drip reduces fungal risk."],
        "Cowpea": ["Irrigate ~every 10 days.", "Critical: flowering & pod development.", "Drought tolerant but timely water boosts yield."],
        "Cotton": ["Irrigate ~every 15 days; adjust by weather.", "Critical: squaring, flowering, boll formation.", "Avoid waterlogging (boll rot)."],
        "Sugarcane": ["Frequent water in hot/dry months.", "Formative stage (to ~120 DAS) needs moisture.", "Reduce 1–2 months pre-harvest."],
        "Barley": ["Light irrigations as needed.", "CRI is most crucial.", "Sensitive to waterlogging."],
        "Lentil": ["Mostly rainfed.", "One light irrigation pre-flowering may help.", "Too much water → vegetative growth."],
        "Soybean": ["Prefer rainfed/moderate drought tolerance.", "Supplement at pod filling if dry.", "Avoid irrigation during flowering."],
        "Pea": ["Irrigate at flowering & pod filling.", "Initial irrigation helps germination.", "Avoid overwatering (root rot risk)."],
        "Vegetables": ["Irrigate every 5–7 days depending on crop.", "Drip to root-zone conserves water.", "Consistency prevents cracking/bitterness."],
        "Jute": ["Keep soil moist throughout.", "Frequent light irrigation in hot season.", "Ensure drainage to avoid root decay."],
        "Oats": ["Irrigate ~every 12 days.", "Critical: tillering & flowering.", "Relatively drought-tolerant."],
        "Cucumber": ["Irrigate ~every 7 days; uniform moisture.", "Consistent water at fruit set & growth.", "Low water → bitter fruits."],
        "Sugar Beet": ["Irrigate ~every 10 days.", "Critical: canopy establishment, root bulking.", "Avoid waterlogging (low sugar, root rot)."],
        "Pearl Millet": ["Prefer rainfed; very drought-tolerant.", "If needed, irrigate at flowering.", "Water stress at grain filling cuts yield."],
        "Cluster Bean": ["Prefer rainfed; arid-suited.", "1–2 light irrigations in long dry spells.", "Overwatering reduces pod set."],
        "Sesame": ["Prefer rainfed; drought-hardy.", "One irrigation at flowering if dry.", "Avoid waterlogging (root rot)."],
        "Green Gram": ["Rainfed OK; one irrigation at flowering.", "Avoid heavy irrigation (root disease risk).", "Irrigation can boost yield modestly."],
        "Millets": ["Mostly rainfed; high drought tolerance.", "Irrigate only at critical stages if very dry.", "Avoid excessive water."],
        "Sorghum": ["Mostly rainfed; irrigate if prolonged dry.", "Critical: booting & flowering.", "Avoid waterlogging."],
        "Jowar": ["Irrigate at heading and grain formation.", "Drought tolerant, but water stress impacts yield.", "Avoid waterlogging."]
    }
    features = build_feature_row(input_dict, ohe, num_cols, cat_cols, scaler, group_means, season_defaults)
    # The model returns one probability row per input row; there is only one here.
    probs = model.predict_proba(features)[0]
    # Negating the scores makes the ascending argsort rank highest-probability first.
    ranked_ids = np.argsort(-probs)[:N]

    ranked = []
    tips = {}
    for class_id in ranked_ids:
        crop = le_y.id_to_class[class_id]
        ranked.append((crop, float(probs[class_id])))
        tips[crop] = IRRIGATION_TIPS.get(crop, ["Follow crop-specific schedule."])
    return ranked, tips

# --- Step 4: MANUALLY INPUT YOUR DATA HERE ---
# Feel free to change these values to test new scenarios.
# Keys are the raw feature column names read by build_feature_row; any
# numeric key left out is imputed there.
manual_input_data = {
    "District": "Anantapur",
    "Season": "Kharif",
    "Water_Source": "Tank",
    "Previous_Crop": "Maize",
    "Soil_pH": 6.8,
    "Organic_Carbon_pct": 0.5,
    "Soil_N_kg_ha": 150,
    "Soil_P_kg_ha": 15,
    "Soil_K_kg_ha": 160,
    "Avg_Temp_C": 30.5,
    "Seasonal_Rainfall_mm": 550,
    "Avg_Humidity_pct": 75,
    "Market_Price_Index": 0.8
}

# --- Step 5: Run prediction and print results ---
print("\n--- Generating predictions for your manual input ---")
# predictions: list of (crop, probability); irrigation_tips: crop -> list of tips.
predictions, irrigation_tips = predict_topN(manual_input_data, N=5)

for crop, confidence in predictions:
    print(f"\n🌾 Predicted Crop: {crop}")
    print(f"   Confidence: {confidence:.3f}")
    # Only the first tip of each crop is shown.
    print(f"   Irrigation Tip: {irrigation_tips.get(crop, ['No specific tips found.'])[0]}")

print("\n--- Prediction complete ---")


✅ Model and encoders loaded successfully.

--- Generating predictions for your manual input ---

🌾 Predicted Crop: Groundnut
   Confidence: 0.802
   Irrigation Tip: Irrigate ~every 10 days.

🌾 Predicted Crop: Paddy
   Confidence: 0.096
   Irrigation Tip: Daily flooding, maintain 5–10 cm standing water.

🌾 Predicted Crop: Cotton
   Confidence: 0.056
   Irrigation Tip: Irrigate ~every 15 days; adjust by weather.

🌾 Predicted Crop: Pearl Millet
   Confidence: 0.019
   Irrigation Tip: Prefer rainfed; very drought-tolerant.

🌾 Predicted Crop: Vegetables
   Confidence: 0.011
   Irrigation Tip: Irrigate every 5–7 days depending on crop.

--- Prediction complete ---


In [5]:
import pickle
import numpy as np
import pandas as pd
import os
import json, ast

# --- Essential classes and helpers to run the model ---
class StandardScalerLite:
    """Inference-only scaler: applies stored ``mean_`` / ``scale_`` statistics.

    This version has no ``fit``; the statistics must be set from outside
    before ``transform`` is called.
    """

    def __init__(self):
        self.mean_ = self.scale_ = None

    def transform(self, X):
        """Return ``(X - mean_) / scale_`` computed on a float copy of ``X``."""
        centred = X.astype(float) - self.mean_
        return centred / self.scale_

class OneHotEncoderLite:
    """Inference-only one-hot encoder driven by stored category maps.

    This version has no ``fit``; ``cat_maps``, ``col_index_ranges`` and
    ``n_features_`` must be set from outside before ``transform`` is called.
    Missing values are looked up as the literal category ``"NA_VALUE"``;
    categories absent from the stored map encode as all zeros.
    """

    def __init__(self):
        self.cat_maps = {}          # column -> {category string: offset within the column's slice}
        self.col_index_ranges = {}  # column -> (start, end) slice in the output matrix
        self.n_features_ = 0        # total width of the encoded output

    def transform(self, frame):
        """Return a ``(len(frame), n_features_)`` float32 one-hot matrix."""
        out = np.zeros((len(frame), self.n_features_), dtype=np.float32)
        for col, mapping in self.cat_maps.items():
            start, _ = self.col_index_ranges[col]
            labels = frame[col].fillna("NA_VALUE").astype(str)
            # Look each label up with plain dict access instead of Series.map:
            # map() can coerce a mix of integer hits and None misses into a
            # float column with NaN, which is neither None nor a valid index.
            for row, label in enumerate(labels):
                offset = mapping.get(label)
                if offset is not None:
                    out[row, start + offset] = 1.0
        return out

class LabelEncoderLite:
    """Inference-only label decoder backed by the ``id_to_class`` list."""

    def __init__(self):
        self.id_to_class = []   # integer id -> label

    def inverse_transform(self, ids):
        """Decode integer ids back into a list of labels."""
        names = self.id_to_class
        decoded = []
        for class_id in ids:
            decoded.append(names[class_id])
        return decoded

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)
def softmax(x):
    """Softmax along axis 1 of ``x`` (each row of a 2-D input sums to 1)."""
    # Subtracting the row maximum keeps exp() from overflowing on large scores.
    row_max = np.max(x, axis=1, keepdims=True)
    weights = np.exp(x - row_max)
    totals = np.sum(weights, axis=1, keepdims=True)
    return weights / totals

class SimpleMLP:
    """Inference-only MLP with one hidden layer: ReLU hidden units, softmax output."""

    def __init__(self, W1, b1, W2, b2):
        # Pre-trained parameters for the hidden (W1, b1) and output (W2, b2) layers.
        self.W1 = W1
        self.b1 = b1
        self.W2 = W2
        self.b2 = b2

    def _forward(self, X):
        """Run one forward pass, caching every intermediate on the instance."""
        hidden_pre = np.dot(X, self.W1) + self.b1
        self.z1 = hidden_pre
        hidden_act = relu(hidden_pre)
        self.a1 = hidden_act
        output_pre = np.dot(hidden_act, self.W2) + self.b2
        self.z2 = output_pre
        self.a2 = softmax(output_pre)
        return self.a2

    def predict_proba(self, X):
        """Return the softmax output of the forward pass for ``X``."""
        return self._forward(X)

# --- Load the saved model and encoders ---
# Defines the module-level names used by the prediction helpers below:
# ohe, le_y, num_cols, cat_cols, scaler, W1, b1, W2, b2 and model.
try:
    # SECURITY: pickle.load can execute arbitrary code while loading.
    # Only open an 'encoders.pkl' that you produced yourself or fully trust.
    # NOTE(review): presumably the pickled objects are instances of the *Lite
    # classes defined above, so those definitions must exist before loading —
    # confirm against the training script.
    with open("encoders.pkl", "rb") as f:
        data = pickle.load(f)
        ohe = data["ohe"]            # fitted one-hot encoder
        le_y = data["le_y"]          # fitted label encoder for the target
        num_cols = data["num_cols"]  # numeric feature column names
        cat_cols = data["cat_cols"]  # categorical feature column names
        scaler = data["scaler"]      # fitted numeric scaler
    
    # `data` is rebound here from the pickle payload to the .npz archive handle.
    with np.load("croprecommender_mlp.npz") as data:
        W1, b1, W2, b2 = data['W1'], data['b1'], data['W2'], data['b2']
    
    model = SimpleMLP(W1, b1, W2, b2)
    print("✅ Model and encoders loaded successfully.")

except FileNotFoundError as e:
    # Print a hint about the expected files, then re-raise so execution stops.
    print(f"Error: {e}. Please ensure 'encoders.pkl' and 'croprecommender_mlp.npz' are in the same directory.")
    raise e

# --- Prediction pipeline helper functions ---
# Build the lookup tables used to impute missing numeric inputs:
#   group_means     - per-(District, Season) column means from the CSV
#   season_defaults - hard-coded climate values per season
CSV_PATH = "apcrop_dataset_realistic.csv"
if os.path.exists(CSV_PATH):
    df = pd.read_csv(CSV_PATH)
    # Columns averaged per group. This list is hard-coded here and is
    # independent of the `num_cols` list loaded from encoders.pkl.
    num_cols_all = ["Soil_pH","Organic_Carbon_pct","Soil_N_kg_ha","Soil_P_kg_ha","Soil_K_kg_ha",
            "Avg_Temp_C","Seasonal_Rainfall_mm","Avg_Humidity_pct","Market_Price_Index"]
    # Coerce each column to numeric (unparseable values become NaN) before averaging.
    group_means = df.groupby(["District","Season"])[num_cols_all].agg(lambda s: pd.to_numeric(s, errors="coerce").mean()).reset_index()
else:
    # Empty frame with the expected columns, so group lookups simply find no match.
    # No warning is printed in this branch.
    group_means = pd.DataFrame(columns=["District", "Season"] + num_cols)

# Fallback climate values per season, used when no group mean is available.
season_defaults = {
    "Kharif": {"Avg_Temp_C": 29.0, "Seasonal_Rainfall_mm": 600.0, "Avg_Humidity_pct": 78.0},
    "Rabi":   {"Avg_Temp_C": 23.0, "Seasonal_Rainfall_mm": 200.0, "Avg_Humidity_pct": 65.0},
    "Zaid":   {"Avg_Temp_C": 31.0, "Seasonal_Rainfall_mm": 120.0, "Avg_Humidity_pct": 70.0},
}
def build_feature_row(input_dict, ohe, num_cols, cat_cols, scaler, group_means, season_defaults):
    """Turn one raw input record into a ``(1, n_features)`` model input row.

    Missing numeric values (``None``, NaN or a blank string) are imputed in
    this order: the (District, Season) mean from ``group_means``, then the
    season default from ``season_defaults``, then the column mean of the
    module-level ``df`` if it exists, else ``0.0``.

    Args:
        input_dict: Raw feature values keyed by column name.
        ohe: Fitted encoder; ``transform(frame)`` returns the one-hot block.
        num_cols: Numeric feature column names, in model order.
        cat_cols: Categorical feature column names.
        scaler: Fitted scaler; ``transform(array)`` returns the scaled block.
        group_means: DataFrame with "District" and "Season" columns plus
            per-group numeric means.
        season_defaults: ``{season: {column: default value}}``.

    Returns:
        The scaled numeric block followed by the one-hot block, joined along axis 1.
    """
    row = {c: input_dict.get(c, None) for c in num_cols}
    row.update({c: input_dict.get(c, "NA_VALUE") for c in cat_cols})
    tmp = pd.DataFrame([row])
    # `row` only holds District/Season when they are listed in cat_cols.
    # Otherwise read them from the raw input, so the imputation lookup still
    # uses the caller's values instead of always using the constants.
    district = row.get("District", input_dict.get("District", "NA_VALUE"))
    season = row.get("Season", input_dict.get("Season", "Kharif"))
    tmp["District"] = district
    tmp["Season"] = season

    g = group_means[(group_means["District"] == district) & (group_means["Season"] == season)]
    group_row = None if g.empty else g.iloc[0]
    season_row = season_defaults.get(season, {})

    for col in num_cols:
        val = tmp.at[0, col]
        # NaN counts as missing too; otherwise it would flow straight into
        # the scaler and the model.
        if isinstance(val, str):
            missing = not val.strip()
        else:
            missing = bool(pd.isna(val))
        if not missing:
            continue

        dv = None
        if group_row is not None:
            # .get() yields None when group_means has no such column.
            dv = group_row.get(col)
        if (dv is None or pd.isna(dv)) and col in season_row:
            dv = season_row[col]
        if dv is None or pd.isna(dv):
            # Last resort: dataset-wide mean if the CSV was loaded, else 0.0.
            dv = df[col].mean() if 'df' in globals() else 0.0
        tmp.at[0, col] = dv

    x_num_raw = tmp[num_cols].astype(float).values.astype(np.float32)
    x_num = scaler.transform(x_num_raw)
    x_cat = ohe.transform(tmp[cat_cols])
    x = np.concatenate([x_num, x_cat], axis=1)
    return x

def predict_topN(input_dict, N=3):
    """Return the ``N`` most probable crops for one input record.

    Uses the module-level model, encoders and imputation tables.

    Returns:
        A pair ``(ranked, tips)``: ``ranked`` is a list of
        ``(crop, probability)`` tuples, most probable first; ``tips`` maps
        each of those crops to a list of tips. The tip table is empty in this
        cell, so every crop gets the generic one-item list.
    """
    IRRIGATION_TIPS = {} # Reduced for brevity
    features = build_feature_row(input_dict, ohe, num_cols, cat_cols, scaler, group_means, season_defaults)
    # The model returns one probability row per input row; there is only one here.
    probs = model.predict_proba(features)[0]
    # Negating the scores makes the ascending argsort rank highest-probability first.
    ranked_ids = np.argsort(-probs)[:N]

    ranked = []
    tips = {}
    for class_id in ranked_ids:
        crop = le_y.id_to_class[class_id]
        ranked.append((crop, float(probs[class_id])))
        tips[crop] = IRRIGATION_TIPS.get(crop, ["Follow crop-specific schedule."])
    return ranked, tips

# --- MANUALLY INPUT YOUR DATA HERE ---
# Keys are the raw feature column names read by build_feature_row; any
# numeric key left out is imputed there.
manual_input_data = {
    "District": "Srikakulam",
    "Season": "Zaid",
    "Water_Source": "Tank",
    "Previous_Crop": "Rice",
    "Soil_pH": 6.6,
    "Organic_Carbon_pct": 0.5,
    "Soil_N_kg_ha": 190,
    "Soil_P_kg_ha": 20,
    "Soil_K_kg_ha": 180,
    "Avg_Temp_C": 29.5,
    "Seasonal_Rainfall_mm": 550,
    # NOTE(review): 780 is not a valid percentage and is ten times the 65-78
    # range used in season_defaults — likely a typo for 78.0. Confirm before
    # relying on the prediction for this input.
    "Avg_Humidity_pct": 780,
    "Market_Price_Index": 0.8
}

# --- Run prediction and print results ---
print("\n--- Generating predictions for your manual input ---")
# The second return value (tips per crop) is not used in this cell.
predictions, _ = predict_topN(manual_input_data, N=3)

for crop, confidence in predictions:
    print(f"\n🌾 Predicted Crop: {crop}")
    print(f"   Confidence: {confidence:.3f}")

print("\n--- Prediction complete ---")


✅ Model and encoders loaded successfully.

--- Generating predictions for your manual input ---

🌾 Predicted Crop: Paddy
   Confidence: 1.000

🌾 Predicted Crop: Cotton
   Confidence: 0.000

🌾 Predicted Crop: Groundnut
   Confidence: 0.000

--- Prediction complete ---


In [6]:
# Second scenario. This cell defines no helpers of its own: it calls
# predict_topN, which must already be defined by an earlier cell.
manual_input_data = {
    "District": "Srikakulam",
    "Season": "Kharif",
    "Water_Source": "Tank",
    "Previous_Crop": "Vegetables",
    "Soil_pH": 6.6,
    "Organic_Carbon_pct": 0.7,
    "Soil_N_kg_ha": 200,
    "Soil_P_kg_ha": 18,
    "Soil_K_kg_ha": 180,
    "Avg_Temp_C": 29.0,         # Imputed default for Kharif season
    "Seasonal_Rainfall_mm": 600, # Imputed default for Kharif season
    "Avg_Humidity_pct": 78.0,    # Imputed default for Kharif season
    "Market_Price_Index": 0.75
}

# --- Run prediction and print results ---
print("\n--- Generating predictions for your manual input ---")
# The second return value (tips per crop) is not used in this cell.
predictions, _ = predict_topN(manual_input_data, N=3)

for crop, confidence in predictions:
    print(f"\n🌾 Predicted Crop: {crop}")
    print(f"   Confidence: {confidence:.3f}")

print("\n--- Prediction complete ---")



--- Generating predictions for your manual input ---

🌾 Predicted Crop: Paddy
   Confidence: 0.956

🌾 Predicted Crop: Groundnut
   Confidence: 0.022

🌾 Predicted Crop: Cotton
   Confidence: 0.020

--- Prediction complete ---


In [7]:
# Third scenario: soil N/P/K are given in ppm and converted to kg/ha before
# prediction. This cell calls predict_topN, which must already be defined by
# an earlier cell.
# Assume typical soil test results in ppm
ppm_N = 80 # Example value
ppm_P = 60  # Example value
ppm_K = 40  # Example value

# Conversion factor for a standard plow depth
# NOTE(review): presumably 2.24 = 2 (ppm -> lb/acre for a plow-layer depth)
# x 1.12 (lb/acre -> kg/ha) — confirm the sampling depth this assumes.
conversion_factor = 2.24

# Convert ppm to kg/ha
kg_ha_N = ppm_N * conversion_factor
kg_ha_P = ppm_P * conversion_factor
kg_ha_K = ppm_K * conversion_factor

# Manual input data with converted values
manual_input_data = {
    "District": "Srikakulam",
    "Season": "Kharif",
    "Water_Source": "Tank",
    "Previous_Crop": "Vegetables",
    "Soil_pH": 6.6,
    "Organic_Carbon_pct": 0.7,
    "Soil_N_kg_ha": kg_ha_N,
    "Soil_P_kg_ha": kg_ha_P,
    "Soil_K_kg_ha": kg_ha_K,
    "Avg_Temp_C": 29.0,
    "Seasonal_Rainfall_mm": 600,
    "Avg_Humidity_pct": 82.0,
    "Market_Price_Index": 0.75
}

# --- Run prediction and print results ---
print("\n--- Generating predictions for your manual input ---")
# The second return value (tips per crop) is not used in this cell.
predictions, _ = predict_topN(manual_input_data, N=3)

for crop, confidence in predictions:
    print(f"\n🌾 Predicted Crop: {crop}")
    print(f"   Confidence: {confidence:.3f}")

print("\n--- Prediction complete ---")



--- Generating predictions for your manual input ---

🌾 Predicted Crop: Paddy
   Confidence: 1.000

🌾 Predicted Crop: Vegetables
   Confidence: 0.000

🌾 Predicted Crop: Cotton
   Confidence: 0.000

--- Prediction complete ---
