In [1]:
import pandas as pd
from sklearn.ensemble import IsolationForest

In [2]:
df = pd.read_csv('crop_yield.csv')

In [3]:
features = ["Area", "Production", "Annual_Rainfall", "Fertilizer", "Pesticide", "Yield"]
df[features] = df[features].fillna(0)

In [4]:
clf = IsolationForest(contamination=0.1, random_state=42)
df['anomaly'] = clf.fit_predict(df[features])

In [6]:
anomalies = df[df['anomaly'] == -1].copy()
avg_by_crop = df.groupby("Crop")[features].mean().reset_index()
anomalies = pd.merge(anomalies, avg_by_crop, on="Crop", suffixes=('', '_avg'))

In [7]:
avg_by_crop = df.groupby("Crop")[features].mean().reset_index()
anomalies = pd.merge(anomalies, avg_by_crop, on="Crop", suffixes=('', '_avg'))


In [23]:
print(anomalies[[
    "Crop", "State", "Area", "Production", "Yield", "Fertilizer", "Fertilizer_avg",
    "Annual_Rainfall", "Annual_Rainfall_avg", "Pesticide", "Pesticide_avg",
    "Reasons", "Remedies"
]].sample(10, random_state=42))

              Crop           State       Area  Production         Yield  \
752   Cotton(lint)     Maharashtra  3199100.0     2618900      1.040435   
765      Groundnut      Tamil Nadu  1086462.0     1960643      1.822963   
1654          Rice         Haryana  1384700.0     4451000      2.950500   
1251          Gram     Maharashtra  1291400.0     1114400      0.795806   
1161         Bajra         Haryana   627983.0     1156000      2.016000   
694           Rice         Gujarat   672600.0     1042300      1.665000   
70       Sugarcane          Kerala     5780.0      578820     88.222500   
1419  Cotton(lint)  Madhya Pradesh   593642.0     1162620      1.714444   
56        Coconut      West Bengal    24616.0   318174400  12448.062220   
1116         Maize  Madhya Pradesh   879420.0      846002      0.903617   

       Fertilizer  Fertilizer_avg  Annual_Rainfall  Annual_Rainfall_avg  \
752   316071080.0    6.415998e+07           1314.8          1448.085438   
765   107342445.6    2.2

# Example: check by Crop and State
crop_name = input("Enter crop name: ")
state_name = input("Enter state name: ")

# You can add more filters if needed (like Season or Year)
filtered = anomalies[
    (anomalies['Crop'].str.lower() == crop_name.lower()) &
    (anomalies['State'].str.lower() == state_name.lower())
]

if not filtered.empty:
    for idx, row in filtered.iterrows():
        print(f"\nResult for {row['Crop']} in {row['State']}:")
        print(f"  Area: {row['Area']}, Production: {row['Production']}, Yield: {row['Yield']}")
        print(f"  Fertilizer: {row['Fertilizer']} (Avg: {row['Fertilizer_avg']})")
        print(f"  Rainfall: {row['Annual_Rainfall']} (Avg: {row['Annual_Rainfall_avg']})")
        print(f"  Pesticide: {row['Pesticide']} (Avg: {row['Pesticide_avg']})")
        print(f"  REASON: {row['Reasons']}")
        print(f"  SUGGESTION: {row['Remedies']}")
else:
    print("No anomaly detected for that crop/state combination.")


In [24]:
# <-- Add this line to check actual column names after merge

def explain_row(row):
    reasons = []
    remedies = []

    fert = row.get("Fertilizer", None)
    fert_avg = row.get("Fertilizer_avg", None)
    rain = row.get("Annual_Rainfall", None)
    rain_avg = row.get("Annual_Rainfall_avg", None)
    pest = row.get("Pesticide", None)
    pest_avg = row.get("Pesticide_avg", None)
    yield_ = row.get("Yield", None)
    yield_avg = row.get("Yield_avg", None)
    prod = row.get("Production", None)
    prod_avg = row.get("Production_avg", None)

    if fert is not None and fert_avg is not None and fert < 0.8 * fert_avg:
        reasons.append("Low fertilizer usage")
        remedies.append("Increase fertilizer application as per crop guidelines.")
    if rain is not None and rain_avg is not None and rain < 0.8 * rain_avg:
        reasons.append("Low rainfall compared to average")
        remedies.append("Adopt irrigation or drought-resistant crop varieties.")
    if pest is not None and pest_avg is not None and pest < 0.8 * pest_avg:
        reasons.append("Insufficient pest control")
        remedies.append("Implement integrated pest management.")
    if yield_ is not None and yield_avg is not None and yield_ < 0.8 * yield_avg:
        reasons.append("Yield lower than average for this crop")
        remedies.append("Check soil health and farming practices; provide training.")
    if prod is not None and prod_avg is not None and prod < 0.8 * prod_avg:
        reasons.append("Production is significantly lower than typical")
        remedies.append("Assess inputs and growing conditions, provide expert support.")
    if not reasons:
        reasons.append("Anomalous pattern detected")
        remedies.append("Comprehensive review of all farming inputs is recommended.")
    return pd.Series({
        "Reasons": "; ".join(reasons),
        "Remedies": "; ".join(remedies)
    })


In [25]:
explanations = anomalies.apply(explain_row, axis=1)
anomalies = pd.concat([anomalies, explanations], axis=1)

In [26]:
print(anomalies.columns.tolist())


['Crop', 'Crop_Year', 'Season', 'State', 'Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Yield', 'anomaly', 'Area_avg', 'Production_avg', 'Annual_Rainfall_avg', 'Fertilizer_avg', 'Pesticide_avg', 'Yield_avg', 'Reasons', 'Remedies', 'Reasons', 'Remedies']


In [27]:
def explain_row(row):
    reasons = []
    remedies = []

    # Always use the first set of *_avg columns (index matters if there are dups)
    fert = row["Fertilizer"]
    fert_avg = row["Fertilizer_avg"]
    rain = row["Annual_Rainfall"]
    rain_avg = row["Annual_Rainfall_avg"]
    pest = row["Pesticide"]
    pest_avg = row["Pesticide_avg"]
    yield_ = row["Yield"]
    yield_avg = row["Yield_avg"]
    prod = row["Production"]
    prod_avg = row["Production_avg"]

    if fert < 0.8 * fert_avg:
        reasons.append("Low fertilizer usage")
        remedies.append("Increase fertilizer application as per crop guidelines.")
    if rain < 0.8 * rain_avg:
        reasons.append("Low rainfall compared to average")
        remedies.append("Adopt irrigation or drought-resistant crop varieties.")
    if pest < 0.8 * pest_avg:
        reasons.append("Insufficient pest control")
        remedies.append("Implement integrated pest management.")
    if yield_ < 0.8 * yield_avg:
        reasons.append("Yield lower than average for this crop")
        remedies.append("Check soil health and farming practices; provide training.")
    if prod < 0.8 * prod_avg:
        reasons.append("Production is significantly lower than typical")
        remedies.append("Assess inputs and growing conditions, provide expert support.")
    if not reasons:
        reasons.append("Anomalous pattern detected")
        remedies.append("Comprehensive review of all farming inputs is recommended.")
    return pd.Series({
        "Reasons": "; ".join(reasons),
        "Remedies": "; ".join(remedies)
    })

# Now apply the function safely!
explanations = anomalies.apply(explain_row, axis=1)
anomalies = pd.concat([anomalies, explanations], axis=1)

In [28]:
anomalies = anomalies.loc[:,~anomalies.columns.duplicated()]


In [29]:
def explain_row(row):
    reasons = []
    remedies = []

    fert = row["Fertilizer"]
    fert_avg = row["Fertilizer_avg"]
    rain = row["Annual_Rainfall"]
    rain_avg = row["Annual_Rainfall_avg"]
    pest = row["Pesticide"]
    pest_avg = row["Pesticide_avg"]
    yield_ = row["Yield"]
    yield_avg = row["Yield_avg"]
    prod = row["Production"]
    prod_avg = row["Production_avg"]

    if fert < 0.8 * fert_avg:
        reasons.append("Low fertilizer usage")
        remedies.append("Increase fertilizer application as per crop guidelines.")
    if rain < 0.8 * rain_avg:
        reasons.append("Low rainfall compared to average")
        remedies.append("Adopt irrigation or drought-resistant crop varieties.")
    if pest < 0.8 * pest_avg:
        reasons.append("Insufficient pest control")
        remedies.append("Implement integrated pest management.")
    if yield_ < 0.8 * yield_avg:
        reasons.append("Yield lower than average for this crop")
        remedies.append("Check soil health and farming practices; provide training.")
    if prod < 0.8 * prod_avg:
        reasons.append("Production is significantly lower than typical")
        remedies.append("Assess inputs and growing conditions, provide expert support.")
    if not reasons:
        reasons.append("Anomalous pattern detected")
        remedies.append("Comprehensive review of all farming inputs is recommended.")
    return pd.Series({
        "Reasons": "; ".join(reasons),
        "Remedies": "; ".join(remedies)
    })

# Deduplicate columns before this step!
anomalies = anomalies.loc[:,~anomalies.columns.duplicated()]

explanations = anomalies.apply(explain_row, axis=1)
anomalies = pd.concat([anomalies, explanations], axis=1)


In [30]:
final_report = anomalies[[
    "Crop", "State", "Area", "Production", "Yield", "Reasons", "Remedies"
]]

# Save to CSV (you can open in Excel or Google Sheets)
final_report.to_csv("anomaly_report_with_reasons.csv", index=False)
print("Report saved as anomaly_report_with_reasons.csv")

# Optionally, view the top rows
print(final_report.head())

Report saved as anomaly_report_with_reasons.csv
        Crop      State       Area  Production        Yield  \
0   Coconut       Assam    19656.0   126905000  5238.051739   
1       Rice      Assam   607358.0      398311     0.780870   
2       Rice      Assam  1743321.0     1647296     0.941304   
3  Groundnut  Karnataka   847666.0      525562     0.709412   
4      Jowar  Karnataka  1501172.0      722773     0.510714   

                                             Reasons  \
0  Low fertilizer usage; Insufficient pest contro...   
1  Low fertilizer usage; Yield lower than average...   
2             Yield lower than average for this crop   
3             Yield lower than average for this crop   
4             Yield lower than average for this crop   

                                             Reasons  \
0  Low fertilizer usage; Insufficient pest contro...   
1  Low fertilizer usage; Yield lower than average...   
2             Yield lower than average for this crop   
3           

In [None]:
# Example: check by Crop and State
crop_name = input("Enter crop name: ")
state_name = input("Enter state name: ")

# You can add more filters if needed (like Season or Year)
filtered = anomalies[
    (anomalies['Crop'].str.lower() == crop_name.lower()) &
    (anomalies['State'].str.lower() == state_name.lower())
]

if not filtered.empty:
    for idx, row in filtered.iterrows():
        print(f"\nResult for {row['Crop']} in {row['State']}:")
        print(f"  Area: {row['Area']}, Production: {row['Production']}, Yield: {row['Yield']}")
        print(f"  Fertilizer: {row['Fertilizer']} (Avg: {row['Fertilizer_avg']})")
        print(f"  Rainfall: {row['Annual_Rainfall']} (Avg: {row['Annual_Rainfall_avg']})")
        print(f"  Pesticide: {row['Pesticide']} (Avg: {row['Pesticide_avg']})")
        print(f"  REASON: {row['Reasons']}")
        print(f"  SUGGESTION: {row['Remedies']}")
else:
    print("No anomaly detected for that crop/state combination.")