In [1]:
# agriconnect_pipeline.py
"""
AGRICONNECT - ANN demo pipeline
- Generates synthetic agricultural dataset (local/regional features)
- Trains two ANN models:
    1) Crop recommendation (classification)
    2) Yield estimation (regression)
- Saves models and metadata to ./models
- Provides a predict function and a small multilingual-friendly QA helper
This file is intended as a runnable prototype for the AGRICONNECT project.
"""

'\nAGRICONNECT - ANN demo pipeline\n- Generates synthetic agricultural dataset (local/regional features)\n- Trains two ANN models:\n    1) Crop recommendation (classification)\n    2) Yield estimation (regression)\n- Saves models and metadata to ./models\n- Provides a predict function and a small multilingual-friendly QA helper\nThis file is intended as a runnable prototype for the AGRICONNECT project.\n'

In [2]:
import json
import os
import re

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

In [3]:
# Make sure the folders for the generated CSV and the saved model artifacts exist.
for folder in ("data", "models"):
    os.makedirs(folder, exist_ok=True)

In [4]:
def generate_synthetic_agri_data(n=800, seed=42, out_path="data/synthetic_agriconnect.csv"):
    """
    Create a synthetic dataset that mimics region-level agricultural records.

    Parameters:
      - n: number of rows to generate
      - seed: value passed to np.random.seed (this reseeds NumPy's global RNG)
      - out_path: where to write the CSV. Missing parent directories of a
        string / path-like destination are created. Pass None to skip writing.

    Columns:
      - district: integer id (1..50)
      - soil_ph: 5.0 - 8.0
      - rainfall_mm: 0 - 500
      - temp_c: 15 - 38
      - prev_yield: previous season yield (t/ha), 0.5 - 4.0
      - season: integer code (0,1,2)
      - crop: categorical (wheat, rice, maize, soybean, groundnut)
      - yield_t: current season yield (t/ha), floored at 0.2

    Returns the generated DataFrame.
    """
    np.random.seed(seed)

    # NOTE: the order of the random draws below determines the generated values
    # for a given seed; do not reorder them.
    district = np.random.randint(1, 51, n)
    soil_ph = np.round(np.random.uniform(5.0, 8.0, n), 2)
    rainfall = np.round(np.random.uniform(0, 500, n), 1)
    temp = np.round(np.random.uniform(15, 38, n), 1)
    prev_yield = np.round(np.random.uniform(0.5, 4.0, n), 2)
    season = np.random.choice([0, 1, 2], n)

    crops = ['wheat', 'rice', 'maize', 'soybean', 'groundnut']

    # Crop suitability scoring (simple heuristic: higher -> more suitable).
    # One row per crop, transposed to shape (n, len(crops)).
    crop_scores = np.vstack([
        (100 - np.abs(rainfall - 300)) + (20 - np.abs(temp - 22)),  # wheat prefers moderate-high rain, ~22C
        # rice prefers lower rain (synthetic here) & low temp.
        # NOTE(review): the 12C optimum lies below the sampled 15-38C range - confirm this is intended.
        (100 - np.abs(rainfall - 80))  + (10 - np.abs(temp - 12)),
        (80  - np.abs(rainfall - 150)) + (15 - np.abs(temp - 25)),  # maize
        (70  - np.abs(rainfall - 120)) + (10 - np.abs(temp - 28)),  # soybean
        (60  - np.abs(rainfall - 60))  + (12 - np.abs(temp - 30)),  # groundnut
    ]).T

    # The label is the crop with the highest heuristic score for each row.
    crop_idx = crop_scores.argmax(axis=1)
    crop = [crops[i] for i in crop_idx]

    # base_yield uses prev_yield, rainfall, and soil pH as drivers (toy formula)
    base_yield = prev_yield * (1 + (rainfall / 1000.0)) * (8 - np.abs(7 - soil_ph)) / 8.0
    crop_factor = np.array([1.0, 1.1, 0.9, 1.05, 0.95])[crop_idx]
    yield_t = np.round(base_yield * crop_factor + np.random.normal(0, 0.2, n), 2)
    # Gaussian noise can push small yields below zero; floor them at 0.2 t/ha.
    yield_t = np.clip(yield_t, 0.2, None)

    df = pd.DataFrame({
        'district': district,
        'soil_ph': soil_ph,
        'rainfall_mm': rainfall,
        'temp_c': temp,
        'prev_yield': prev_yield,
        'season': season,
        'crop': crop,
        'yield_t': yield_t
    })

    if out_path is not None:
        # to_csv does not create directories, so make sure the parent exists.
        # Only done for path-like destinations; file-like buffers pass through untouched.
        if isinstance(out_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(out_path))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        df.to_csv(out_path, index=False)
    return df

In [5]:
# Build the 800-row synthetic dataset; the generator also writes it to its default CSV path.
print("Generating synthetic AGRICONNECT dataset...")
df = generate_synthetic_agri_data(n=800)
print("Rows generated:", df.shape[0])

Generating synthetic AGRICONNECT dataset...
Rows generated: 800


In [6]:
# Model inputs: every generated column except the two targets.
feature_cols = [
    'district',
    'soil_ph',
    'rainfall_mm',
    'temp_c',
    'prev_yield',
    'season',
]
X = df.loc[:, feature_cols]
y_crop = df.loc[:, 'crop']       # classification target (crop name)
y_yield = df.loc[:, 'yield_t']   # regression target (yield in t/ha)

In [7]:
# Fit a label encoder on the crop names, encode the classification target,
# and persist the encoder so predicted class ids can be mapped back to names.
le = LabelEncoder().fit(y_crop)
y_crop_enc = le.transform(y_crop)
joblib.dump(le, "models/label_encoder.joblib")

['models/label_encoder.joblib']

In [8]:
# ---------------------------------------------------------------------
# Preprocessing pipelines
# ---------------------------------------------------------------------
# Continuous measurements are standardised; the integer-coded district and
# season columns are treated as categories and one-hot encoded.
num_features = ['soil_ph', 'rainfall_mm', 'temp_c', 'prev_yield']
cat_features = ['district', 'season']

num_transform = Pipeline(steps=[
    ('scaler', StandardScaler()),
])
cat_transform = Pipeline(steps=[
    # handle_unknown='ignore': category values not seen during fit encode to all zeros.
    ('ohe', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

preprocessor = ColumnTransformer(transformers=[
    ('num', num_transform, num_features),
    ('cat', cat_transform, cat_features),
])

In [9]:
# ---------------------------------------------------------------------
# Train / Test split
# ---------------------------------------------------------------------
# A single call splits the features and both targets, so all three share
# the same 80/20 row partition.
(
    X_train, X_test,
    y_crop_train, y_crop_test,
    y_yield_train, y_yield_test,
) = train_test_split(X, y_crop_enc, y_yield, test_size=0.2, random_state=42)

In [10]:
# ANN classifier pipeline (crop recommendation)
# ---------------------------------------------------------------------
# Preprocessing and the MLP are bundled in one Pipeline so the saved artifact
# accepts raw feature frames at inference time.
clf_pipeline = Pipeline(steps=[
    ('pre', preprocessor),
    ('mlp', MLPClassifier(hidden_layer_sizes=(32, 16), max_iter=150, random_state=42)),
])

print("Training crop classifier (ANN)...")
clf_pipeline.fit(X_train, y_crop_train)
joblib.dump(clf_pipeline, "models/ann_crop_classifier.joblib")

Training crop classifier (ANN)...




['models/ann_crop_classifier.joblib']

In [11]:
# ANN regressor pipeline (yield estimation)
# ---------------------------------------------------------------------
# NOTE(review): this reuses the very same `preprocessor` object as clf_pipeline,
# so fitting here re-fits that shared transformer (on the same X_train).
reg_pipeline = Pipeline(steps=[
    ('pre', preprocessor),
    ('mlpr', MLPRegressor(hidden_layer_sizes=(32, 16), max_iter=150, random_state=42)),
])

print("Training yield regressor (ANN)...")
reg_pipeline.fit(X_train, y_yield_train)
joblib.dump(reg_pipeline, "models/ann_yield_regressor.joblib")

Training yield regressor (ANN)...




['models/ann_yield_regressor.joblib']

In [12]:
# Evaluation
# ---------------------------------------------------------------------
# Score both models on the held-out test split.
y_crop_pred = clf_pipeline.predict(X_test)
y_yield_pred = reg_pipeline.predict(X_test)

crop_acc = accuracy_score(y_true=y_crop_test, y_pred=y_crop_pred)
y_mse = mean_squared_error(y_true=y_yield_test, y_pred=y_yield_pred)
y_rmse = float(np.sqrt(y_mse))  # same units as the yield target

print(f"Evaluation -> Crop accuracy: {crop_acc:.3f}, Yield RMSE: {y_rmse:.3f}")

Evaluation -> Crop accuracy: 0.894, Yield RMSE: 0.332


In [13]:
# Save models metadata
# ---------------------------------------------------------------------
# Records the evaluation metrics and the artifact locations next to the models.
meta = {
    "project": "AGRICONNECT (AGRINOVA)",
    "description": "Prototype ANN-based crop recommendation and yield estimation",
    "metrics": {
        "crop_accuracy": float(crop_acc),
        "yield_rmse": float(y_rmse),
    },
    "artifacts": {
        "label_encoder": "models/label_encoder.joblib",
        "crop_classifier": "models/ann_crop_classifier.joblib",
        "yield_regressor": "models/ann_yield_regressor.joblib",
    },
}
with open("models/meta.json", "w") as f:
    f.write(json.dumps(meta, indent=2))

In [14]:
# Load saved artifacts for inference (demonstrates persistence)
# ---------------------------------------------------------------------
# joblib files are pickle-based: only load artifacts produced by a trusted source.
label_encoder, clf, reg = (
    joblib.load(artifact_path)
    for artifact_path in (
        "models/label_encoder.joblib",
        "models/ann_crop_classifier.joblib",
        "models/ann_yield_regressor.joblib",
    )
)

In [17]:
def predict_crop_and_yield(district: int, soil_ph: float, rainfall_mm: float,
                           temp_c: float, prev_yield: float, season: int):
    """
    Run both saved models on a single set of feature values.

    Uses the module-level `clf`, `reg` and `label_encoder` objects loaded from
    ./models. Inputs are coerced with int()/float() before being placed in a
    one-row DataFrame whose column names match the training features.

    Returns a dict with:
      - recommended_crop (str): crop name decoded from the classifier output
      - estimated_yield_t_per_ha (float): regressor output rounded to 2 decimals
      - confidence (float | None): highest class probability from the
        classifier, or None when the classifier exposes no predict_proba

    Errors raised by the models themselves are propagated to the caller rather
    than being reported as a missing confidence.
    """
    X_in = pd.DataFrame([{
        'district': int(district),
        'soil_ph': float(soil_ph),
        'rainfall_mm': float(rainfall_mm),
        'temp_c': float(temp_c),
        'prev_yield': float(prev_yield),
        'season': int(season)
    }])

    crop_idx = clf.predict(X_in)[0]
    # str(): hand back a plain Python string instead of a NumPy string scalar.
    crop_name = str(label_encoder.inverse_transform([crop_idx])[0])

    yield_pred = float(reg.predict(X_in)[0])

    # Classifier probability/confidence, only if the estimator supports it.
    # A capability check replaces the former blanket `except Exception`, which
    # also hid genuine inference failures behind confidence=None.
    conf = None
    if hasattr(clf, "predict_proba"):
        conf = float(clf.predict_proba(X_in)[0].max())

    return {
        'recommended_crop': crop_name,
        'estimated_yield_t_per_ha': round(yield_pred, 2),
        'confidence': conf
    }

In [18]:
# Simple multilingual-friendly QA (FAQ + rule fallbacks)
# ---------------------------------------------------------------------
# Canned answers keyed by lowercase question phrases. simple_qa lowercases the
# incoming question and returns the first entry whose key occurs in it as a
# substring, so keys must stay lowercase.
faq_db = {
    "how to apply pm kisan": "Visit pmkisan.gov.in and register using Aadhaar-linked bank account. Local CSCs can help with enrollment.",
    "when to irrigate rice": "Irrigate at tillering and panicle initiation stages; avoid irrigating right before forecasted heavy rain.",
    "where to buy certified seeds": "Contact your nearest KVK or authorised seed dealer; Agmarknet lists mandi-level information."
}

In [19]:

def simple_qa(question: str):
    """
    Small QA helper.

    Steps:
     - Lowercase/trim the question and translate a handful of Hindi-like
       keywords to English stems. Only whole words are translated, so English
       words that merely contain a keyword (e.g. "company" contains "pani")
       are left alone.
     - Return the first faq_db entry whose key appears in the question.
     - Otherwise fall back to simple keyword rules for irrigation / prices.

    Returns a dict with keys "answer" (str) and "source"
    ("faq_db", "rule" or "none").
    """
    q = question.strip().lower()
    # tiny Hindi-like token map (keeps the prototype simple)
    hindi_map = {"pani": "irrigat", "beej": "seed", "keemat": "price", "mandi": "market", "kab": "when"}
    for h, k in hindi_map.items():
        # \b anchors restrict the replacement to standalone tokens.
        q = re.sub(r"\b" + re.escape(h) + r"\b", k, q)

    # FAQ lookup: the key must occur verbatim inside the normalised question
    for k in faq_db:
        if k in q:
            return {"answer": faq_db[k], "source": "faq_db"}

    # simple rules
    if "irrigat" in q or "water" in q:
        return {
            "answer": "Check recent rainfall forecasts and soil moisture. Prefer irrigation when soil moisture drops below crop-specific thresholds; avoid irrigating if heavy rain is expected.",
            "source": "rule"
        }
    if "price" in q or "mandi" in q or "market" in q:
        return {
            "answer": "Use Agmarknet or local mandi reports for up-to-date prices. Contact local mandi/collection center for real-time rates.",
            "source": "rule"
        }

    return {"answer": "Sorry, I do not have an answer for that. Try asking about irrigation, prices, or government schemes (e.g., PM Kisan).", "source": "none"}

# ---------------------------------------------------------------------
# Demo usage
# ---------------------------------------------------------------------
# One sample feature set for the models, then one English and one Hindi-like question.
demo_features = dict(
    district=12,
    soil_ph=6.7,
    rainfall_mm=140.0,
    temp_c=26.0,
    prev_yield=1.3,
    season=0,
)

print("Demo prediction:", predict_crop_and_yield(**demo_features))
for label, question in (
    ("QA demo (English):", "When should I irrigate rice?"),
    ("QA demo (Hindi-like):", "Pani kab dena chahiye?"),
):
    print(label, simple_qa(question))

print("Saved models and metadata to ./models/")

Demo prediction: {'recommended_crop': 'maize', 'estimated_yield_t_per_ha': 1.17, 'confidence': 0.8935137819988276}
QA demo (English): {'answer': 'Check recent rainfall forecasts and soil moisture. Prefer irrigation when soil moisture drops below crop-specific thresholds; avoid irrigating if heavy rain is expected.', 'source': 'rule'}
QA demo (Hindi-like): {'answer': 'Check recent rainfall forecasts and soil moisture. Prefer irrigation when soil moisture drops below crop-specific thresholds; avoid irrigating if heavy rain is expected.', 'source': 'rule'}
Saved models and metadata to ./models/


In [20]:
# agriconnect_api.py
"""
AGRICONNECT API - FastAPI backend
- Serves ANN crop recommendation + yield estimation
- Provides multilingual-friendly QA
- Captures user feedback for continuous improvement
"""

'\nAGRICONNECT API - FastAPI backend\n- Serves ANN crop recommendation + yield estimation\n- Provides multilingual-friendly QA\n- Captures user feedback for continuous improvement\n'

In [21]:
from fastapi import FastAPI, Query
from pydantic import BaseModel
from datetime import datetime

In [22]:
# Load trained models & encoder
# joblib files are pickle-based: only load artifacts produced by a trusted source.
clf, reg, label_encoder = (
    joblib.load(artifact_path)
    for artifact_path in (
        "models/ann_crop_classifier.joblib",
        "models/ann_yield_regressor.joblib",
        "models/label_encoder.joblib",
    )
)

In [23]:
# Initialize FastAPI app (title/description/version are passed as app metadata)
app = FastAPI(
    title="AGRICONNECT API",
    description="AI-powered agricultural advisory backend",
    version="1.0",
)


In [24]:
# FAQ answers used by the API's simple_qa, keyed by lowercase question phrases
# (simple_qa lowercases the question and does a substring lookup on these keys).
# NOTE: this rebinds the faq_db defined in the pipeline section above; the keys
# are the same but two of the answers are worded slightly more briefly here.
faq_db = {
    "how to apply pm kisan": "Visit pmkisan.gov.in and register using Aadhaar-linked bank account. Local CSCs can help with enrollment.",
    "when to irrigate rice": "Irrigate at tillering and panicle initiation stages; avoid irrigating before forecasted heavy rain.",
    "where to buy certified seeds": "Contact your nearest KVK or authorised seed dealer; Agmarknet lists mandi-level info."
}

In [25]:
def simple_qa(question: str):
    """
    Answer a farmer question from the FAQ table or simple keyword rules.

    The question is trimmed and lowercased, a few Hindi-like keywords are
    translated to English stems (whole words only, so e.g. "company" is not
    mangled by the "pani" mapping), then the first faq_db key contained in the
    question wins. If no FAQ key matches, keyword rules for irrigation and
    prices are tried.

    Returns a dict with keys "answer" (str) and "source"
    ("faq_db", "rule" or "none").
    """
    q = question.strip().lower()
    hindi_map = {"pani": "irrigat", "beej": "seed", "keemat": "price", "mandi": "market", "kab": "when"}
    for h, k in hindi_map.items():
        # \b anchors restrict the replacement to standalone tokens.
        q = re.sub(r"\b" + re.escape(h) + r"\b", k, q)

    for k in faq_db:
        if k in q:
            return {"answer": faq_db[k], "source": "faq_db"}
    if "irrigat" in q or "water" in q:
        return {"answer": "Check rainfall forecast & soil moisture; irrigate when moisture is low.", "source": "rule"}
    if "price" in q or "mandi" in q or "market" in q:
        return {"answer": "Check Agmarknet or local mandi reports for latest prices.", "source": "rule"}
    return {"answer": "Sorry, no answer found. Try asking about irrigation, prices, or PM Kisan.", "source": "none"}


In [26]:
def predict_crop_and_yield(district, soil_ph, rainfall_mm, temp_c, prev_yield, season):
    """
    Run the loaded classifier and regressor on one set of feature values.

    Uses the module-level `clf`, `reg` and `label_encoder`. Inputs are coerced
    with int()/float() and placed in a one-row DataFrame whose column names
    match the training features.

    Returns a dict with:
      - recommended_crop (str): crop name decoded from the classifier output
      - estimated_yield_t_per_ha (float): regressor output rounded to 2 decimals
      - confidence (float | None): highest class probability, or None when the
        classifier exposes no predict_proba

    Errors raised by the models themselves are propagated to the caller rather
    than being reported as a missing confidence.
    """
    X_in = pd.DataFrame([{
        'district': int(district),
        'soil_ph': float(soil_ph),
        'rainfall_mm': float(rainfall_mm),
        'temp_c': float(temp_c),
        'prev_yield': float(prev_yield),
        'season': int(season)
    }])
    crop_idx = clf.predict(X_in)[0]
    # str(): return a plain Python string so the value serialises cleanly.
    crop_name = str(label_encoder.inverse_transform([crop_idx])[0])
    yield_pred = float(reg.predict(X_in)[0])

    # Capability check instead of a blanket `except Exception`, which also hid
    # genuine inference failures behind confidence=None.
    conf = None
    if hasattr(clf, "predict_proba"):
        conf = float(clf.predict_proba(X_in)[0].max())
    return {"recommended_crop": crop_name, "estimated_yield_t_per_ha": round(yield_pred, 2), "confidence": conf}


In [27]:
# Request models
# ---------------------------
class PredictRequest(BaseModel):
    # Request body for POST /predict. Field names match the keyword arguments of
    # predict_crop_and_yield, which receives them via **req.dict().
    district: int       # district id (the synthetic training data uses 1..50)
    soil_ph: float      # soil pH
    rainfall_mm: float  # rainfall in millimetres
    temp_c: float       # temperature in degrees Celsius
    prev_yield: float   # previous season yield (t/ha)
    season: int         # season code (the synthetic training data uses 0, 1, 2)

class QARequest(BaseModel):
    # Request body for POST /qa.
    question: str  # free-text question passed to simple_qa

class FeedbackRequest(BaseModel):
    # Request body for POST /feedback. The two dicts are JSON-encoded into
    # single CSV cells by feedback_endpoint.
    feature_data: dict   # feature values the prediction was made from (presumably the /predict input - confirm with clients)
    prediction: dict     # prediction shown to the user (presumably the /predict output - confirm with clients)
    user_feedback: str   # the user's free-text feedback

In [28]:
# ---------------------------
# API Routes
# ---------------------------

# Landing route: returns a static payload showing the service is up.
@app.get("/")
def root():
    payload = {"message": "Welcome to AGRICONNECT API", "status": "running"}
    return payload

# Crop + yield prediction: echoes the validated input next to the model output.
@app.post("/predict")
def predict_endpoint(req: PredictRequest):
    # NOTE(review): `.dict()` is deprecated in pydantic v2 in favour of
    # `.model_dump()` - confirm which pydantic version is installed.
    features = req.dict()
    prediction = predict_crop_and_yield(**features)
    return {"input": features, "prediction": prediction}

# Question answering: returns the original question and the simple_qa result dict.
@app.post("/qa")
def qa_endpoint(req: QARequest):
    asked = req.question
    return {"question": asked, "answer": simple_qa(asked)}

# Feedback capture: appends one timestamped row per request to a CSV file.
@app.post("/feedback")
def feedback_endpoint(req: FeedbackRequest):
    feedback_path = "feedback/user_feedback.csv"
    os.makedirs("feedback", exist_ok=True)

    # Dict payloads are stored as JSON strings so each fits in one CSV cell.
    record = {
        "timestamp": datetime.now().isoformat(),
        "feature_data": json.dumps(req.feature_data),
        "prediction": json.dumps(req.prediction),
        "user_feedback": req.user_feedback
    }
    feedback_df = pd.DataFrame([record])

    # The first write creates the file with a header row; later writes append rows only.
    is_new_file = not os.path.exists(feedback_path)
    feedback_df.to_csv(
        feedback_path,
        mode='w' if is_new_file else 'a',
        header=is_new_file,
        index=False,
    )
    return {"status": "saved", "file": feedback_path}

In [29]:
pip install streamlit scikit-learn joblib pandas requests


Note: you may need to restart the kernel to use updated packages.


In [37]:
#!streamlit run agriconnect_streamlit.py


In [33]:
ls

 Volume in drive C is Windows
 Volume Serial Number is 3AFB-8EC9

 Directory of C:\Users\aryan\agriconnect

09-08-2025  15:20    <DIR>          .
09-08-2025  01:21    <DIR>          ..
09-08-2025  15:18    <DIR>          .ipynb_checkpoints
09-08-2025  15:20            32,874 agriconnect_streamlit.ipynb
09-08-2025  01:44            14,594 code_model_1.ipynb
09-08-2025  15:20            32,795 code_model_2.ipynb
09-08-2025  14:40    <DIR>          data
09-08-2025  01:40    <DIR>          models
               3 File(s)         80,263 bytes
               5 Dir(s)  16,960,937,984 bytes free
