In [1]:
from google.colab import drive
# Mount Google Drive into the Colab VM at /content/drive.
# NOTE(review): the data files in the visible cells are read via relative paths
# ("RICE.csv", "Cotton_ICAR_Data.xlsx"), not from /content/drive — confirm the mount is needed.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:

!pip install numpy pandas scikit-learn xgboost joblib requests


!pip install earthengine-api geemap


Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->ipyfilechooser>=0.6.0->geemap)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.6/1.6 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


In [3]:
import ee
import pandas as pd
import numpy as np
import joblib

# Authenticate with Google Earth Engine, then initialise the client for a specific project.
# NOTE(review): the project id is hard-coded, and no later visible cell calls `ee` —
# confirm whether this cell (and the earthengine-api/geemap install) is still required.
ee.Authenticate()
ee.Initialize(project="pest2-481305")


In [4]:
import pandas as pd

# Load the rice observations; the relative path means the CSV must sit in the working directory.
df_rice = pd.read_csv("RICE.csv")


In [6]:
# Show the available column names as a plain Python list.
print(list(df_rice.columns))


['Observation Year', 'Standard Week', 'Pest Value', 'Collection Type', 'MaxT', 'MinT', 'RH1(%)', 'RH2(%)', 'RF(mm)', 'WS(kmph)', 'SSH(hrs)', 'EVP(mm)', 'PEST NAME', 'Location']


In [7]:
# Binary target: 1 where the recorded pest value is strictly positive, otherwise 0.
df_rice["risk_bin"] = df_rice["Pest Value"].gt(0).astype(int)


In [9]:

# Collapse the two relative-humidity readings into their arithmetic mean.
df_rice["humidity"] = df_rice["RH1(%)"].add(df_rice["RH2(%)"]).div(2)

# Weather predictors used by the rice model, in training column order.
FEATURES_RICE = ["MaxT", "MinT", "humidity", "RF(mm)", "WS(kmph)", "SSH(hrs)", "EVP(mm)"]

# Feature matrix and binary target for the rice data.
X, y = df_rice[FEATURES_RICE], df_rice["risk_bin"]


In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation, stratified on the target, with a fixed seed.
X_train_real, X_test_real, y_train_real, y_test_real = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Re-attach the label so features and target travel together in one frame per split.
train_rice = X_train_real.assign(risk_bin=y_train_real)
test_rice = X_test_real.assign(risk_bin=y_test_real)


In [11]:
import numpy as np

def augment_rice(df, n=3, random_state=None):
    """Return ``n`` jittered copies of every row of a rice weather frame.

    Each copy perturbs the weather columns and leaves every other column
    (e.g. ``risk_bin``) untouched:

    * ``MaxT`` / ``MinT``: additive Gaussian noise, sd 1.
    * ``humidity``: additive Gaussian noise, sd 5, clipped to [0, 100].
    * ``RF(mm)`` / ``EVP(mm)``: multiplied by a uniform factor in [0.9, 1.1), floored at 0.
    * ``WS(kmph)`` / ``SSH(hrs)``: additive Gaussian noise, sd 0.5, floored at 0.

    The work is vectorised rather than done row by row, so column dtypes are
    preserved (an integer label stays integer) and missing values stay missing
    instead of being turned into 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the seven weather columns above. Not modified.
    n : int, default 3
        Number of noisy copies per input row; values <= 0 yield an empty frame.
    random_state : int, numpy.random.Generator or None, default None
        Seed or generator for reproducible noise. ``None`` draws fresh entropy,
        so results differ from call to call.

    Returns
    -------
    pandas.DataFrame
        ``len(df) * n`` rows; the copies of each input row are adjacent and keep
        that row's index label.
    """
    rng = np.random.default_rng(random_state)

    # Repeat rows by position so a non-unique index cannot multiply rows.
    positions = np.repeat(np.arange(len(df)), max(n, 0))
    out = df.iloc[positions].copy()
    size = len(out)

    out["MaxT"] = out["MaxT"] + rng.normal(0, 1, size)
    out["MinT"] = out["MinT"] + rng.normal(0, 1, size)
    out["humidity"] = (out["humidity"] + rng.normal(0, 5, size)).clip(0, 100)
    out["RF(mm)"] = (out["RF(mm)"] * rng.uniform(0.9, 1.1, size)).clip(lower=0)
    out["WS(kmph)"] = (out["WS(kmph)"] + rng.normal(0, 0.5, size)).clip(lower=0)
    out["SSH(hrs)"] = (out["SSH(hrs)"] + rng.normal(0, 0.5, size)).clip(lower=0)
    out["EVP(mm)"] = (out["EVP(mm)"] * rng.uniform(0.9, 1.1, size)).clip(lower=0)
    return out

# Three jittered variants per training row, stacked beneath the original rows.
train_rice_aug = augment_rice(df=train_rice, n=3)
train_rice_full = pd.concat(
    objs=[train_rice, train_rice_aug],
    ignore_index=True,
)


In [12]:
from xgboost import XGBClassifier

# Shallow (depth-2), strongly regularised gradient-boosted binary classifier for rice.
rice_model = XGBClassifier(
    objective="binary:logistic",
    random_state=42,
    # boosting schedule
    n_estimators=300,
    learning_rate=0.03,
    # tree shape / split constraints
    max_depth=2,
    min_child_weight=15,
    gamma=1.0,
    # row and column subsampling
    subsample=0.65,
    colsample_bytree=0.65,
    # L1 / L2 penalties
    reg_alpha=3.0,
    reg_lambda=6.0,
)

# Fit on the original plus augmented training rows.
rice_model.fit(X=train_rice_full[FEATURES_RICE], y=train_rice_full["risk_bin"])


In [13]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the held-out rice rows with the model's default decision rule.
y_pred = rice_model.predict(test_rice[FEATURES_RICE])

# Per-class precision/recall/F1 followed by the confusion matrix.
print(
    classification_report(y_true=test_rice["risk_bin"], y_pred=y_pred),
    confusion_matrix(y_true=test_rice["risk_bin"], y_pred=y_pred),
    sep="\n",
)


              precision    recall  f1-score   support

           0       0.71      0.61      0.65      1734
           1       0.72      0.80      0.75      2147

    accuracy                           0.71      3881
   macro avg       0.71      0.70      0.70      3881
weighted avg       0.71      0.71      0.71      3881

[[1056  678]
 [ 437 1710]]


In [14]:
import joblib

# Persist the fitted rice classifier to the working directory.
joblib.dump(value=rice_model, filename="rice_pest_binary_xgb.joblib")


['rice_pest_binary_xgb.joblib']

In [15]:
# Load the cotton observations from an Excel workbook in the working directory (relative path).
df_cotton = pd.read_excel("Cotton_ICAR_Data.xlsx")


In [16]:
# Binary target: 1 where the recorded pest value is strictly positive, otherwise 0.
df_cotton["risk_bin"] = df_cotton["Pest Value"].gt(0).astype(int)


In [17]:
# Collapse the two relative-humidity readings into their arithmetic mean.
df_cotton["humidity"] = df_cotton["RH1(%)"].add(df_cotton["RH2(%)"]).div(2)

# Weather predictors used by the cotton model, in training column order.
FEATURES_COTTON = ["MaxT(¬∞C)", "MinT(¬∞C)", "humidity", "RF(mm)", "WS(kmph)", "SSH(hrs)", "EVP(mm)"]

# Feature matrix and binary target for the cotton data (rebinds the X / y used for rice).
X, y = df_cotton[FEATURES_COTTON], df_cotton["risk_bin"]


In [18]:
# Hold out 20% of the cotton rows for evaluation, stratified on the target, with a fixed seed.
X_train_real, X_test_real, y_train_real, y_test_real = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Re-attach the label so features and target travel together in one frame per split.
train_cotton = X_train_real.assign(risk_bin=y_train_real)
test_cotton = X_test_real.assign(risk_bin=y_test_real)


In [19]:
def augment_cotton(df, n=3, random_state=None):
    """Return ``n`` jittered copies of every row of a cotton weather frame.

    Each copy perturbs the weather columns and leaves every other column
    (e.g. ``risk_bin``) untouched:

    * the two temperature columns: additive Gaussian noise, sd 1.
    * ``humidity``: additive Gaussian noise, sd 5, clipped to [0, 100].
    * ``RF(mm)`` / ``EVP(mm)``: multiplied by a uniform factor in [0.9, 1.1), floored at 0.
    * ``WS(kmph)`` / ``SSH(hrs)``: additive Gaussian noise, sd 0.5, floored at 0.

    The work is vectorised rather than done row by row, so column dtypes are
    preserved (an integer label stays integer) and missing values stay missing
    instead of being turned into 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the seven weather columns above. Not modified.
    n : int, default 3
        Number of noisy copies per input row; values <= 0 yield an empty frame.
    random_state : int, numpy.random.Generator or None, default None
        Seed or generator for reproducible noise. ``None`` draws fresh entropy,
        so results differ from call to call.

    Returns
    -------
    pandas.DataFrame
        ``len(df) * n`` rows; the copies of each input row are adjacent and keep
        that row's index label.
    """
    rng = np.random.default_rng(random_state)

    # Repeat rows by position so a non-unique index cannot multiply rows.
    positions = np.repeat(np.arange(len(df)), max(n, 0))
    out = df.iloc[positions].copy()
    size = len(out)

    out["MaxT(¬∞C)"] = out["MaxT(¬∞C)"] + rng.normal(0, 1, size)
    out["MinT(¬∞C)"] = out["MinT(¬∞C)"] + rng.normal(0, 1, size)
    out["humidity"] = (out["humidity"] + rng.normal(0, 5, size)).clip(0, 100)
    out["RF(mm)"] = (out["RF(mm)"] * rng.uniform(0.9, 1.1, size)).clip(lower=0)
    out["WS(kmph)"] = (out["WS(kmph)"] + rng.normal(0, 0.5, size)).clip(lower=0)
    out["SSH(hrs)"] = (out["SSH(hrs)"] + rng.normal(0, 0.5, size)).clip(lower=0)
    out["EVP(mm)"] = (out["EVP(mm)"] * rng.uniform(0.9, 1.1, size)).clip(lower=0)
    return out

# Three jittered variants per training row, stacked beneath the original rows.
train_cotton_aug = augment_cotton(df=train_cotton, n=3)
train_cotton_full = pd.concat(
    objs=[train_cotton, train_cotton_aug],
    ignore_index=True,
)


In [20]:
# Shallow (depth-2), strongly regularised gradient-boosted binary classifier for cotton;
# same hyper-parameters as the rice model.
cotton_model = XGBClassifier(
    objective="binary:logistic",
    random_state=42,
    # boosting schedule
    n_estimators=300,
    learning_rate=0.03,
    # tree shape / split constraints
    max_depth=2,
    min_child_weight=15,
    gamma=1.0,
    # row and column subsampling
    subsample=0.65,
    colsample_bytree=0.65,
    # L1 / L2 penalties
    reg_alpha=3.0,
    reg_lambda=6.0,
)

# Fit on the original plus augmented training rows.
cotton_model.fit(X=train_cotton_full[FEATURES_COTTON], y=train_cotton_full["risk_bin"])


In [21]:
from sklearn.metrics import classification_report, confusion_matrix

# Probability cut-off at or above which a cotton row is flagged as "risk".
# NOTE(review): 0.35 is presumably chosen to favour recall on the positive class —
# confirm how it was selected (ideally on a validation split, not the test set).
THRESHOLD = 0.35

# Positive-class probability for each held-out row, then the thresholded label.
probs = cotton_model.predict_proba(test_cotton[FEATURES_COTTON])[:, 1]
y_pred_thresh = np.greater_equal(probs, THRESHOLD).astype(int)

print(
    classification_report(y_true=test_cotton["risk_bin"], y_pred=y_pred_thresh),
    confusion_matrix(y_true=test_cotton["risk_bin"], y_pred=y_pred_thresh),
    sep="\n",
)


              precision    recall  f1-score   support

           0       0.90      0.70      0.79      2307
           1       0.51      0.80      0.63       912

    accuracy                           0.73      3219
   macro avg       0.71      0.75      0.71      3219
weighted avg       0.79      0.73      0.74      3219

[[1609  698]
 [ 180  732]]


In [22]:
# Persist the cotton classifier together with its decision threshold so that
# consumers apply the same cut-off that was evaluated above.
joblib.dump(
    dict(model=cotton_model, threshold=THRESHOLD),
    "cotton_pest_binary_model.pkl",
)


['cotton_pest_binary_model.pkl']

In [24]:
import requests
import urllib.parse

import os
import warnings

# The Fast2SMS key is a secret: read it from the environment instead of committing it
# to the notebook. Any key that was previously pasted into this cell should be treated
# as leaked and rotated with the provider.
FAST2SMS_API_KEY = os.environ.get("FAST2SMS_API_KEY", "")

if not FAST2SMS_API_KEY:
    warnings.warn("FAST2SMS_API_KEY is not set; SMS alerts cannot be sent until it is.")


def send_pest_sms(phone_number, crop, risk, probability=None, timeout=10):
    """Send a pest-risk SMS through the Fast2SMS bulk endpoint.

    Parameters
    ----------
    phone_number : str
        Recipient number, passed to the API as the ``numbers`` field.
    crop : str
        Crop name used as the message headline (e.g. ``"Cotton"``).
    risk : int
        Binary risk flag; ``0`` short-circuits without contacting the API.
    probability : float, optional
        Model probability; when given it is reported as a truncated whole percent.
    timeout : float, default 10
        Seconds to wait for the HTTP request. Without a timeout ``requests`` can
        block indefinitely on an unresponsive server.

    Returns
    -------
    str
        ``"No SMS sent (low risk)"`` when ``risk == 0``; otherwise the raw
        response body returned by the API.

    Raises
    ------
    RuntimeError
        If an alert must be sent but ``FAST2SMS_API_KEY`` is empty.
    requests.exceptions.RequestException
        On network failure or timeout.
    """
    if risk == 0:
        return "No SMS sent (low risk)"

    # Fail with a clear message rather than sending an unauthenticated request.
    if not FAST2SMS_API_KEY:
        raise RuntimeError("FAST2SMS_API_KEY is not configured; cannot send SMS.")

    message = f"{crop} Pest Alert!\nPest risk detected.\n"

    if probability is not None:
        message += f"Confidence: {int(probability*100)}%\n"

    message += (
        "Advice:\n"
        "‚Ä¢ Monitor crop closely\n"
        "‚Ä¢ Install pheromone traps\n"
        "‚Ä¢ Use neem-based spray\n"
        "‚Ä¢ Avoid chemicals unless ETL crossed\n"
    )

    url = "https://www.fast2sms.com/dev/bulkV2"
    params = {
        "authorization": FAST2SMS_API_KEY,
        "route": "q",
        "message": message,
        "numbers": phone_number
    }

    # Let requests build and encode the query string; bound the wait with a timeout.
    response = requests.get(url, params=params, timeout=timeout)
    return response.text


In [25]:
# Score the first held-out cotton row and alert only if it crosses the threshold.
# NOTE(review): the recipient number is hard-coded personal data, and running this
# cell can send a real SMS — consider moving the number to configuration.
X_new = test_cotton.iloc[[0]][FEATURES_COTTON]

prob = cotton_model.predict_proba(X_new)[0, 1]
risk = int(prob >= THRESHOLD)

print(f"Probability: {prob}")
print(f"Risk: {risk}")

send_pest_sms(phone_number="9920834638", crop="Cotton", risk=risk, probability=prob)


Probability: 0.1639474
Risk: 0


'No SMS sent (low risk)'

In [27]:
# Pick the first held-out cotton row whose true label is "risk" and run the alert path.
# NOTE(review): the recipient number is hard-coded personal data, and running this
# cell can send a real SMS — consider moving the number to configuration.
high_risk_row = test_cotton.loc[test_cotton["risk_bin"] == 1].iloc[[0]]
X_new = high_risk_row[FEATURES_COTTON]

prob = cotton_model.predict_proba(X_new)[0, 1]
risk = int(prob >= THRESHOLD)

print(f"Probability: {prob}")
print(f"Risk: {risk}")

send_pest_sms(phone_number="9167975229", crop="Cotton", risk=risk, probability=prob)


Probability: 0.4535384
Risk: 1


'{"return":true,"request_id":"an7TeZzHF16WSgl","message":["SMS sent successfully."]}'

In [28]:
# Source of the Streamlit front-end, kept as a string so a later cell can write it to app.py.
# The village coordinates are resolved with the full district + taluka + village key
# (a village name alone is not guaranteed unique), and an empty match stops the app
# with a message instead of raising on `.iloc[0]`.
app_py = '''
import streamlit as st
import pandas as pd

# -----------------------------
# App Configuration
# -----------------------------
st.set_page_config(
    page_title="AI Pest Risk Alert System",
    page_icon="üåæ",
    layout="centered"
)

# -----------------------------
# Load Location Data
# -----------------------------
locations = pd.read_csv("locations.csv")

# -----------------------------
# Header
# -----------------------------
st.title(" AI Pest Risk Alert System")
st.write(
    "Early-warning pest risk alerts for **Rice** and **Cotton** crops "
    "using weather and satellite-derived indicators."
)

st.divider()

# -----------------------------
# How it Works (Explainability)
# -----------------------------
with st.expander(" How this system works"):
    st.write("""
    - Pest risk is predicted at **village level**
    - All farms in the same village receive the same alert
    - Uses **weather and satellite crop health data**
    - Provides **early-warning risk**, not pest detection
    - Alerts are preventive and IPM-oriented
    """)

# -----------------------------
# Crop Selection
# -----------------------------
st.subheader(" Select Crop")
crop = st.radio(
    "Choose your crop",
    ["Rice", "Cotton"],
    horizontal=True
)

# -----------------------------
# Location Selection
# -----------------------------
st.subheader(" Select Your Village")

district = st.selectbox(
    "District",
    sorted(locations["district"].unique())
)

taluka = st.selectbox(
    "Taluka",
    sorted(locations[locations["district"] == district]["taluka"].unique())
)

village = st.selectbox(
    "Village",
    locations[
        (locations["district"] == district) &
        (locations["taluka"] == taluka)
    ]["village"].unique()
)

# Village names can repeat across talukas/districts, so resolve coordinates
# with the full district + taluka + village key rather than the name alone.
village_rows = locations[
    (locations["district"] == district) &
    (locations["taluka"] == taluka) &
    (locations["village"] == village)
]
if village_rows.empty:
    st.error("No location data found for the selected village.")
    st.stop()

loc_row = village_rows.iloc[0]
lat, lon = loc_row["lat"], loc_row["lon"]

# -----------------------------
# Phone Number (Optional)
# -----------------------------
st.subheader("3Ô∏è SMS Alert (Optional)")
phone = st.text_input(
    "Mobile Number (for SMS alert if pest risk is detected)",
    placeholder="10-digit mobile number"
)

# -----------------------------
# Prediction Trigger
# -----------------------------
st.divider()

if st.button("Check Pest Risk", use_container_width=True):

    with st.spinner("Analyzing crop and weather conditions..."):
        # -----------------------------
        # MOCK PREDICTION LOGIC
        # (Replace with real ML later)
        # -----------------------------
        if crop == "Rice":
            pest_risk = 0  # binary: 0 = low, 1 = risk
        else:
            pest_risk = 1

    st.divider()

    # -----------------------------
    # Results Display
    # -----------------------------
    if pest_risk == 0:
        st.success("No significant pest risk detected in your village.")
        st.write("**Advisory:** Continue regular crop monitoring.")

    else:
        st.error(" Pest risk detected in your village.")
        st.write("**Recommended actions:**")
        st.markdown("""
        - Monitor crop closely
        - Use Integrated Pest Management (IPM) practices
        - Avoid unnecessary chemical spraying
        """)

        # NOTE: part of the mock flow - no SMS is actually dispatched from this app yet.
        if phone.strip():
            st.info(" SMS alert has been sent to the registered number.")

# -----------------------------
# Footer
# -----------------------------
st.divider()
st.caption(
    "This system provides **village-level early warning alerts** "
    "based on environmental conditions. "
    "It is not a diagnostic tool."
)
'''

In [29]:
# Contents of the requirements.txt that a later cell writes out alongside the app.
# NOTE(review): versions are unpinned, and the list has no xgboost / scikit-learn —
# confirm before the app is changed to load the saved models.
requirements_txt = '''
streamlit
pandas
numpy
joblib
requests
'''

In [30]:
with open("app.py", "w") as f:
    f.write(app_py)

with open("requirements.txt", "w") as f:
    f.write(requirements_txt)

!ls

app.py			      drive		rice_pest_binary_xgb.joblib
Cotton_ICAR_Data.xlsx	      requirements.txt	sample_data
cotton_pest_binary_model.pkl  RICE.csv


In [31]:
from google.colab import files

# Download the two generated files through the Colab `files` helper.
files.download("app.py")
files.download("requirements.txt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>