# In [6]:
import streamlit as st
import pandas as pd
import re

# --- 1. Page Configuration ---
# Browser-tab title, favicon and page layout for the app.
st.set_page_config(
    page_title="AgriBot - Fertilizer Assistant",
    # Seedling emoji, written as an ASCII escape so the glyph survives any
    # re-encoding of this source file.
    page_icon="\N{SEEDLING}",
    layout="centered",
)

# --- 2. Load and Preprocess Data ---
@st.cache_data
def load_data():
    """Load the crop and district CSV files and add normalised lookup columns.

    Reads ``C1.csv`` and ``Fdistrict.csv`` from the working directory (skipping
    the first row of each file), strips whitespace from the column names and
    adds lower-cased, stripped helper columns ``crop_lower``,
    ``district_lower`` and ``state_lower`` that the lookup functions match on.

    Returns:
        A ``(crops, districts)`` tuple of DataFrames. When a file is missing,
        cannot be parsed, or lacks a column the rest of the app reads, the
        problem is reported with ``st.error`` and ``(None, None)`` is returned.
    """
    # Columns that other parts of the script index directly; checking them
    # here turns a later KeyError into the same graceful "no data" path.
    crop_columns = ('crop', 'N(kg/ha)')
    district_columns = ('state', 'district', 'Avg. soil N(kg/ha)')

    try:
        # Skipping first row as per previous analysis of the files
        c1 = pd.read_csv('C1.csv', skiprows=1)
        f_dist = pd.read_csv('Fdistrict.csv', skiprows=1)
    except FileNotFoundError as e:
        st.error(f"Error loading data: {e}. Please ensure C1.csv and Fdistrict.csv are in the directory.")
        return None, None
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        st.error(f"Error reading data: {e}. Please check that C1.csv and Fdistrict.csv are valid CSV files.")
        return None, None

    # Clean column names (strip whitespace)
    c1.columns = c1.columns.str.strip()
    f_dist.columns = f_dist.columns.str.strip()

    missing = [f"C1.csv: {col}" for col in crop_columns if col not in c1.columns]
    missing += [f"Fdistrict.csv: {col}" for col in district_columns if col not in f_dist.columns]
    if missing:
        st.error(f"Error loading data: missing expected column(s): {', '.join(missing)}.")
        return None, None

    # Normalize text data for easier searching
    c1['crop_lower'] = c1['crop'].astype(str).str.lower().str.strip()
    f_dist['district_lower'] = f_dist['district'].astype(str).str.lower().str.strip()
    f_dist['state_lower'] = f_dist['state'].astype(str).str.lower().str.strip()

    return c1, f_dist

# Module-level tables shared by the helper functions below; both are None
# when load_data() could not read the CSV files.
df_crop, df_dist = load_data()

# --- 3. Logic & Calculation Functions ---

def get_crop_n_req(crop_name):
    """Return the nitrogen requirement (kg/ha) recorded for a crop.

    The lookup is case- and whitespace-insensitive and uses the first matching
    row of ``df_crop``. The ``N(kg/ha)`` cell may hold a single number or a
    range such as ``"100-120"``; a range is reduced to its midpoint.

    Args:
        crop_name: Crop name to look up.

    Returns:
        The requirement as a float, or ``None`` when the crop table is not
        loaded, the crop is unknown, or the cell is empty or not parseable.
    """
    if df_crop is None:
        return None

    key = str(crop_name).strip().lower()
    matches = df_crop[df_crop['crop_lower'] == key]
    if matches.empty:
        return None

    raw = matches.iloc[0]['N(kg/ha)']
    if pd.isna(raw):
        return None
    text = str(raw).strip()

    # Plain numbers first: this also covers negative and exponent notation,
    # which contain '-' but are not ranges.
    try:
        value = float(text)
    except ValueError:
        value = None
    if value is not None:
        return None if pd.isna(value) else value

    # Handle ranges like "100-120" (hyphen, en dash or em dash separator).
    parts = re.split(r'\s*[-\u2013\u2014]\s*', text)
    if len(parts) != 2:
        return None
    try:
        low, high = float(parts[0]), float(parts[1])
    except ValueError:
        return None
    return (low + high) / 2

def get_soil_n_data(district, state):
    """Return the average soil nitrogen (kg/ha) for a district within a state.

    Matching is case- and whitespace-insensitive on both names, and the first
    matching row of ``df_dist`` is used.

    Args:
        district: District name to look up.
        state: State the district belongs to.

    Returns:
        The ``Avg. soil N(kg/ha)`` value as a float, or ``None`` when the
        district table is not loaded, no row matches, or the stored value is
        missing or not numeric.
    """
    if df_dist is None:
        return None

    district_key = str(district).strip().lower()
    state_key = str(state).strip().lower()

    # Filter by both district and state to be precise
    mask = (df_dist['district_lower'] == district_key) & (df_dist['state_lower'] == state_key)
    matches = df_dist[mask]
    if matches.empty:
        return None

    raw = matches.iloc[0]['Avg. soil N(kg/ha)']
    # Coerce here so callers can do arithmetic without hitting text or NaN.
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return None
    return None if pd.isna(value) else value

def calculate_reduction(district, state, crop):
    """Build the Markdown reply comparing soil nitrogen with a crop's need.

    Looks up the crop requirement and the district's soil nitrogen, then
    reports either the excess (with the equivalent Urea reduction) or the
    shortfall when the soil holds less nitrogen than the crop needs.

    Args:
        district: District name (already normalised by the caller).
        state: State name (already normalised by the caller).
        crop: Crop name (already normalised by the caller).

    Returns:
        A Markdown string: the analysis, or an error line when the crop
        requirement or the soil data cannot be found.
    """
    n_req = get_crop_n_req(crop)
    if n_req is None:
        return f"\N{CROSS MARK} I couldn't find nutrient requirements for **{crop.title()}**."

    no_soil_data = f"\N{CROSS MARK} I couldn't find soil data for **{district.title()}** in **{state.title()}**."
    soil_n = get_soil_n_data(district, state)
    if soil_n is None:
        return no_soil_data
    # The table cell may come back as text or NaN; treat both as "no data"
    # rather than failing in the arithmetic or printing "nan".
    try:
        soil_n = float(soil_n)
    except (TypeError, ValueError):
        return no_soil_data
    if soil_n != soil_n:  # NaN is the only value not equal to itself
        return no_soil_data

    # Urea is 46% Nitrogen
    urea_n_fraction = 0.46

    crop_label = crop.title()
    summary = (
        f"### \N{EAR OF RICE} Analysis for {crop_label} in {district.title()}, {state.title()}\n\n"
        f"**1. Crop Requirement:** {crop_label} needs approx **{n_req:.1f} kg/ha** of Nitrogen.\n"
        f"**2. Soil Status:** Your soil already has **{soil_n:.1f} kg/ha** of Nitrogen.\n"
    )

    excess_n = soil_n - n_req
    if excess_n > 0:
        potential_savings = excess_n / urea_n_fraction
        return summary + (
            f"**3. Excess Nitrogen:** You have an excess of **{excess_n:.1f} kg/ha**.\n\n"
            f"#### \N{CHART WITH DOWNWARDS TREND} Recommendation:\n"
            f"You can potentially reduce Urea application by **{potential_savings:.2f} kg/ha** "
            f"while still meeting the crop's needs."
        )

    # No surplus: report the gap instead of a negative "excess"/"reduction".
    shortfall_n = n_req - soil_n
    shortfall_urea = shortfall_n / urea_n_fraction
    return summary + (
        f"**3. Nitrogen Shortfall:** Soil Nitrogen is **{shortfall_n:.1f} kg/ha** below the crop requirement.\n\n"
        f"#### \N{CHART WITH DOWNWARDS TREND} Recommendation:\n"
        f"There is no excess soil Nitrogen, so this estimate does not support reducing Urea. "
        f"The shortfall corresponds to about **{shortfall_urea:.2f} kg/ha** of Urea."
    )

def _find_longest_match(text, names):
    """Return the longest entry of *names* found in *text* as a whole term, or None."""
    # Skip blanks and the 'nan' placeholder that astype(str) produces for
    # missing cells; an empty name would otherwise match any text.
    usable = (name for name in dict.fromkeys(names) if name and name != 'nan')
    # Longest first so "black gram" is preferred over "gram"; the sort is
    # stable, so equally long names keep their table order.
    for name in sorted(usable, key=len, reverse=True):
        # Lookarounds instead of \b so names that start or end with
        # punctuation still match, while 'rice' in 'price' does not.
        if re.search(r'(?<!\w)' + re.escape(name) + r'(?!\w)', text):
            return name
    return None


def parse_input(user_text):
    """Find the crop, district and state mentioned in a user message.

    The message is lower-cased and compared against the names present in
    ``df_crop`` and ``df_dist``; at most one of each entity is reported. When
    a district is found without a state and that district occurs in exactly
    one state in the table, the state is filled in automatically.

    Args:
        user_text: Raw text typed by the user.

    Returns:
        A dict with any of the keys ``'crop'``, ``'district'`` and ``'state'``
        (lower-case values); empty when the tables are not loaded or nothing
        matched.
    """
    if df_crop is None or df_dist is None:
        return {}

    text = str(user_text).lower()
    found = {}

    lookups = (
        ('crop', df_crop['crop_lower']),
        ('district', df_dist['district_lower']),
        ('state', df_dist['state_lower']),
    )
    for key, column in lookups:
        match = _find_longest_match(text, column.unique())
        if match is not None:
            found[key] = match

    # Auto-infer state if district is unique and state is missing
    if 'district' in found and 'state' not in found:
        possible_states = df_dist[df_dist['district_lower'] == found['district']]['state_lower'].unique()
        if len(possible_states) == 1:
            found['state'] = possible_states[0]

    return found

# --- 4. Chat Interface Layout ---

# Page heading; the seedling emoji is an ASCII escape so the glyph survives
# any re-encoding of this source file.
st.title("\N{SEEDLING} AgriBot")
st.markdown("I can help you calculate fertilizer reductions based on your soil health.")

# Initialize chat history with the assistant's greeting on the first run of a session
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Hello! I am AgriBot. Tell me your **District** and **Crop** (e.g., *'Rice in Ludhiana'*), and I'll calculate potential fertilizer savings for you."}
    ]

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# --- 5. Chat Logic ---

if prompt := st.chat_input("Type here... (e.g., 'Check Wheat for Hisar')"):
    # Record and display the user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Bot processing
    with st.chat_message("assistant"):
        with st.spinner("Analyzing soil data..."):
            if df_crop is None or df_dist is None:
                # Without the tables nothing can be recognised; say so instead
                # of asking the user to repeat the district or crop.
                response_text = (
                    "I can't run an analysis right now because the crop and soil data failed to load. "
                    "Please check that **C1.csv** and **Fdistrict.csv** are available."
                )
            else:
                # 1. Parse entities
                entities = parse_input(prompt)
                district = entities.get('district')
                state = entities.get('state')
                crop = entities.get('crop')

                # 2. Decide response
                if district and state and crop:
                    # We have everything needed
                    response_text = calculate_reduction(district, state, crop)
                elif district and crop:
                    # State is missing: this happens if a district exists in
                    # multiple states (rare but possible)
                    response_text = f"I found **{district.title()}**, but I'm not sure which State it is in. Could you specify the State?"
                elif not district and not crop:
                    response_text = "I didn't catch the location or crop. Please mention a **District** (like Ludhiana) and a **Crop** (like Rice)."
                elif not district:
                    response_text = "Which **District** are you asking about?"
                else:
                    # Only remaining case: district known, crop missing
                    response_text = f"I see you are asking about **{district.title()}**. Which **Crop** are you planning to grow?"

            st.markdown(response_text)
            st.session_state.messages.append({"role": "assistant", "content": response_text})

# --- Sidebar for Data Preview ---
with st.sidebar:
    # Bar-chart emoji written as an ASCII escape so the glyph survives any
    # re-encoding of this source file.
    st.header("\N{BAR CHART} Database Status")

    # Each table gets a row count and a five-row preview, shown only when it loaded.
    if df_crop is not None:
        st.success(f"Loaded {len(df_crop)} Crops")
        st.dataframe(df_crop[['crop', 'N(kg/ha)']].head(5), hide_index=True)

    if df_dist is not None:
        st.success(f"Loaded {len(df_dist)} Districts")
        st.dataframe(df_dist[['state', 'district', 'Avg. soil N(kg/ha)']].head(5), hide_index=True)

# Captured notebook output (not part of the script):
# 2025-11-22 09:18:54.267
#   command:
#
#     streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
# 2025-11-22 09:18:54.306 Session state does not function when running a script without `streamlit run`
