# In [None]:
# crop_yield_app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from io import BytesIO
from fpdf import FPDF

# ========== PAGE CONFIG ==========
# Must run before any other Streamlit call renders output.
st.set_page_config(layout="wide", page_title="Crop Yield Prediction App")

# ========== CUSTOM STYLES ==========
# Raw CSS injected into the page; requires unsafe_allow_html to be honoured.
CUSTOM_CSS = """
    <style>
    .main { background-color: #f8f9fa; }
    .stApp { font-family: 'Segoe UI', sans-serif; }
    h1 { color: #2e7d32; }
    .css-1d391kg { background-color: #e8f5e9; padding: 10px; border-radius: 5px; }
    </style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# ========== HEADER ==========
INTRO_TEXT = """
Welcome! This tool helps farmers, agricultural organizations, and government agencies estimate crop yield using machine learning.
You can upload your own dataset or use a sample to explore insights and generate predictions.
"""
st.title("🌾 AI-Powered Crop Yield Prediction App")
st.markdown(INTRO_TEXT)

# ========== USER ROLE & DATA SOURCE ==========
# The option labels are compared verbatim further down the script.
ROLE_OPTIONS = ["Farmer", "Organization", "Government Agency"]
DATA_SOURCE_OPTIONS = ["Upload my own data", "Use sample (pretrained) dataset"]

role = st.selectbox("Who are you?", ROLE_OPTIONS)
data_source = st.radio("Choose data source:", DATA_SOURCE_OPTIONS)

# ========== DATA UPLOAD ==========
# The uploader is always rendered; it returns None until a file is provided.
uploaded_file = st.file_uploader(
    "Upload your crop dataset (.csv)",
    type=["csv"],
)

def load_sample_data(path="sample_crop_data.csv"):
    """Load the sample crop dataset into a DataFrame.

    Args:
        path: Location of the sample CSV. Anything ``pandas.read_csv``
            accepts (a path string or a file-like object) may be passed.
            Defaults to ``"sample_crop_data.csv"``, resolved relative to the
            current working directory.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``path`` names a file that does not exist.
    """
    return pd.read_csv(path)

# Resolve the working DataFrame from the chosen data source. `df` stays empty
# when nothing usable is available, which the rest of the script treats as
# "no data yet".
df = pd.DataFrame()

if data_source == "Upload my own data":
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # A malformed upload should produce a readable message rather
            # than a raw traceback in the app.
            st.error(f"❌ Could not read the uploaded file as CSV: {exc}")
        else:
            st.success("✅ Data uploaded successfully!")
elif data_source == "Use sample (pretrained) dataset":
    try:
        df = load_sample_data()
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Covers a missing/unreadable sample file as well as a corrupt one.
        st.error(f"❌ Could not load the sample dataset: {exc}")
    else:
        st.info("ℹ️ Using sample data.")

if not df.empty:
    st.subheader("📄 Data Preview")
    st.dataframe(df.head())

    # Streamlit re-runs this whole script on every widget interaction, so
    # anything produced by a button click (the fitted model and its metrics)
    # has to live in session state to survive the next rerun.
    TRAINING_STATE_KEY = "crop_yield_training"

    # ========== EDA SECTION ==========
    with st.expander("📊 Exploratory Data Analysis"):
        col1, col2 = st.columns(2)

        with col1:
            # The histogram is only drawn when the dataset has a Crop_Type column.
            if 'Crop_Type' in df.columns:
                fig = px.histogram(df, x='Crop_Type', title='Crop Distribution')
                st.plotly_chart(fig)

        with col2:
            numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
            if len(numeric_cols) >= 2:
                # Limit the matrix to the first three numeric columns.
                fig = px.scatter_matrix(df, dimensions=numeric_cols[:3])
                st.plotly_chart(fig)

    # ========== DATA CLEANING OPTIONS ==========
    with st.expander("🧹 Data Cleaning"):
        st.markdown("Choose how you'd like to handle missing data:")
        strategy = st.radio("Missing value strategy", ["Drop rows", "Fill with mean", "Fill with median"])

        # The fill strategies only touch numeric columns; missing values in
        # non-numeric columns are left as they are.
        if strategy == "Fill with mean":
            df_cleaned = df.fillna(df.mean(numeric_only=True))
        elif strategy == "Fill with median":
            df_cleaned = df.fillna(df.median(numeric_only=True))
        else:
            # "Drop rows" (the default option); the else guarantees that
            # df_cleaned is always bound.
            df_cleaned = df.dropna()

        st.success("✅ Data cleaned.")
        st.dataframe(df_cleaned.head())

    # ========== MODEL SELECTION & TRAINING ==========
    with st.expander("🤖 Model Training"):
        from sklearn.model_selection import train_test_split
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

        numeric_columns = df_cleaned.select_dtypes(include=np.number).columns.tolist()
        features = st.multiselect("Select features for prediction", numeric_columns)
        target = st.selectbox("Select target (what to predict)", df_cleaned.columns)

        if st.button("Train Model"):
            # Validate the selection up front so a bad choice yields a
            # message instead of an exception from scikit-learn.
            problem = None
            training_rows = None
            if not features:
                problem = "Select at least one feature before training."
            elif target in features:
                problem = "The target column cannot also be used as a feature."
            elif target not in numeric_columns:
                problem = "The target must be a numeric column."
            else:
                # Rows can still hold NaN after a fill strategy (e.g. a column
                # that is entirely empty), so keep only complete rows.
                training_rows = df_cleaned[features + [target]].dropna()
                if len(training_rows) < 2:
                    problem = "Not enough complete rows to train a model."

            if problem is not None:
                st.error(f"❌ {problem}")
            else:
                X = training_rows[features]
                y = training_rows[target]

                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
                model = RandomForestRegressor()
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                st.session_state[TRAINING_STATE_KEY] = {
                    "model": model,
                    "features": list(features),
                    "target": target,
                    "mae": float(mean_absolute_error(y_test, y_pred)),
                    "r2": float(r2_score(y_test, y_pred)),
                    # Take the square root explicitly: the `squared=False`
                    # keyword is not available in recent scikit-learn.
                    "rmse": float(np.sqrt(mean_squared_error(y_test, y_pred))),
                }
                st.success("✅ Model trained!")

        trained = st.session_state.get(TRAINING_STATE_KEY)
        if trained is not None and not set(trained["features"]) <= set(df_cleaned.columns):
            # The stored model was fitted on a different dataset; discard it.
            del st.session_state[TRAINING_STATE_KEY]
            trained = None

        if trained is not None:
            col1, col2, col3 = st.columns(3)
            col1.metric("MAE", f"{trained['mae']:.2f}")
            col2.metric("R² Score", f"{trained['r2']:.2f}")
            col3.metric("RMSE", f"{trained['rmse']:.2f}")

    # ========== PREDICTION ==========
    with st.expander("🔮 Predict Yield"):
        st.markdown("Enter crop and environment data to predict expected yield:")

        if trained is None:
            st.info("ℹ️ Train a model first to enable predictions.")
        else:
            inputs = []
            for feat in trained["features"]:
                # Pre-fill each input with the column mean of the cleaned data.
                default_value = df_cleaned[feat].mean()
                default_value = 0.0 if pd.isna(default_value) else float(default_value)
                value = st.number_input(f"{feat}", value=default_value)
                inputs.append(value)

            if st.button("Predict Yield"):
                # Pass a one-row DataFrame with the training column names so
                # the input lines up with what the model was fitted on.
                sample = pd.DataFrame([inputs], columns=trained["features"])
                pred = trained["model"].predict(sample)[0]
                st.success(f"🌾 Estimated Yield: {pred:.2f} units")

    # ========== EXPORT ==========
    with st.expander("📥 Download Cleaned Data"):
        csv = df_cleaned.to_csv(index=False).encode('utf-8')
        st.download_button("Download CSV", csv, "cleaned_crop_data.csv", "text/csv")

    # ========== PDF REPORT ==========
    def generate_pdf_report():
        """Build a one-page PDF summarising the most recent training run.

        Reads the training result stored in ``st.session_state``.

        Returns:
            A ``BytesIO`` positioned at the start of the PDF data, or ``None``
            when no model has been trained yet.
        """
        report = st.session_state.get(TRAINING_STATE_KEY)
        if report is None:
            return None

        def _latin1(text):
            # The built-in "Arial" font only covers latin-1; replace anything
            # else so unusual column names cannot break the export.
            return str(text).encode("latin-1", "replace").decode("latin-1")

        feature_list = ', '.join(_latin1(name) for name in report["features"])

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt="Crop Yield Prediction Report", ln=True, align='C')
        pdf.ln(10)
        pdf.cell(
            200,
            10,
            txt=f"R² Score: {report['r2']:.2f}  |  MAE: {report['mae']:.2f}  |  RMSE: {report['rmse']:.2f}",
            ln=True,
        )
        pdf.ln(10)
        pdf.multi_cell(0, 10, txt=f"Features used: {feature_list}\nTarget: {_latin1(report['target'])}")
        pdf_output = BytesIO()
        # NOTE(review): writing to a file-like object assumes the fpdf2
        # implementation of FPDF.output — confirm the installed package.
        pdf.output(pdf_output)
        pdf_output.seek(0)
        return pdf_output

    with st.expander("📄 Download PDF Report"):
        pdf_file = generate_pdf_report()
        if pdf_file is None:
            st.info("ℹ️ Train a model to generate a PDF report.")
        else:
            st.download_button("Download PDF Report", pdf_file, file_name="yield_report.pdf", mime="application/pdf")

else:
    st.warning("⚠️ Please upload a dataset or select sample data to continue.")
