# In [17]:
import pandas as pd
import numpy as np
import streamlit as st
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Initialize session state
# Seed the flag only when it is absent so an existing value is never overwritten.
if 'model_trained' not in st.session_state:
    st.session_state['model_trained'] = False

# Load Data
@st.cache_data
def _read_climate_csv(source):
    """Read a CSV into a DataFrame and drop the stray ``Unnamed: 0`` column if present.

    Any exception from ``pd.read_csv`` propagates to the caller, so only
    successful reads end up in the Streamlit data cache.
    """
    frame = pd.read_csv(source)
    return frame.drop(columns=['Unnamed: 0'], errors='ignore')


def load_data(uploaded_file=None):
    """Load the climate dataset from an upload or from the default CSV file.

    Args:
        uploaded_file: Object accepted by ``pd.read_csv`` (e.g. the value
            returned by ``st.file_uploader``), or ``None`` to read
            ``climate_data_final_df.csv`` from the working directory.

    Returns:
        The loaded DataFrame, or an empty DataFrame when loading fails (the
        failure is also reported to the page with ``st.error``).
    """
    source = uploaded_file if uploaded_file is not None else "climate_data_final_df.csv"  # Default file
    try:
        # The error fallback lives outside the cached helper on purpose: a
        # failed load must not be stored and replayed on later reruns.
        return _read_climate_csv(source)
    except Exception as e:
        st.error(f"Data loading failed: {str(e)}")
        return pd.DataFrame()

# Streamlit UI
st.title("Climate Change Impact Analyzer")

# File Upload
uploaded_file = st.file_uploader("Upload your climate dataset (CSV)", type=["csv"])
df = load_data(uploaded_file)
if df.empty:
    st.error("No data loaded. Please upload a valid CSV file.")
    st.stop()

# The rest of the script indexes df['Entity'] and selects 'Year'; report a
# readable error instead of a KeyError traceback when a dataset lacks them.
missing_columns = [col for col in ('Entity', 'Year') if col not in df.columns]
if missing_columns:
    st.error(f"Dataset is missing required column(s): {', '.join(missing_columns)}")
    st.stop()

# Sidebar Controls
with st.sidebar:
    st.header("Analysis Controls")
    selected_country = st.selectbox("Select Country", df['Entity'].unique())
    available_features = [col for col in df.columns if col not in ['Entity', 'Year']]
    selected_features = st.multiselect("Select Features", available_features, default=available_features[:2])
    # Only offer columns that are not already selected as features: predicting a
    # column from itself leaks the answer into X and repeats a column label in
    # the filtered frame.
    target_options = [col for col in available_features if col not in selected_features]
    target = st.selectbox("Select Target Variable", target_options)
    model_choice = st.radio("Select Model Type", ["Random Forest", "LSTM"], horizontal=True)

# st.selectbox yields None when it has no options to offer.
if target is None:
    st.error("No target variable available. Deselect at least one feature so it can be used as the target.")
    st.stop()

# Data Filtering
# Without any feature the design matrix would have zero columns and the
# scaler/model steps further down would fail.
if not selected_features:
    st.error("Select at least one feature to train a model.")
    st.stop()

# dict.fromkeys removes repeated labels while keeping their order, so selecting
# the target column later yields a single Series even if it is also a feature.
model_columns = list(dict.fromkeys(['Year'] + selected_features + [target]))
filtered_df = df[df['Entity'] == selected_country][model_columns].dropna()
if filtered_df.empty:
    st.error("No data available for selected country and features.")
    st.stop()

# Prepare Data
# A single row cannot be divided into a non-empty train and test part;
# train_test_split would raise, so stop with a readable message instead.
if len(filtered_df) < 2:
    st.error("Not enough data points for the selected country to train and evaluate a model.")
    st.stop()

X = filtered_df[selected_features]
y = filtered_df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Fit the scaler on the training part only, then apply the same scaling to the test part.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def train_model(X_train_scaled, y_train, model_type):
    """Fit and return the regressor selected by ``model_type``.

    Args:
        X_train_scaled: Scaled training features; must expose ``.shape`` and
            ``.reshape`` (the LSTM path reads ``shape[0]`` and ``shape[1]``).
        y_train: Training targets passed unchanged to the model's ``fit``.
        model_type: ``"Random Forest"`` or ``"LSTM"``.

    Returns:
        The fitted ``RandomForestRegressor`` or Keras ``Sequential`` model.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    if model_type == "Random Forest":
        model = RandomForestRegressor(n_estimators=200, random_state=42)
        model.fit(X_train_scaled, y_train)
        return model

    if model_type == "LSTM":
        # Present every row to the LSTM as a sequence of length 1.
        X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(1, X_train_scaled.shape[1])),
            Dropout(0.3),
            LSTM(32),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train_reshaped, y_train, epochs=100, batch_size=16, validation_split=0.2, verbose=0)
        return model

    # Previously an unknown name fell through to `return model` and surfaced as
    # an UnboundLocalError; fail with an explicit, descriptive error instead.
    raise ValueError(
        f"Unsupported model type: {model_type!r}. Expected 'Random Forest' or 'LSTM'."
    )

# Train Model
model = train_model(X_train_scaled, y_train, model_choice)

# Predictions & Metrics
# The random forest consumes the scaled matrix directly; any other choice gets
# the test rows reshaped to (rows, 1, columns) before prediction.
if model_choice == "Random Forest":
    y_pred = model.predict(X_test_scaled)
else:
    test_sequences = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))
    y_pred = model.predict(test_sequences)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Report the metrics in the page, four decimals each.
st.subheader("Model Performance")
st.write(f"**RMSE:** {rmse:.4f}")
st.write(f"**R² Score:** {r2:.4f}")
st.write(f"**MAE:** {mae:.4f}")


# Output: 2025-03-27 16:38:40.213 No runtime found, using MemoryCacheStorageManager


