# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
import seaborn as sns

# Function to load and preprocess data
def load_data(file_path):
    """Read a CSV and append natural-log versions of the three model columns.

    Parameters
    ----------
    file_path : str, path object or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame with ``Log_MPA``, ``Log_Fishery_Consumption`` and
        ``Log_Share_Plastic_Pollution`` added, or ``None`` when reading or
        transforming fails (the failure is reported through ``st.error``).
    """
    # Derived column -> source column; insertion order fixes the column order.
    log_columns = {
        'Log_MPA': 'MPA',
        'Log_Fishery_Consumption': 'FisheryConsumption',
        'Log_Share_Plastic_Pollution': 'Share of global plastics emitted to ocean',
    }
    epsilon = 1e-10  # keeps log() finite when a value is exactly zero

    try:
        frame = pd.read_csv(file_path)
        for derived, source in log_columns.items():
            frame[derived] = np.log(frame[source] + epsilon)
    except KeyError as e:
        st.error(f"Missing column in data: {e}")
        return None
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None
    return frame


# Function to fit polynomial regression model
def fit_polynomial_regression(data):
    """Fit an OLS model of log fishery consumption on a quadratic basis.

    The regressors are ``Log_MPA`` and ``Log_Share_Plastic_Pollution`` plus
    their squares; the cross term is dropped and a constant column is added
    via ``sm.add_constant``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Log_MPA``, ``Log_Share_Plastic_Pollution`` and
        ``Log_Fishery_Consumption``.

    Returns
    -------
    tuple
        ``(model, data)``: the fitted statsmodels results object and the
        same DataFrame, with ``Predicted_Log_Fishery_Consumption`` written
        into it in place.
    """
    predictors = data[['Log_MPA', 'Log_Share_Plastic_Pollution']]
    expander = PolynomialFeatures(degree=2, include_bias=False)
    expanded = expander.fit_transform(predictors)

    # A degree-2 expansion of two inputs a, b is ordered [a, b, a^2, a*b, b^2],
    # so column 3 is the a*b interaction that the model leaves out.
    interaction_column = 3
    without_interaction = np.delete(expanded, interaction_column, axis=1)
    design = sm.add_constant(without_interaction)

    target = data['Log_Fishery_Consumption']
    model = sm.OLS(target, design).fit()

    # In-sample fitted values, stored next to the observations for plotting.
    data['Predicted_Log_Fishery_Consumption'] = model.predict(design)
    return model, data

# Function to plot results
def plot_results(data):
    """Render observed vs. fitted log fishery consumption against ``Log_MPA``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Log_MPA``, ``Log_Fishery_Consumption`` and
        ``Predicted_Log_Fishery_Consumption``.

    Returns
    -------
    None
        The chart is sent to the Streamlit page.
    """
    # Draw on an explicit figure instead of pyplot's global one: calling
    # st.pyplot() with no argument relies on global state, which Streamlit
    # documents as deprecated and not thread-safe.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        x=data['Log_MPA'],
        y=data['Log_Fishery_Consumption'],
        label='Actual Data',
        ax=ax,
    )
    sns.lineplot(
        x=data['Log_MPA'],
        y=data['Predicted_Log_Fishery_Consumption'],
        color='red',
        label='Polynomial Regression',
        ax=ax,
    )
    ax.set_xlabel('Log_MPA')
    ax.set_ylabel('Log_Fishery_Consumption')
    ax.set_title('Polynomial Regression: Log(MPA) vs Log(Fishery Consumption)')
    ax.legend()
    try:
        st.pyplot(fig)
    finally:
        # Release the figure even if rendering fails, so reruns of the
        # script do not accumulate open figures.
        plt.close(fig)


# Main function for the Streamlit app
def main():
    """Run the Streamlit app: upload a CSV, fit the model, show the results.

    Nothing is computed until a file has been uploaded. If the file cannot
    be loaded, ``load_data`` has already shown an error and the run stops
    before fitting. The conclusions are derived from the fitted model, so
    they stay accurate for whichever dataset is uploaded.
    """
    st.title("Polynomial Regression Analysis")

    # Upload CSV file
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    # Load and preprocess data
    data = load_data(uploaded_file)
    if data is None:
        # load_data returns None after reporting the failure via st.error;
        # fitting on None would only raise a confusing TypeError.
        return

    # Fit polynomial regression model
    model, data = fit_polynomial_regression(data)

    # Display regression results
    st.subheader("OLS Regression Results")
    st.text(model.summary().as_text())

    # Plot the results
    st.subheader("Polynomial Regression Plot")
    plot_results(data)

    # Display conclusions computed from this fit rather than fixed numbers
    st.subheader("Conclusions")
    significance_level = 0.05
    r_squared = model.rsquared
    f_pvalue = model.f_pvalue
    if f_pvalue < significance_level:
        verdict = "statistically significant"
    else:
        verdict = "not statistically significant at the 5% level"

    conclusions = [
        f"- The R-squared value is {r_squared:.3f}, indicating that "
        f"approximately {r_squared:.1%} of the variability in "
        "Log_Fishery_Consumption is explained by Log_MPA and "
        "Log_Share_Plastic_Pollution and their quadratic terms.",
        f"- The overall model is {verdict} "
        f"(F-test p-value of {f_pvalue:.2e}).",
        "- The coefficient p-values in the regression table above show "
        "whether Log_MPA and Log_Share_Plastic_Pollution are individually "
        "associated with Log_Fishery_Consumption.",
        "- The plot above compares the fitted values from the polynomial "
        "regression with the observed data.",
    ]
    st.write("\n".join(conclusions))
    
# Start the app only when this file is executed as a script (for example by
# `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()
