In [1]:
#IMPORT NECESSARY LIBRARIES
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score,root_mean_squared_error
import pickle

# --- Load data ---------------------------------------------------------------
df = pd.read_excel('Ocean_cleaned_data.xlsx')

# Feature columns used for training. The Streamlit app that loads the pickled
# model selects these same column names, so keep the two lists in sync.
FEATURE_COLUMNS = [
    'data_Source_encoded',
    'Country_encoded',
    'Region_encoded',
    'Year',
    'Pollution_Level_encoded',
    'Waste_Management_Efficiency',
    'Nearby_Population',
]
TARGET_COLUMN = 'Plastic_Weight_kg'

x = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# --- Train / evaluate --------------------------------------------------------
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

# The metric functions are already imported at the top of the cell, so they are
# not re-imported here.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)

print("r² :", r2)
print("mse:", mse)
print("rmse:", rmse)

# --- Persist the fitted model ------------------------------------------------
# Written to the working directory under the name the Streamlit app opens.
with open('LinearRegression.pkl', 'wb') as file:
    pickle.dump(model, file)

r² : 0.969479675249863
mse: 632.7341793483675
rmse: 25.154207984915118


In [2]:
!pip install streamlit



In [3]:
%%writefile ocean_plastic.py
import streamlit as st
import pickle
import numpy as np

# ==========================
# Ocean Plastic Pollution Prediction App 🌊
# ==========================

import streamlit as st
import pandas as pd
import pickle

# --------------------------
# Load the trained model
# --------------------------
# Load the fitted regressor from the app's working directory. If the file is
# absent, show an error and halt the script run so that nothing below executes
# without a model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load a model file that you produced yourself.
try:
    with open("LinearRegression.pkl", "rb") as file:
        model = pickle.load(file)
except FileNotFoundError:
    st.error("❌ LinearRegression.pkl not found. Make sure it is in the same folder as this app.")
    st.stop()

# --------------------------
# App Title
# --------------------------
# Page heading followed by a one-line description of what the app predicts.
st.title("🌍 Ocean Plastic Pollution Prediction App")
st.markdown(
    "Use this app to predict **Plastic Weight (kg)** in ocean regions based on pollution data."
)

# --------------------------
# Sidebar - Single Prediction Inputs
# --------------------------
st.sidebar.header("🔧 Input Features for Single Prediction")

# One sidebar widget per model feature. The *_encoded inputs take the numeric
# codes directly, not the original category names.
data_Source_encoded = st.sidebar.number_input("Data Source Encoded", min_value=0, value=1)
country_encoded = st.sidebar.number_input("Country Encoded", min_value=0, value=1)
region_encoded = st.sidebar.number_input("Region Encoded", min_value=0, value=1)
year = st.sidebar.number_input("Year", min_value=1900, max_value=2100, value=2024)
# NOTE(review): the Low=0 / Moderate=1 / High=2 mapping shown in the label is
# not verifiable from this file - confirm it matches the encoding used when the
# training data was prepared.
pollution_level_encoded = st.sidebar.number_input(
    "Pollution Level Encoded (Low=0, Moderate=1, High=2)", min_value=0, max_value=2, value=1
)
waste_efficiency = st.sidebar.number_input(
    "Waste Management Efficiency (0-1)", min_value=0.0, max_value=1.0, step=0.01, value=0.5
)
nearby_population = st.sidebar.number_input("Nearby Population", min_value=0, value=10000)

# --------------------------
# Prepare input dataframe for single prediction
# --------------------------
# Single-row frame built from the sidebar values. The column names are the
# feature names passed to model.predict, so they must match the names the
# model was fitted with.
input_data = pd.DataFrame({
    'data_Source_encoded': [data_Source_encoded],
    'Country_encoded': [country_encoded],
    'Region_encoded': [region_encoded],
    'Year': [year],
    'Pollution_Level_encoded': [pollution_level_encoded],
    'Waste_Management_Efficiency': [waste_efficiency],
    'Nearby_Population': [nearby_population]
})

# Echo the assembled row so the user can check it before predicting.
st.subheader("🧾 Input Data Preview")
st.write(input_data)

# --------------------------
# Single Prediction
# --------------------------
# Predict only when the button is clicked; the result is formatted with
# thousands separators and two decimals.
if st.button("Predict Plastic Weight (kg)"):
    prediction = model.predict(input_data)
    st.success(f"🌊 **Predicted Plastic Weight:** {prediction[0]:,.2f} kg")

# --------------------------
# Batch Prediction from Excel
# --------------------------
st.markdown("---")
st.subheader("📤 Upload Excel File for Batch Prediction")
uploaded_file = st.file_uploader("Upload Excel file (.xlsx) with required columns", type=["xlsx"])

if uploaded_file is not None:
    # Columns selected from the upload and passed to the model, in this order.
    required_cols = ['data_Source_encoded','Country_encoded','Region_encoded','Year',
                     'Pollution_Level_encoded','Waste_Management_Efficiency','Nearby_Population']

    # Step 1: parse the upload. Only failures here are reported as read errors,
    # so a later prediction problem is not mislabelled as a bad file.
    try:
        df = pd.read_excel(uploaded_file)
    except Exception as e:
        st.error(f"Error reading file: {e}")
        df = None

    if df is not None:
        st.write("📄 Uploaded Data Preview:", df.head())

        # Step 2: validate the schema and say exactly which columns are absent.
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            st.error(
                f"Uploaded file must contain all required columns: {required_cols}. "
                f"Missing: {missing_cols}"
            )
        else:
            # Step 3: predict. Errors raised by the model (for example on
            # unusable cell values) are surfaced under their own message.
            try:
                preds = model.predict(df[required_cols])
            except Exception as e:
                st.error(f"Error generating predictions: {e}")
            else:
                # assign() returns a new frame, leaving the uploaded data untouched.
                results = df.assign(Predicted_Plastic_Weight_kg=preds)
                st.write("✅ Prediction Results:", results.head())

                # Download predictions as CSV
                st.download_button(
                    label="⬇️ Download Predictions as CSV",
                    data=results.to_csv(index=False).encode('utf-8'),
                    file_name='Predicted_Ocean_Plastic.csv',
                    mime='text/csv'
                )

# Footer: horizontal rule followed by a small credit line.
st.markdown("---")
st.caption("Developed with ❤️ using Streamlit & Scikit-learn")


Overwriting ocean_plastic.py
