In [1]:
!pip install streamlit==1.42.0

Collecting streamlit==1.42.0
  Downloading streamlit-1.42.0-py2.py3-none-any.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m123.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rich<14,>=10.14.0
  Downloading rich-13.9.4-py3-none-any.whl (242 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.4/242.4 kB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m
Collecting blinker<2,>=1.0.0
  Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7
  Downloading GitPython-3.1.44-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.6/207.6 kB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting toml<2,>=0.10.1
  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting watchdog<7,>=2.1.5
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m29.4 M

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib
import streamlit as st

# Generate synthetic air pollution data
# Every column is drawn independently from a uniform distribution, so the
# dataset only exercises the pipeline end to end; pm2_5 has no built-in
# relationship to the other columns.
num_samples = 150  # More than 100 samples
start_date = datetime(2024, 1, 1, 0, 0)

# Seeded generator: without a fixed seed every run produces a different CSV,
# different train/test rows and therefore different MAE/RMSE values.
rng = np.random.default_rng(42)

data = {
    # One row per hour, starting at start_date
    "timestamp": [start_date + timedelta(hours=i) for i in range(num_samples)],
    "temperature": rng.uniform(0, 40, num_samples),  # Celsius
    "humidity": rng.uniform(20, 100, num_samples),  # Percentage
    "wind_speed": rng.uniform(0, 15, num_samples),  # m/s
    "pm2_5": rng.uniform(10, 150, num_samples),  # µg/m³
    "pm10": rng.uniform(20, 200, num_samples),  # µg/m³
    "no2": rng.uniform(5, 80, num_samples),  # µg/m³
    "so2": rng.uniform(1, 50, num_samples),  # µg/m³
    "co": rng.uniform(0.1, 10, num_samples),  # mg/m³
    "o3": rng.uniform(10, 100, num_samples),  # µg/m³
}

df = pd.DataFrame(data)

# Save to CSV
csv_filename = "synthetic_air_pollution_data.csv"
df.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' generated successfully!")

# Load dataset
df = pd.read_csv(csv_filename)

# Convert timestamp to numerical feature: whole seconds since the Unix epoch.
# Subtracting the epoch and floor-dividing by one second gives the same value
# whatever datetime64 resolution the parsed column has, whereas
# `.astype('int64') // 10**9` is only correct for nanosecond resolution.
df["timestamp"] = (
    pd.to_datetime(df["timestamp"]) - pd.Timestamp("1970-01-01")
) // pd.Timedelta(seconds=1)

# Define features and target variable
# X keeps every column except the target; y is the pm2_5 column to predict.
X = df.drop(columns=["pm2_5"])
y = df["pm2_5"]

# Split into training and testing sets
# 80/20 split with a fixed random_state so the same rows land in each set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
# RMSE is computed as sqrt(MSE). The `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so taking the square root explicitly works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

# Persist the fitted regressor to disk with joblib and report the file name
model_filename = "air_pollution_model.pkl"
joblib.dump(value=model, filename=model_filename)
print(f"Model saved as '{model_filename}'")

# Streamlit App
# NOTE(review): Streamlit widgets are meant to be served with
# `streamlit run <script>.py`; consider moving this section into its own
# script instead of executing it inside the notebook kernel.
st.title("Air Pollution Prediction App")
st.write("This app predicts air pollution levels based on environmental factors.")

# User Input
# Slider arguments are (label, min, max, default).
temp = st.slider("Temperature (°C)", 0.0, 40.0, 20.0)
humidity = st.slider("Humidity (%)", 20.0, 100.0, 50.0)
wind_speed = st.slider("Wind Speed (m/s)", 0.0, 15.0, 5.0)
pm10 = st.slider("PM10 (µg/m³)", 20.0, 200.0, 100.0)
no2 = st.slider("NO2 (µg/m³)", 5.0, 80.0, 40.0)
so2 = st.slider("SO2 (µg/m³)", 1.0, 50.0, 10.0)
co = st.slider("CO (mg/m³)", 0.1, 10.0, 1.0)
o3 = st.slider("O3 (µg/m³)", 10.0, 100.0, 50.0)

# Load model
model = joblib.load(model_filename)

# Prediction
if st.button("Predict PM2.5 Level"):
    # Build the single input row by column name rather than by position.
    # The model was fit on a DataFrame, so a bare ndarray triggers
    # scikit-learn's "X does not have valid feature names" warning and
    # silently depends on the argument order matching the training columns.
    feature_values = {
        "timestamp": datetime.now().timestamp(),
        "temperature": temp,
        "humidity": humidity,
        "wind_speed": wind_speed,
        "pm10": pm10,
        "no2": no2,
        "so2": so2,
        "co": co,
        "o3": o3,
    }
    # Reorder to the exact column order recorded at fit time; a missing or
    # renamed feature raises a KeyError instead of producing a wrong prediction.
    input_data = pd.DataFrame([feature_values])[list(model.feature_names_in_)]
    prediction = model.predict(input_data)[0]
    st.success(f"Predicted PM2.5 Level: {prediction:.2f} µg/m³")


CSV file 'synthetic_air_pollution_data.csv' generated successfully!
MAE: 34.69
RMSE: 41.65
Model saved as 'air_pollution_model.pkl'
2025-02-14 17:21:46.231 
  command:

    streamlit run /toolkit-cache/0.2.7/python3.9/kernel-libs/lib/python3.9/site-packages/ipykernel_launcher.py [ARGUMENTS]


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=dce7dd19-ab8b-4223-9dff-a9a8cad6bd87' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>