<a href="https://colab.research.google.com/github/Sana-Harshitha/EnviroScan-AI-Powered-Pollution-Source-Identifier-/blob/main/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Data Collection

In [None]:
!pip install osmnx

Collecting osmnx
  Downloading osmnx-2.0.6-py3-none-any.whl.metadata (4.9 kB)
Downloading osmnx-2.0.6-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.5/101.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: osmnx
Successfully installed osmnx-2.0.6


In [None]:
import pandas as pd
import osmnx as ox
import requests

In [None]:
def fetch_openaq_data(city , params, timeout=30):
  """Fetch air-quality measurements for a city from the OpenAQ v2 API.

  Args:
    city: City name, sent as the ``city`` query parameter.
    params: Pollutant code (e.g. ``"pm25"``), sent as the ``parameter``
      query parameter.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.HTTPError: If the API answers with a 4xx/5xx status.
    requests.RequestException: On connection failure or timeout.
  """
  # NOTE(review): OpenAQ has been migrating clients to its v3 API (API key
  # required) -- confirm this v2 endpoint is still served.
  url = "https://api.openaq.org/v2/measurements"
  # Let requests build the query string so values containing spaces, '&' or
  # '#' (e.g. "New Delhi") are encoded instead of corrupting the URL.
  query = {'city': city, 'parameter': params}
  response = requests.get(url, params=query, timeout=timeout)
  # Fail loudly on an error status instead of returning an error payload
  # that downstream code would mistake for data.
  response.raise_for_status()
  return response.json()

def fetch_weather_api(lat , lon , api_key, timeout=30):
  """Fetch the current weather at a coordinate from OpenWeatherMap.

  Args:
    lat: Latitude, sent as the ``lat`` query parameter.
    lon: Longitude, sent as the ``lon`` query parameter.
    api_key: OpenWeatherMap API key, sent as ``appid``.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.HTTPError: If the API answers with a 4xx/5xx status
      (for example an invalid API key).
    requests.RequestException: On connection failure or timeout.
  """
  url = "https://api.openweathermap.org/data/2.5/weather"
  params = {'lat':lat , 'lon':lon , 'appid': api_key }
  response = requests.get(url, params=params, timeout=timeout)
  response.raise_for_status()
  return response.json()


# This function used to be indented under fetch_weather_api, after its
# `return`, so it was never defined at all. It now lives at module level.
def get_location_feature(lat,lon,dist=1000):
  """Collect OpenStreetMap road and industrial features around a point.

  Args:
    lat: Latitude of the centre point.
    lon: Longitude of the centre point.
    dist: Search distance around the point, passed to OSMnx as ``dist``.

  Returns:
    A dict with two entries, ``'roads'`` (features tagged ``highway``) and
    ``'factories'`` (features tagged ``landuse=industrial``), each holding
    whatever ``ox.features.features_from_point`` returns.

  Note:
    NOTE(review): OSMnx raises when a query matches no features; callers
    in sparse areas may need to catch that -- confirm against the OSMnx docs.
  """
  center = (lat, lon)
  # OSMnx 2.x removed the `geometries` module; `features` is its replacement.
  # The unused drive-network download (`G`) was dropped: its result was never
  # read and it cost an extra Overpass request per call.
  roads = ox.features.features_from_point(center, tags={'highway': True}, dist=dist)
  # OSM's key/value is landuse=industrial (was misspelled 'landue'/'industrail',
  # which can never match anything).
  factories = ox.features.features_from_point(center, tags={'landuse': 'industrial'}, dist=dist)
  return {'roads':roads , 'factories':factories}

In [None]:
# city = "Delhi"
# parameter = "pm25"
# data = fetch_openaq_data(city, parameter)
# print(data)

In [None]:
def clean_pollution_data(df):
  """Return a cleaned copy of a flattened measurements frame.

  Steps: drop duplicate rows, drop rows missing the value or either
  coordinate, convert ``value`` to numeric, parse the UTC timestamp into a
  new ``timestamp`` column, and fill remaining gaps in numeric columns with
  the column mean.

  Args:
    df: DataFrame with columns ``value``, ``coordinates.latitude``,
      ``coordinates.longitude`` and ``date.utc``.
      NOTE(review): the dotted names assume the API payload was flattened
      (e.g. with ``pd.json_normalize``) -- confirm against the loader.

  Returns:
    A new DataFrame; the input frame is not modified.

  Raises:
    KeyError: If a required column is missing.
    ValueError: If ``value`` holds text that cannot be parsed as a number.
  """
  df = df.drop_duplicates()
  # .copy() so the column assignments below write to an independent frame
  # rather than a view of the caller's data (avoids SettingWithCopyWarning).
  df = df.dropna(subset=['value' , 'coordinates.latitude' , 'coordinates.longitude']).copy()
  df['value'] = pd.to_numeric(df['value'])
  # Was pd.to_datatime(df['date']['utc']): the function name was misspelled,
  # and a flattened frame exposes the field as the single column 'date.utc'.
  df['timestamp'] = pd.to_datetime(df['date.utc'])
  # numeric_only=True: taking the mean over text/datetime columns raises in
  # current pandas; only numeric columns can be mean-imputed anyway.
  df = df.fillna(df.mean(numeric_only=True))
  return df

def feature_engineering(df):
  """Standardise ``value`` and add calendar features, in place.

  ``value`` is replaced by its z-score, ``(x - mean) / std``, using the
  sample standard deviation. ``hour`` and ``day_of_week`` are derived from
  the ``timestamp`` column.

  Args:
    df: DataFrame with a numeric ``value`` column and a datetime
      ``timestamp`` column. It is modified in place.

  Returns:
    The same DataFrame object, for chaining.
  """
  for col in ['value']:
    centered = df[col] - df[col].mean()
    spread = df[col].std()
    # The original expression was `x - mean / std`, which by operator
    # precedence subtracts the ratio mean/std rather than scaling.
    if pd.isna(spread) or spread == 0:
      # A single row or a constant column has no spread to scale by; keep
      # the centred values (all zeros) instead of producing NaN/inf.
      df[col] = centered
    else:
      df[col] = centered / spread

  df['hour'] = df['timestamp'].dt.hour
  df['day_of_week'] = df['timestamp'].dt.dayofweek
  return df

In [None]:
def label_source(df):
  """Assign a rule-based pollution ``source`` label to every row, in place.

  Rules are applied in order, so a later rule overrides an earlier one when
  a row matches both:
    1. near a main road and NO2 > 40            -> ``'vehicular'``
    2. near a factory and SO2 > 40              -> ``'Industrial'``
    3. near farmland, dry season, PM2.5 > 70    -> ``'Agriculture'``
  Rows matching no rule keep ``'Unknown'``.

  Args:
    df: DataFrame with columns ``near_main_road``, ``near_factory``,
      ``near_farmland``, ``season``, ``NO2``, ``SO2`` and ``PM2.5``.
      It is modified in place.

  Returns:
    The same DataFrame object with the ``source`` column set.
  """
  df['source'] = 'Unknown'
  df.loc[(df['near_main_road']==1) & (df['NO2'] > 40 ),'source'] = 'vehicular'
  # Label spelling must match the "Industrial" check used when colouring the
  # map; the previous 'Industrail' never matched it.
  df.loc[(df['near_factory']==1) & (df['SO2'] > 40 ),'source'] = 'Industrial'
  # Column was written as '!PH2.5'; the pollutant column is 'PM2.5', as in
  # the model's feature list.
  df.loc[(df['near_farmland']==1) & (df['season'] == 'Dry' )& (df['PM2.5'] >70 ),'source'] = 'Agriculture'

  return df

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

def train_predict_model(df, features=None, random_state=42):
  """Grid-search a random forest that predicts ``source`` and report on it.

  The frame is split 80/20 (stratified on ``source``), a
  ``RandomForestClassifier`` is tuned with 5-fold ``GridSearchCV`` on the
  training part, and a classification report for the held-out part is
  printed.

  Args:
    df: DataFrame containing the feature columns and a ``source`` column.
    features: Column names to train on. Defaults to the pollutant,
      proximity, weather and calendar columns listed below.
      NOTE(review): 'road_proximity' and 'humidity' were previously spelled
      'road_proxitmity' and 'humidty'; confirm they match the columns the
      upstream merge actually produces.
    random_state: Seed for the split and the forest, so reruns give the
      same report.

  Returns:
    The best estimator found by the grid search.

  Raises:
    KeyError: If any requested feature column or ``source`` is missing.
  """
  if features is None:
    # 'day_of_week' matches the column created by feature_engineering
    # (it was listed here as 'dayofweek').
    features = ['PM2.5','NO2','SO2','CO','road_proximity','factories_proximity','temperature','humidity','hour','day_of_week']
  features = list(features)

  # Report every missing column at once rather than failing on the first.
  missing = [name for name in features + ['source'] if name not in df.columns]
  if missing:
    raise KeyError(f"train_predict_model: missing required columns {missing}")

  X = df[features]
  y = df['source']
  X_train , X_test , y_train , y_test = train_test_split(
      X, y, test_size=0.2, stratify=y, random_state=random_state)
  clf = RandomForestClassifier(random_state=random_state)
  param_grid = {
      'n_estimators':[50,100,200],
      'max_depth':[None,10,20,30],
      'min_samples_split':[2,5,10]
  }
  grid = GridSearchCV(clf , param_grid , cv=5)
  grid.fit(X_train , y_train)
  y_pred = grid.predict(X_test)

  print(classification_report(y_test , y_pred))

  return grid.best_estimator_

In [None]:
import folium

def plot_heatmap(df):
    """Plot every row of ``df`` as a circle on a folium map.

    The map is centred on the mean latitude/longitude of the frame at zoom
    level 12. Each row becomes a filled circle of radius 50, coloured red
    when its ``source`` is "Industrial" and blue otherwise.

    Args:
        df: DataFrame with ``latitude``, ``longitude`` and ``source`` columns.

    Returns:
        The folium map with all circles added.
    """
    centre = [df['latitude'].mean(), df['longitude'].mean()]
    pollution_map = folium.Map(location=centre, zoom_start=12)

    for _, record in df.iterrows():
        if record['source'] == "Industrial":
            marker_colour = "red"
        else:
            marker_colour = "blue"

        marker = folium.Circle(
            location=[record['latitude'], record['longitude']],
            radius=50,
            color=marker_colour,
            fill=True,
        )
        marker.add_to(pollution_map)

    return pollution_map


In [None]:
!pip install streamlit
!pip install pyngrok

!pip install -q streamlit pyngrok


Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m123.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.49.1
Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [None]:
%%writefile app.py
import streamlit as st

st.set_page_config(page_title="Pollution Source Identifier")

st.title(" AI-Powered Pollution Source Identifier")

city = st.text_input("Enter a city name", placeholder="e.g., Delhi")

 if st.button("Analyze"):
    if city.strip() == "":
        st.warning(" Please enter a valid city name.")
    else:
        st.success(f"Analyzing pollution sources for: {city}")
        st.markdown(f"""
        ###  AI Analysis Results for **{city}** (Simulated)
        - **Main Pollutants:** PM2.5, NOx, SO2
        - **Likely Sources:**
            -  Vehicle emissions
            - Industrial activity
            -  Biomass/garbage burning
        - **Air Quality Index (AQI):** 185 (Unhealthy)
        - **Recommendation:** Limit outdoor activity. Use masks. Air purifiers recommended indoors.
        """)

Writing app.py


In [None]:
# ✅ Step 1: Install required packages
!pip install pyngrok streamlit --quiet

# ✅ Step 2: Import necessary libraries
from pyngrok import ngrok
import time
import os

# ✅ Step 3: Export your ngrok authtoken (this sets an environment variable)
os.environ["NGROK_AUTHTOKEN"] =  "32Sq7u2EUMJ9812imkfYcDb8Ak1_w8EoNXvS2mnwzExxkzyh"

# ✅ Step 4: Authenticate pyngrok using the exported token
ngrok.set_auth_token(os.environ["NGROK_AUTHTOKEN"])

# ✅ Step 5: Kill any existing Streamlit process
!pkill streamlit

# ✅ Step 6: Define your Streamlit app code
app_code = '''
import streamlit as st

st.set_page_config(page_title="Streamlit via ngrok", page_icon="🔗")
st.title("🚀 Hello from Streamlit!")
st.write("This Streamlit app is running through an ngrok tunnel.")
'''

# ✅ Step 7: Write app code to a file
with open("app.py", "w") as f:
    f.write(app_code)

# ✅ Step 8: Start the Streamlit app in the background
!streamlit run app.py &> /content/logs.txt &

# ✅ Step 9: Wait a few seconds for the app to boot
time.sleep(5)

# ✅ Step 10: Open an ngrok tunnel to port 8501
public_url = ngrok.connect(8501)

# ✅ Step 11: Print the public URL
print("🌐 Your Streamlit app is live at:", public_url)


🌐 Your Streamlit app is live at: NgrokTunnel: "https://58502a92a341.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Start Streamlit in background
!streamlit run app.py &> /content/logs.txt &

# Give more time for Streamlit to start (e.g., 15 seconds)
import time
time.sleep(15)

# Then create ngrok tunnel
from pyngrok import ngrok
public_url = ngrok.connect(8501)
print("🌐 Your Streamlit app is live here:", public_url)


🌐 Your Streamlit app is live here: NgrokTunnel: "https://0a6601dff2f9.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Install required packages (run once)
!pip install --quiet streamlit pyngrok

import os
import time
import subprocess
from pyngrok import ngrok

# Set your ngrok auth token (replace with your actual token)
NGROK_AUTH_TOKEN = "32Sq7u2EUMJ9812imkfYcDb8Ak1_w8EoNXvS2mnwzExxkzyh"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Write a simple Streamlit app
app_code = """
import streamlit as st

st.title("🚀 EnviroScan Pollution Source Identifier")

city = st.text_input("Enter a city name", "Delhi")
if st.button("Analyze"):
    if not city.strip():
        st.warning("Please enter a valid city name.")
    else:
        st.success(f"Analyzing pollution sources for: {city}")
        st.markdown('''
        ### AI Analysis Results for **{city}** (Simulated)
        - **Main Pollutants:** PM2.5, NOx, SO2
        - **Likely Sources:**
          - Vehicle emissions
          - Industrial activity
          - Biomass/garbage burning
        - **Air Quality Index (AQI):** 185 (Unhealthy)
        - **Recommendation:** Limit outdoor activity. Use masks. Air purifiers recommended indoors.
        '''.format(city=city))
"""

with open("app.py", "w") as f:
    f.write(app_code)

# Kill previous Streamlit instances
subprocess.run(["pkill", "-f", "streamlit"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Disconnect any existing ngrok tunnels to avoid free-tier limits
for tunnel in ngrok.get_tunnels():
    ngrok.disconnect(tunnel.public_url)

# Start Streamlit app in the background
streamlit_process = subprocess.Popen(["streamlit", "run", "app.py"])

# Wait for Streamlit server to start
time.sleep(15)  # Increase if needed

# Open ngrok tunnel to port 8501
public_url = ngrok.connect(8501)
print(f"🌐 Your Streamlit app is live at: {public_url}")

# Keep process alive to maintain server & tunnel
try:
    streamlit_process.wait()
except KeyboardInterrupt:
    streamlit_process.terminate()
    ngrok.disconnect(public_url)
    print("Terminated Streamlit and ngrok tunnel.")




🌐 Your Streamlit app is live at: NgrokTunnel: "https://6bfca0e96310.ngrok-free.app" -> "http://localhost:8501"
