Before you turn this problem in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel $\rightarrow$ Restart) and then **run all cells** (in the menubar, select Cell $\rightarrow$ Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE", as well as your name and collaborators below:

In [None]:
# Name of the person submitting this notebook and of any collaborators.
NAME = "Tejaswi Nalajala"
COLLABORATORS = ""

---

Before you turn this problem in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel $\rightarrow$ Restart) and then **run all cells** (in the menubar, select Cell $\rightarrow$ Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE", as well as your name and collaborators below:

In [None]:
# Name of the person submitting this notebook and of any collaborators.
NAME = "Tejaswi Nalajala"
COLLABORATORS = ""

---

## <font color="brown">Project: Air Quality Monitoring System</font>
## <font color="brown">- Tejaswi Nalajala </font>

## <font color="black">Dashboard </font>

collectdata.py

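This script fetches air quality readings for each configured location from the OpenWeatherMap Air Pollution API and stores them in the `air_quality` table of a PostgreSQL database.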
 

In [None]:
import requests
import psycopg2
from db_config import DB_SETTINGS, API_KEY

def fetch_data(location):
    """Fetch air-pollution data for one location from OpenWeatherMap.

    Args:
        location: Mapping with ``'lat'`` and ``'lon'`` keys.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        requests.RequestException: On a connection failure, a timeout, or a
            non-2xx HTTP status.
    """
    url = "http://api.openweathermap.org/data/2.5/air_pollution"
    # Let requests build and escape the query string instead of formatting
    # it by hand.
    params = {"lat": location['lat'], "lon": location['lon'], "appid": API_KEY}
    # requests has no default timeout; without one a stalled connection
    # would block the collector indefinitely.
    response = requests.get(url, params=params, timeout=10)
    # Surface HTTP errors (e.g. 401 for a bad key) here rather than letting
    # the caller fail later on a missing 'list' key in the error body.
    response.raise_for_status()
    return response.json()

def save_data(db_conn, data, location):
    """Insert the first reading in ``data`` into ``air_quality`` and commit.

    Args:
        db_conn: Open database connection providing ``cursor()`` and
            ``commit()``.
        data: API response; only ``data['list'][0]`` is read.
        location: Mapping whose ``'name'`` value is stored in the
            ``location`` column.
    """
    pollutant_keys = ('co', 'no2', 'o3', 'so2', 'pm2_5', 'pm10')
    with db_conn.cursor() as cur:
        query = """
            INSERT INTO air_quality (location, time, aqi, co, no2, o3, so2, pm2_5, pm10)
            VALUES (%s, NOW(), %s, %s, %s, %s, %s, %s, %s)
        """
        site = location['name']
        reading = data['list'][0]
        aqi = reading['main']['aqi']
        components = reading['components']
        # Parameter order must match the column list in the INSERT above.
        values = (site, aqi) + tuple(components[key] for key in pollutant_keys)
        cur.execute(query, values)
        db_conn.commit()

def main():
    """Fetch and store one air-quality reading for every configured location.

    The database connection is closed even if fetching or saving raises.
    """
    locations = [{"name": "City Center", "lat": 40.7128, "lon": -74.0060}]

    db_conn = psycopg2.connect(**DB_SETTINGS)
    try:
        for location in locations:
            data = fetch_data(location)
            save_data(db_conn, data, location)
    finally:
        # Release the connection on both the success and the error path.
        db_conn.close()


if __name__ == "__main__":
    main()


db_config.py 

Contains database configuration settings.

In [9]:
import os

# Keyword arguments for psycopg2.connect(). Each value can be overridden
# through the matching libpq environment variable so that real credentials
# do not have to be written into the notebook; the placeholder defaults are
# used when the variable is unset.
DB_SETTINGS = {
    'dbname': os.environ.get('PGDATABASE', 'air_quality_db'),
    'user': os.environ.get('PGUSER', 'your_username'),
    'password': os.environ.get('PGPASSWORD', 'your_password'),
    'host': os.environ.get('PGHOST', 'localhost'),
}

# OpenWeatherMap API key; set OPENWEATHER_API_KEY to supply a real key
# without editing this file.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', 'your_openweather_api_key')


## <font color="black">Data Collection </font>

cleandata.py

Script to clean and validate raw air quality data.

In [None]:
import psycopg2
from db_config import DB_SETTINGS

def clean_data(db_conn):
    """Remove rows without an AQI and clamp the remaining AQI values to 1..5.

    Both statements are committed together; if either fails the transaction
    is rolled back and the exception is re-raised.

    Args:
        db_conn: Open database connection providing ``cursor()``,
            ``commit()`` and ``rollback()``.
    """
    try:
        with db_conn.cursor() as cur:
            cur.execute("DELETE FROM air_quality WHERE aqi IS NULL")
            # Only touch rows that are actually out of range; the clamp is a
            # no-op for values already within 1..5, so rewriting them would
            # be wasted work.
            cur.execute(
                "UPDATE air_quality SET aqi = GREATEST(1, LEAST(aqi, 5)) "
                "WHERE aqi < 1 OR aqi > 5"
            )
        db_conn.commit()
    except Exception:
        # Leave the connection in a usable state for the caller.
        db_conn.rollback()
        raise

def main():
    """Connect to the database, run the cleaning step, and always disconnect."""
    db_conn = psycopg2.connect(**DB_SETTINGS)
    try:
        clean_data(db_conn)
    finally:
        # Close the connection even when cleaning raises.
        db_conn.close()


if __name__ == "__main__":
    main()


data_processing.py

Script for advanced data processing and analytics.

In [None]:
import pandas as pd
import psycopg2
from db_config import DB_SETTINGS

def create_features(db_conn):
    """Build ``air_quality_features`` with a per-location rolling AQI mean.

    Reads every row of ``air_quality``, adds a ``rolling_aqi`` column holding
    the mean of the current and the two preceding readings at the same
    location (NaN until three readings exist), and writes the result to the
    ``air_quality_features`` table, replacing it if it already exists.

    Args:
        db_conn: Open database connection usable by pandas.
    """
    # Without ORDER BY the row order is unspecified, and a rolling window is
    # only meaningful over chronologically sorted readings.
    query = "SELECT * FROM air_quality ORDER BY location, time"
    data = pd.read_sql(query, db_conn)

    # Compute the window inside each location so it never averages readings
    # taken at different sites.
    data['rolling_aqi'] = (
        data.groupby('location', sort=False, dropna=False)['aqi']
        .transform(lambda series: series.rolling(window=3).mean())
    )
    # NOTE(review): pandas documents to_sql() for SQLAlchemy connectables and
    # sqlite3 connections; confirm that the psycopg2 connection passed by
    # main() is accepted, or switch to an SQLAlchemy engine.
    data.to_sql('air_quality_features', db_conn, if_exists='replace')

def main():
    """Connect to the database, build the feature table, and always disconnect."""
    db_conn = psycopg2.connect(**DB_SETTINGS)
    try:
        create_features(db_conn)
    finally:
        # Close the connection even when feature creation raises.
        db_conn.close()


if __name__ == "__main__":
    main()


## <font color="black">Data Processing </font>

App.py

Dash web application that reads the `air_quality` table and displays the readings on a map that refreshes every five minutes.

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import psycopg2
import pandas as pd
from db_config import DB_SETTINGS

# Dash application object; the layout and the callback below are attached to it.
app = dash.Dash(__name__)

def fetch_data():
    """Load every row of ``air_quality`` into a DataFrame.

    A new connection is opened for each call and is closed even if the
    query raises.

    Returns:
        DataFrame with one row per stored reading.
    """
    db_conn = psycopg2.connect(**DB_SETTINGS)
    try:
        return pd.read_sql("SELECT * FROM air_quality", db_conn)
    finally:
        # The callback runs repeatedly; never leave a connection open when
        # the query fails.
        db_conn.close()

# Page structure: a title, the map figure, and a timer whose tick count
# ("n_intervals") is the input of the update_map callback.
app.layout = html.Div(
    children=[
        html.H1("Air Quality Dashboard"),
        dcc.Graph(id="map"),
        dcc.Interval(
            id="interval-component",
            interval=5 * 60 * 1000,  # milliseconds, i.e. five minutes
            n_intervals=0,
        ),
    ],
)

@app.callback(
    Output("map", "figure"),
    [Input("interval-component", "n_intervals")]
)
def update_map(n):
    """Rebuild the map figure from the current contents of ``air_quality``.

    Args:
        n: Tick count of the interval component; it only triggers the
            callback and is not otherwise used.

    Returns:
        A Plotly scatter-mapbox figure coloured by ``aqi``.
    """
    readings = fetch_data()
    # NOTE(review): the figure reads "lat" and "lon" columns, but the INSERT
    # in collectdata.py writes no such columns; confirm the table provides
    # them.
    figure_options = {
        "lat": "lat",
        "lon": "lon",
        "color": "aqi",
        "mapbox_style": "carto-positron",
        "title": "Air Quality Index",
    }
    return px.scatter_mapbox(readings, **figure_options)

# Start the Dash server, with debug mode enabled, only when this file is run
# as a script.
if __name__ == "__main__":
    app.run_server(debug=True)


## <font color="black">Models </font>

arima.py

Implements ARIMA model for air quality forecasting.

In [None]:
import pandas as pd
import psycopg2
from statsmodels.tsa.arima.model import ARIMA
from db_config import DB_SETTINGS

def train_arima_model(data):
    """Fit an ARIMA(1, 1, 1) model to the ``aqi`` column of ``data``.

    Args:
        data: Table-like object whose ``'aqi'`` entry is the series to model.

    Returns:
        The fitted results object produced by ``ARIMA(...).fit()``.
    """
    arima_order = (1, 1, 1)
    return ARIMA(data['aqi'], order=arima_order).fit()

def predict_arima(fitted_model, steps=5):
    """Return the model's forecast for the next ``steps`` periods.

    Args:
        fitted_model: Object exposing ``forecast(steps=...)``.
        steps: Number of periods to forecast (default 5).

    Returns:
        Whatever ``fitted_model.forecast`` returns.
    """
    forecast = fitted_model.forecast(steps=steps)
    return forecast

def main():
    """Load the AQI history, fit the ARIMA model, and print a forecast.

    The database connection is closed as soon as the data is loaded, and
    also when loading raises.
    """
    # A time-series model needs the observations in chronological order;
    # without ORDER BY the row order is unspecified.
    query = "SELECT * FROM air_quality ORDER BY time"
    db_conn = psycopg2.connect(**DB_SETTINGS)
    try:
        data = pd.read_sql(query, db_conn)
    finally:
        db_conn.close()

    model = train_arima_model(data)
    predictions = predict_arima(model)
    print(predictions)


if __name__ == "__main__":
    main()


lstm.py

Implements LSTM model for predicting air quality trends.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import psycopg2
from db_config import DB_SETTINGS

def train_lstm_model(data):
    """Train a small stacked LSTM to predict the next row from the current one.

    The original version fitted the network with the inputs as their own
    targets, which only teaches it to reproduce its input. Here each row is
    paired with the row that follows it, so the model learns a one-step-ahead
    forecast.

    Args:
        data: 2-D array of shape ``(n_samples, n_steps)`` in chronological
            order. ``main()`` passes the AQI column as ``(n_samples, 1)``.

    Returns:
        The compiled and fitted Keras model.

    Raises:
        ValueError: If ``data`` is not 2-D or has fewer than two rows, since
            at least one (current, next) pair is needed.
    """
    series = np.asarray(data, dtype="float32")
    if series.ndim != 2 or series.shape[0] < 2:
        raise ValueError(
            "data must be a 2-D array with at least two rows, "
            f"got shape {series.shape}"
        )

    n_steps = series.shape[1]
    # Keras recurrent layers take (batch, timesteps, features); make the
    # trailing feature axis explicit instead of relying on implicit
    # reshaping of a 2-D array.
    inputs = series[:-1].reshape(-1, n_steps, 1)
    # Target for row i is row i + 1.
    targets = series[1:]

    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(n_steps, 1)),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(inputs, targets, batch_size=1, epochs=1)
    return model

def main():
    """Load the AQI history, train the LSTM, and print its predictions.

    The database connection is closed as soon as the data is loaded, and
    also when loading raises.
    """
    # Sequence models need the observations in chronological order; without
    # ORDER BY the row order is unspecified.
    query = "SELECT * FROM air_quality ORDER BY time"
    db_conn = psycopg2.connect(**DB_SETTINGS)
    try:
        data = pd.read_sql(query, db_conn)
    finally:
        db_conn.close()

    data = np.array(data['aqi']).reshape(-1, 1)
    model = train_lstm_model(data)
    # The network is declared with input_shape (1, 1); pass the matching
    # (samples, timesteps, features) array explicitly.
    predictions = model.predict(data.reshape(-1, 1, 1))
    print(predictions)


if __name__ == "__main__":
    main()


## <font color="black">Summary </font>

In [2]:
import requests

# API key for OpenWeatherMap. This value is a placeholder; replace it with
# your actual key if testing the API. When a request fails, fetch_data()
# falls back to simulated data.
API_KEY = "your_api_key"

def fetch_data(location):
    """
    Fetch air quality data from the OpenWeatherMap API for a given location.

    Returns simulated data if the request fails (connection error, timeout,
    non-2xx status) or the response body is not valid JSON.
    """
    url = "http://api.openweathermap.org/data/2.5/air_pollution"
    # Let requests build and escape the query string.
    params = {"lat": location['lat'], "lon": location['lon'], "appid": API_KEY}
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the loop in main() indefinitely.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decoding error is not a RequestException.
        reason = str(e)
        if API_KEY:
            # requests puts the full URL, including the appid parameter, in
            # its error messages; keep the key out of the printed output.
            reason = reason.replace(str(API_KEY), "***")
        print(f"Error fetching data for location {location['name']}: {reason}")
        print("Using simulated data instead.")
        # Simulated air quality data
        return {
            "list": [
                {
                    "main": {"aqi": 2},
                    "components": {
                        "co": 201.94,
                        "no2": 12.56,
                        "o3": 68.02,
                        "so2": 1.45,
                        "pm2_5": 5.34,
                        "pm10": 8.67,
                    }
                }
            ]
        }

def save_data(data, location):
    """
    Temporarily print air quality data instead of saving it to a database.

    Prints the AQI and every component of the first entry in data['list'].
    If the payload is missing a key or its list is empty, an error line is
    printed instead of raising.
    """
    try:
        print(f"Saving data for {location['name']}...")
        print(f"  AQI: {data['list'][0]['main']['aqi']}")
        for component, value in data['list'][0]['components'].items():
            print(f"  {component}: {value}")
    except (KeyError, IndexError) as e:
        # IndexError: the API answered with an empty "list".
        print(f"Error processing data for location {location['name']}: {e}")

def main():
    """
    Fetch (or simulate) air quality data for each location and print it.
    """
    print("Simulating data fetching and saving...")

    # Locations to process, in order
    city_center = {"name": "City Center", "lat": 40.7128, "lon": -74.0060}
    uptown = {"name": "Uptown", "lat": 40.7306, "lon": -73.9352}

    for location in (city_center, uptown):
        data = fetch_data(location)
        if not data:
            # Nothing usable came back for this location; report and move on.
            print(f"Failed to fetch or simulate data for {location['name']}.")
            continue
        save_data(data, location)


if __name__ == "__main__":
    main()


Simulating data fetching and saving...
Error fetching data for location City Center: 401 Client Error: Unauthorized for url: http://api.openweathermap.org/data/2.5/air_pollution?lat=40.7128&lon=-74.006&appid=your_api_key
Using simulated data instead.
Saving data for City Center...
  AQI: 2
  co: 201.94
  no2: 12.56
  o3: 68.02
  so2: 1.45
  pm2_5: 5.34
  pm10: 8.67
Error fetching data for location Uptown: 401 Client Error: Unauthorized for url: http://api.openweathermap.org/data/2.5/air_pollution?lat=40.7306&lon=-73.9352&appid=your_api_key
Using simulated data instead.
Saving data for Uptown...
  AQI: 2
  co: 201.94
  no2: 12.56
  o3: 68.02
  so2: 1.45
  pm2_5: 5.34
  pm10: 8.67
