In [2]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# The connection string comes from the environment so no credentials live in the notebook.
DB_URL = os.getenv("DATABASE_URL")
if not DB_URL:
    # os.getenv returns None for a missing variable; stop here with an actionable
    # message instead of passing None / "" on to create_engine.
    raise RuntimeError(
        "DATABASE_URL is not set. Define it in the environment or in a .env file "
        "before running this notebook."
    )

engine = create_engine(DB_URL)

# Load every row and column of weather_data into a DataFrame for the cells below.
df = pd.read_sql("SELECT * FROM weather_data", engine)
df.head()


Unnamed: 0,id,city,timestamp,temperature,humidity,wind_speed,pm10,pm2_5,nitrogen_dioxide,ozone
0,1,Mumbai,2025-11-12 06:51:58.152297+00:00,25.4,67.0,9.7,43.5,41.6,17.1,81.0
1,2,Delhi,2025-11-12 06:51:58.153351+00:00,11.7,90.0,4.3,137.8,105.9,15.5,78.0
2,3,Chennai,2025-11-12 06:51:58.157426+00:00,25.4,92.0,3.2,50.2,44.8,3.4,120.0
3,4,Hyderabad,2025-11-12 06:51:58.159499+00:00,18.4,79.0,2.3,55.9,53.3,12.5,94.0
4,5,Kolkata,2025-11-12 06:51:58.175223+00:00,17.4,95.0,3.4,155.5,143.0,27.6,44.0


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of df.
df.describe()

Unnamed: 0,id,temperature,humidity,wind_speed,pm10,pm2_5,nitrogen_dioxide,ozone
count,248.0,248.0,248.0,248.0,247.0,247.0,247.0,247.0
mean,124.5,15.635484,80.774194,4.925806,51.046559,45.924291,14.989474,57.202429
std,71.735626,7.649172,14.769944,2.667661,36.874298,31.821173,10.173777,30.494427
min,1.0,-6.7,28.0,1.8,6.9,4.4,0.2,4.0
25%,62.75,11.1,74.0,3.4,21.4,19.3,8.3,34.0
50%,124.5,15.9,84.0,4.0,40.9,39.5,13.4,56.0
75%,186.25,20.5,91.0,5.4,74.0,69.8,21.0,78.0
max,248.0,27.4,98.0,14.3,155.5,143.0,47.2,124.0


In [5]:
# Print the schema overview: per-column non-null counts and dtypes, plus memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 248 entries, 0 to 247
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   id                248 non-null    int64              
 1   city              248 non-null    object             
 2   timestamp         248 non-null    datetime64[ns, UTC]
 3   temperature       248 non-null    float64            
 4   humidity          248 non-null    float64            
 5   wind_speed        248 non-null    float64            
 6   pm10              247 non-null    float64            
 7   pm2_5             247 non-null    float64            
 8   nitrogen_dioxide  247 non-null    float64            
 9   ozone             247 non-null    float64            
dtypes: datetime64[ns, UTC](1), float64(7), int64(1), object(1)
memory usage: 19.5+ KB


In [6]:
# Count missing values in each column (isna flags them, sum tallies the flags per column).
df.isna().sum()

id                  0
city                0
timestamp           0
temperature         0
humidity            0
wind_speed          0
pm10                1
pm2_5               1
nitrogen_dioxide    1
ozone               1
dtype: int64

In [7]:
# Number of distinct values in each column of df.
df.nunique()

id                  248
city                 31
timestamp           248
temperature          28
humidity             25
wind_speed           27
pm10                 61
pm2_5                58
nitrogen_dioxide     55
ozone                47
dtype: int64

In [9]:
# Per-city minimum pm10 reading, sorted ascending, first 10 rows only.
pd.read_sql(
    sql="""
SELECT city , MIN(pm10) AS min_pm10
FROM weather_data
GROUP BY city
ORDER BY min_pm10
LIMIT 10
""",
    con=engine,
)


Unnamed: 0,city,min_pm10
0,Leh,6.9
1,Dehradun,11.6
2,Kavaratti,13.4
3,Aizawl,14.4
4,Shillong,15.4
5,Chandigarh,15.9
6,Port Blair,17.1
7,Itanagar,18.5
8,Kohima,22.7
9,Panaji,24.0


In [11]:
# Ten cities with the highest mean temperature.
# avg_temp is still shown rounded to a whole number, but the ranking is done on the
# exact average: sorting on the rounded alias collapses distinct averages into ties,
# which makes both the order and the LIMIT 10 cut-off arbitrary. `city` is the final
# tie-breaker so repeated runs return the same rows in the same order.
pd.read_sql("""
SELECT
    city,
    ROUND(AVG(temperature)::numeric) AS avg_temp
FROM weather_data
GROUP BY city
ORDER BY AVG(temperature) DESC, city
LIMIT 10""", engine)


Unnamed: 0,city,avg_temp
0,Thiruvananthapuram,27.0
1,Port Blair,27.0
2,Kavaratti,27.0
3,Puducherry,25.0
4,Mumbai,25.0
5,Chennai,25.0
6,Panaji,23.0
7,Dispur,21.0
8,Bengaluru,19.0
9,Agartala,19.0


In [14]:
# Keep only the numeric columns, then compute their pairwise Pearson correlation
# (Pearson is the default method of DataFrame.corr; spelled out here for the reader).
# NOTE(review): `id` has one distinct value per row and looks like a row identifier —
# consider excluding it from the matrix; confirm before changing.
df_numeric = df.select_dtypes(include="number")
df_numeric.corr(method="pearson")

Unnamed: 0,id,temperature,humidity,wind_speed,pm10,pm2_5,nitrogen_dioxide,ozone
id,1.0,-0.029843,-0.03,0.028942,-0.06293,-0.067962,-0.059587,0.004954
temperature,-0.029843,1.0,0.360086,0.369231,0.08461,0.119878,-0.231246,0.284575
humidity,-0.03,0.360086,1.0,-0.175764,0.311973,0.330922,-0.175942,-0.002726
wind_speed,0.028942,0.369231,-0.175764,1.0,-0.316626,-0.347586,-0.393232,0.269253
pm10,-0.06293,0.08461,0.311973,-0.316626,1.0,0.988713,0.264094,0.152966
pm2_5,-0.067962,0.119878,0.330922,-0.347586,0.988713,1.0,0.316479,0.097239
nitrogen_dioxide,-0.059587,-0.231246,-0.175942,-0.393232,0.264094,0.316479,1.0,-0.66545
ozone,0.004954,0.284575,-0.002726,0.269253,0.152966,0.097239,-0.66545,1.0


In [16]:
# Mean of each listed measurement per city; reset_index turns `city` back into a column.
city_avg = (
    df.groupby('city')[
        [
            'temperature',
            'humidity',
            'pm10',
            'pm2_5',
            'nitrogen_dioxide',
            'ozone',
        ]
    ]
    .mean()
    .reset_index()
)
city_avg

Unnamed: 0,city,temperature,humidity,pm10,pm2_5,nitrogen_dioxide,ozone
0,Agartala,19.1,91.0,76.25,71.8,18.1,48.5
1,Aizawl,11.1,96.0,15.1,14.15,14.6,11.0
2,Bengaluru,19.3,93.0,49.157143,47.9,15.485714,68.142857
3,Bhopal,12.4,83.0,40.75,39.5,14.15,58.0
4,Bhubaneswar,15.6,90.0,95.0,88.8,24.4,32.0
5,Chandigarh,11.0,76.0,17.2,16.85,22.05,47.0
6,Chennai,25.4,92.0,49.65,44.35,3.0,122.0
7,Dehradun,7.8,74.0,11.95,11.8,27.2,5.0
8,Delhi,11.7,90.0,134.55,103.35,14.8,81.5
9,Dispur,20.5,94.0,39.75,38.45,9.05,45.0
