# LST PREDICTION WITH THE HELP OF AIR POLLUTANTS

This project uses satellite-derived air-pollutant measurements (AOD, NO₂, CO, O₃) to predict Land Surface Temperature (LST) in Noida with two machine-learning models: Linear Regression and a Random Forest regressor. The data were collected from Google Earth Engine for multiple years (2018, 2021, 2024) and seasons (summer, winter), and preprocessed in Python (pandas, scikit-learn).

In [32]:
import numpy as np
import pandas as pd

## Load Dataset

In [33]:
# Load the dataset; the relative path 'LST.csv' is resolved against the kernel's working directory.
df=pd.read_csv('LST.csv')

In [34]:
# Preview the first rows to sanity-check the columns and value scales.
df.head()

Unnamed: 0,Year,Season,AOD,CO,NO2,O3,LST
0,2018,Winter,-0.424806,0.043344,0.000228,0.115029,23.535808
1,2018,Winter,-0.462145,0.043404,0.000234,0.115158,25.415719
2,2018,Winter,-0.321403,0.045138,0.000324,0.115436,25.832718
3,2018,Winter,-0.545578,0.041898,0.000234,0.1153,22.992343
4,2018,Winter,-0.523436,0.042641,0.000241,0.115381,24.81073


In [35]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    1000 non-null   int64  
 1   Season  1000 non-null   object 
 2   AOD     1000 non-null   float64
 3   CO      1000 non-null   float64
 4   NO2     1000 non-null   float64
 5   O3      1000 non-null   float64
 6   LST     1000 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 54.8+ KB


In [36]:
# Summary statistics for the numeric columns.
df.describe()

Unnamed: 0,Year,AOD,CO,NO2,O3,LST
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,2021.6,-0.068956,0.043478,0.000165,0.125757,32.079989
std,2.246118,0.388557,0.001919,5.3e-05,0.006361,7.824722
min,2018.0,-0.661718,0.039615,9.7e-05,0.114324,21.888323
25%,2021.0,-0.523348,0.041841,0.000121,0.12366,25.585424
50%,2021.0,0.137581,0.043484,0.00015,0.126394,28.042468
75%,2024.0,0.274019,0.044951,0.000198,0.129703,39.237053
max,2024.0,0.462432,0.047487,0.000337,0.13436,51.402925


In [37]:
# Count missing values per column.
df.isnull().sum()

Year      0
Season    0
AOD       0
CO        0
NO2       0
O3        0
LST       0
dtype: int64

In [38]:
# Count rows that exactly repeat an earlier row.
df.duplicated().sum()

0

## Train-test split

In [39]:
from sklearn.model_selection import train_test_split

# Select features and target by column name rather than by position, so the
# split stays correct even if columns in LST.csv are reordered or appended.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.drop(columns='LST'),  # features: every column except the target
    df['LST'],               # target: land surface temperature
    test_size=0.2,           # hold out 20% of the rows for evaluation
    random_state=42,         # fixed seed so the split is reproducible
)

In [40]:
# Spot-check five random training rows; the fixed seed keeps the displayed
# output stable across Restart & Run All.
X_train.sample(5, random_state=42)

Unnamed: 0,Year,Season,AOD,CO,NO2,O3
681,2024,Summer,0.316905,0.043645,0.000154,0.1342
843,2024,Winter,0.177669,0.046635,0.000201,0.123806
492,2021,Winter,0.235839,0.04412,0.000146,0.126299
944,2024,Winter,0.071705,0.045411,0.000133,0.123687
255,2021,Summer,-0.639929,0.041066,0.000118,0.129497


## Encoding

In [41]:
from sklearn.preprocessing import OneHotEncoder

# Encoder for the categorical Season column: the first category is dropped so
# the dummy columns are not redundant, and a dense array is returned.
ohe = OneHotEncoder(
    drop='first',
    sparse_output=False,
)

In [42]:
# Fit the encoder on the training split only, then encode that split.
season_encoded_train = ohe.fit_transform(X_train[['Season']])

In [43]:
# Swap the raw Season column for its one-hot columns. The guard makes the cell
# safe to re-run: without it, a second execution raises KeyError because
# Season has already been dropped from X_train.
if 'Season' in X_train.columns:
    season_df_train = pd.DataFrame(
        season_encoded_train,
        columns=ohe.get_feature_names_out(['Season']),
        index=X_train.index,  # keep the row labels so join() aligns row-for-row
    )
    X_train = X_train.drop('Season', axis=1).join(season_df_train)

In [44]:
# Encode the test split with the encoder already fitted on the training data
# (transform only, no refit). The guard makes the cell safe to re-run: without
# it, a second execution raises KeyError because Season has already been
# dropped from X_test.
if 'Season' in X_test.columns:
    season_encoded_test = ohe.transform(X_test[['Season']])
    season_df_test = pd.DataFrame(
        season_encoded_test,
        columns=ohe.get_feature_names_out(['Season']),
        index=X_test.index,  # keep the row labels so join() aligns row-for-row
    )
    X_test = X_test.drop('Season', axis=1).join(season_df_test)

## Linear Regression

In [45]:
from sklearn.linear_model import LinearRegression

# Linear baseline model with default settings.
lr = LinearRegression()

In [46]:
# Fit the linear model on the encoded training features.
lr.fit(X_train,Y_train)

In [47]:
# Predict LST for the held-out test rows.
y_pred=lr.predict(X_test)

## Evaluation Metrics

In [48]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [49]:
# R² of the linear model on the test split.
r2_score(y_true=Y_test, y_pred=y_pred)

0.9114895394784478

In [50]:
# Mean absolute error of the linear model on the test split.
mean_absolute_error(y_true=Y_test, y_pred=y_pred)

1.8353547761639084

In [51]:
# Mean squared error of the linear model on the test split; stored so the
# RMSE can be derived from it.
mse_lr = mean_squared_error(y_true=Y_test, y_pred=y_pred)
mse_lr

5.1629054849733675

In [52]:
# Root of the MSE: expresses the linear model's error in the same units as LST.
RMSE_lr=np.sqrt(mse_lr)
RMSE_lr

2.2722027825379865

In [53]:
from sklearn.ensemble import RandomForestRegressor

# Random Forest regressor, trained and scored on the same split as the linear
# model; the fixed seed keeps the fitted forest reproducible.
rfr = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

In [54]:
# Fit the forest on the same encoded training features used for the linear model.
rfr.fit(X_train, Y_train)

In [55]:
# Predict LST for the held-out test rows with the forest.
y_pred_rfr = rfr.predict(X_test)


In [56]:
# R² of the Random Forest on the test split.
r2_score(y_true=Y_test, y_pred=y_pred_rfr)

0.9276361912359465

In [57]:
# Mean absolute error of the Random Forest on the test split.
mean_absolute_error(y_true=Y_test, y_pred=y_pred_rfr)

1.5414030004213726

In [58]:
# Mean squared error of the Random Forest on the test split; stored so the
# RMSE can be derived from it.
mse_rfr = mean_squared_error(y_true=Y_test, y_pred=y_pred_rfr)
mse_rfr

4.221054810696899

In [59]:
# Root of the MSE: expresses the Random Forest's error in the same units as LST.
RMSE_rfr=np.sqrt(mse_rfr)
RMSE_rfr

2.0545205792828893