 # Atmospheric Emmissions Prediction Using Support Vector Regression (SVR) Model

# Import the libraries

In [1]:
import os

In [2]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
import joblib

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
from sklearn.svm import SVR

In [7]:
from sklearn.svm import LinearSVR

In [8]:
from sklearn.multioutput import MultiOutputRegressor

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
from sklearn.metrics import mean_squared_error

In [11]:
from sklearn.metrics import r2_score

In [12]:
from sklearn.decomposition import PCA

In [13]:
from sklearn.pipeline import make_pipeline

In [14]:
from sklearn.pipeline import Pipeline

In [15]:
# Define paths for train data

In [16]:
TRAIN_DATA_PATH = '../../data/processed/train_data.csv'
TEST_DATA_PATH = '../../data/processed/test_data.csv'

In [17]:
# Load the data

In [18]:
train_data = pd.read_csv(TRAIN_DATA_PATH)
test_data = pd.read_csv(TEST_DATA_PATH)

In [19]:
# Display the first five rows of the train data

In [20]:
train_data.head()

Unnamed: 0,Easting,Northing,Borough_Barnet,Borough_Bexley,Borough_Brent,Borough_Bromley,Borough_Camden,Borough_City,Borough_City of Westminster,Borough_Croydon,...,Source_Small Private Vessels,Source_Small Scale Waste Burning,Source_Taxi,Source_TfL Bus,Source_WTS,Source_Wood Burning,nox,pm10,pm2.5,so2
0,-0.910504,-1.249967,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
1,1.035888,-0.322577,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
2,-1.850141,0.450249,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
3,1.707058,0.218401,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
4,1.572824,-0.090729,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.234448,-0.295601,-0.404647,0.0


In [21]:
# Display the first five rows of the test data

In [22]:
test_data.head()

Unnamed: 0,Easting,Northing,Borough_Barnet,Borough_Bexley,Borough_Brent,Borough_Bromley,Borough_Camden,Borough_City,Borough_City of Westminster,Borough_Croydon,...,Source_Small Private Vessels,Source_Small Scale Waste Burning,Source_Taxi,Source_TfL Bus,Source_WTS,Source_Wood Burning,nox,pm10,pm2.5,so2
0,-0.1051,1.686769,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
1,-1.783024,-0.786272,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,1.234381,1.766942,3.368122,0.0
2,0.76742,0.604814,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
3,-0.1051,-0.477142,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,0.97389,3.215296,4.742593,0.0
4,-1.313205,1.764052,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0


# Develop the Model

In [23]:
# Method: Using Support Vector Regression Algorith (SVR)

In [24]:
# Define the target columns(Pollutants)

In [25]:
target_columns = ['nox','pm10','pm2.5','so2']

In [26]:
# Define x (features) and y (target) for both train and test data.

In [27]:
x_train = train_data.drop(['nox', 'pm10', 'pm2.5', 'so2'], axis=1)
y_train = train_data[['nox', 'pm10', 'pm2.5', 'so2']]

In [28]:
x_test = test_data.drop(['nox','pm10','pm2.5', 'so2'], axis=1)
y_test = test_data[['nox','pm10','pm2.5','so2']]

In [29]:
# Print the shape of the training and test dataset

In [30]:
print (f"x_train shape: {x_train.shape}, y_train shape : {y_train.shape}")
print (f"x_test shape: {x_test.shape}, y_test shape : {y_test.shape}")

x_train shape: (59934, 95), y_train shape : (59934, 4)
x_test shape: (14984, 95), y_test shape : (14984, 4)


# Initialise the SVR Model and consolidate it with MultiOutput Regressor

In [31]:
# Scale the data using Standard Scaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train) # fit and transform the training data
x_test_scaled = scaler.transform(x_test) # transform the test data

# Create a Pipeline with Scaling, PCA and SVR Model 

pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()), # Scale the features
    ('pca', PCA(n_components=0.95)),  # Apply PCA for Dimensional Reduction
    ('svr', MultiOutputRegressor(LinearSVR(random_state=42, max_iter=50000)))  # Fit the SVR model
])

In [32]:
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),  # Scale the features
    ('pca', PCA(n_components=0.95)),  # Apply PCA for Dimensional Reduction
    ('svr', MultiOutputRegressor(LinearSVR(random_state=42, max_iter=50000), n_jobs=-1))  # Fit the SVR model and Parallelize across cores
])

# Train and Fit the Model

In [33]:
pipeline.fit(x_train, y_train)

# Make the Prediction on the test data

In [34]:

# Make predictions using the loaded pipeline
svr_predictions = pipeline.predict(x_test)

# Print a short summary of the predictions (first 5 predictions)
print(f"Predictions (first 5): {svr_predictions[:5]}")


Predictions (first 5): [[-0.23652937 -0.29492034 -0.40520607  0.        ]
 [ 0.04819226  0.28327174  0.62943275  0.        ]
 [-0.2374908  -0.29529185 -0.40988757  0.        ]
 [ 0.18140251  0.58635626  0.94454483  0.        ]
 [-0.23640523 -0.29504387 -0.40493734  0.        ]]


# Evaluate each target's prediction by using mean square error and r square.

In [35]:
mse = mean_squared_error(y_test, svr_predictions)
r2 = r2_score(y_test, svr_predictions) 
print(f"Mean Squared Error: {mse}")
print(f"R-Squared: {r2}")


Mean Squared Error: 0.8829494164203728
R-Squared: 0.133470901236239


# Save the Train Model

### We will now save our trained model for future purposes, evaluations and predictions

In [36]:
# Save the entire pipeline (scaler, PCA, and SVR)
MODEL_SAVE_PATH = '../../models/svr_pipeline_model.pkl'
joblib.dump(pipeline, MODEL_SAVE_PATH)

print(f"Pipeline model saved to: {MODEL_SAVE_PATH}")


Pipeline model saved to: ../../models/svr_pipeline_model.pkl
