# Atmospheric Emissions Prediction Using a Support Vector Regression (SVR) Model

# Import the libraries

In [9]:
import os

In [11]:
import pandas as pd

In [13]:
import numpy as np

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
from sklearn.svm import SVR

In [17]:
from sklearn.svm import LinearSVR

In [18]:
from sklearn.multioutput import MultiOutputRegressor

In [19]:
from sklearn.preprocessing import StandardScaler

In [20]:
from sklearn.metrics import mean_squared_error

In [27]:
from sklearn.metrics import r2_score

In [29]:
from sklearn.decomposition import PCA

In [31]:
from sklearn.pipeline import make_pipeline

In [36]:
from sklearn.pipeline import Pipeline

In [38]:
# Define paths for the train and test data

In [40]:
# Locations of the train and test CSV files, relative to the working directory.
TRAIN_DATA_PATH, TEST_DATA_PATH = 'train_data.csv', 'test_data.csv'

In [42]:
# Load the data

In [44]:
# Read both CSV files into DataFrames, train first and then test.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_PATH, TEST_DATA_PATH)
)

In [45]:
# Display the first five rows of the train data

In [46]:
# Preview the first five rows of the training frame.
train_data.head(n=5)

Unnamed: 0,Easting,Northing,Borough_Barnet,Borough_Bexley,Borough_Brent,Borough_Bromley,Borough_Camden,Borough_City,Borough_City of Westminster,Borough_Croydon,...,Source_Small Private Vessels,Source_Small Scale Waste Burning,Source_Taxi,Source_TfL Bus,Source_WTS,Source_Wood Burning,nox,pm10,pm2.5,so2
0,-0.910504,-1.249967,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
1,1.035888,-0.322577,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
2,-1.850141,0.450249,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
3,1.707058,0.218401,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
4,1.572824,-0.090729,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.234448,-0.295601,-0.404647,0.0


In [50]:
# Display the first five rows of the test data

In [52]:
# Preview the first five rows of the test frame.
test_data.head(n=5)

Unnamed: 0,Easting,Northing,Borough_Barnet,Borough_Bexley,Borough_Brent,Borough_Bromley,Borough_Camden,Borough_City,Borough_City of Westminster,Borough_Croydon,...,Source_Small Private Vessels,Source_Small Scale Waste Burning,Source_Taxi,Source_TfL Bus,Source_WTS,Source_Wood Burning,nox,pm10,pm2.5,so2
0,-0.1051,1.686769,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
1,-1.783024,-0.786272,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,1.234381,1.766942,3.368122,0.0
2,0.76742,0.604814,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0
3,-0.1051,-0.477142,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,0.97389,3.215296,4.742593,0.0
4,-1.313205,1.764052,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,-0.236545,-0.295601,-0.404647,0.0


# Develop the Model

In [55]:
# Method: Using the Support Vector Regression (SVR) Algorithm

In [57]:
# Define the target columns(Pollutants)

In [59]:
# Pollutant columns that the model predicts (the regression targets).
target_columns = [
    'nox',
    'pm10',
    'pm2.5',
    'so2',
]

In [61]:
# Define x (features) and y (target) for both train and test data.

In [63]:
# Split the training frame into features (every non-target column) and
# targets. Reuses `target_columns` so the pollutant list lives in one place
# instead of being retyped for every split.
# NOTE(review): the 'Unnamed: 0' column shown by head() is not dropped here, so
# it is passed to the model as a feature. It looks like a saved row index —
# confirm whether it should be excluded.
x_train = train_data.drop(columns=target_columns)
y_train = train_data[target_columns]

In [65]:
# Split the test frame into features (every non-target column) and targets.
# Reuses `target_columns` so the pollutant list lives in one place instead of
# being retyped for every split.
# NOTE(review): 'Unnamed: 0' is kept as a feature here as well — confirm
# whether that is intended.
x_test = test_data.drop(columns=target_columns)
y_test = test_data[target_columns]

In [67]:
# Print the shape of the training and test dataset

In [69]:
# Report the (rows, columns) shape of each feature/target split.
train_shapes = f"x_train shape: {x_train.shape}, y_train shape : {y_train.shape}"
test_shapes = f"x_test shape: {x_test.shape}, y_test shape : {y_test.shape}"
print(train_shapes)
print(test_shapes)

x_train shape: (59934, 95), y_train shape : (59934, 4)
x_test shape: (14984, 95), y_test shape : (14984, 4)


# Initialise the SVR Model and consolidate it with MultiOutput Regressor

In [72]:
# Scale the data using Standard Scaler

In [74]:
# Standardise the features: statistics are learned from the training split
# only and then applied unchanged to both splits.
# NOTE(review): the Pipeline defined below has its own StandardScaler step and
# is fitted on the unscaled x_train, so x_train_scaled / x_test_scaled are not
# consumed by it in the code visible here.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Create a Pipeline with Scaling, PCA and SVR Model 

In [77]:
# Three-stage estimator: standardise -> PCA -> one linear SVR per target.
# The regressor is LinearSVR, wrapped in MultiOutputRegressor so that all
# target columns are predicted together.
pipeline = Pipeline([
    # Scale the features.
    ('scaler', StandardScaler()),
    # Dimensionality reduction; 0.95 is a fractional n_components
    # (explained-variance threshold) rather than a component count.
    ('pca', PCA(n_components=0.95)),
    # Fit the SVR model.
    ('svr', MultiOutputRegressor(LinearSVR(max_iter=20000, random_state=42))),
])

# Train and Fit the Model

In [None]:
# Fit every pipeline stage on the raw training features and targets.
pipeline.fit(X=x_train, y=y_train)

# Make the Prediction on the test data

In [None]:
# Run the test features through the fitted pipeline to get the predictions.
svr_predictions = pipeline.predict(X=x_test)

# Model Evaluation Using mean square error

# Evaluate the predictions using the mean squared error (MSE) and R-squared (R²).

In [None]:
# Score the predictions against the held-out targets.
# Fix: the imported function is `r2_score`; the previous `r2_scores` was an
# undefined name and raised NameError.
# With scikit-learn's default multi-output handling, both calls return a single
# number averaged across the target columns rather than one value per pollutant.
mse = mean_squared_error(y_test, svr_predictions)
r2 = r2_score(y_test, svr_predictions)

In [None]:
# Print both evaluation metrics, one per line.
print(f"Mean Squared Error: {mse}", f"R-Squared: {r2}", sep="\n")

# Save the Trained Model

# We will now save our trained model for future purposes and predictions.