### Necessary Imports

In [60]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split 
from error_define import *
from dataset import *
from sklearn.metrics import mean_squared_error

### Reading Data from CSV File

In [61]:
X,Y = load_dataset()
X.head()

Unnamed: 0,PM2.5 (µg/m³),PM10 (µg/m³),NO (µg/m³),NO2 (µg/m³),NOx (ppb),NH3 (µg/m³),SO2 (µg/m³),CO (mg/m³),Ozone (µg/m³),AQI_calculated
0,46.0,80.0,1.29,9.16,12.02,27.19,13.56,0.4,15.8,67.0
1,46.0,80.0,1.74,8.93,12.48,30.29,13.71,0.41,15.52,68.0
2,45.62,79.92,1.87,8.56,12.17,28.2,13.88,0.41,15.33,68.0
3,41.0,72.92,1.83,8.72,12.37,26.69,13.77,0.4,15.3,68.0
4,41.0,79.0,1.69,7.91,11.3,26.83,13.87,0.41,15.49,68.0


#### Splitting the Dataset into training and testing data

In [62]:
#Splitting the dataset into 
X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size =0.3,  random_state = 0 )        

# Implementing Linear regression class for prediction

In [63]:
class LinearRegression_Im:
    def __init__(self):
        self.coefficients = None
    
    def fit(self, X, y):
        """
        Fit the linear regression model to the training data.
        
        Parameters:
        X : numpy array, shape (n_samples, n_features)
            Training data.
        y : numpy array, shape (n_samples,)
            Target values.
        """
        # Add bias term to the features
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        
        # Calculate coefficients using normal equation
        self.coefficients = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    
    def predict(self, X):
        """
        Predict target values for input data.
        
        Parameters:
        X : numpy array, shape (n_samples, n_features)
            Input data.
        
        Returns:
        predictions : numpy array, shape (n_samples,)
            Predicted target values.
        """
        
        # Add bias term to the features
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        
        # Make predictions
        predictions = X_b.dot(self.coefficients)
        return predictions

In [64]:
model = LinearRegression_Im()

model.fit( X_train, Y_train)

mean_squared_error(model.predict(X_test),Y_test)

6661.498418894131

In [65]:
downside_square_error(np.array(model.predict(X_test)),np.array(Y_test))

4242.004048555026

### Using the sklearn LinearRegression Model to predict the AQI

In [66]:
from sklearn.linear_model import LinearRegression              # Importing LinearRegression from sklearn
lr=LinearRegression().fit(X_train, Y_train)


mean_squared_error(model.predict(X_test),Y_test)              # Fitting our training data to train our model

6661.498418894131

In [67]:
downside_square_error(np.array(lr.predict(X_test)),np.array(Y_test))

4242.004048554982

### Checking Results on feature engineered dataset

In [68]:
X,Y = load_dataset_features()
#Splitting the dataset into 
X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size =0.3,  random_state = 0 )        

In [69]:
model = LinearRegression_Im()

model.fit( X_train, Y_train)

mean_squared_error(model.predict(X_test),Y_test)

235548.54087966934

In [70]:
downside_square_error(np.array(model.predict(X_test)),np.array(Y_test))

152566.2431259419

In [71]:
model = LinearRegression()

model.fit( X_train, Y_train)

mean_squared_error(model.predict(X_test),Y_test)

6530.996999493072

In [72]:
downside_square_error(np.array(model.predict(X_test)),np.array(Y_test))

4173.474006890959