# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Data PreProcessing

In [2]:
# Importing the dataset
dataset = pd.read_csv('SwedishMotorInsurance.csv')

In [3]:
# Splitting the dataset into input and output datasets
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

y = y.reshape(-1, 1)

In [4]:
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(missing_values=0.0, strategy='mean') # initialising SimpleImputer class so that it can change 0.0 values using mean of other values
imputer.fit(X[:, 4:6]) # fitting 4th and 5th columns
X[:, 4:6] = imputer.transform(X[:, 4:6]) # using transform function to impute missing values and setting our dataset equal to newly imputed dataset

imputer.fit(y)
y = imputer.transform(y)

In [32]:
import sys
import numpy
numpy.set_printoptions(threshold=100)

y = np.array(y)

# Encoding Independant variables:
    
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [6]:
# Scaling / Normalising Values

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
y = sc.fit_transform(y)

In [7]:
# Splitting the dataset into testing and training datasets

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

# Linear Regression

In [8]:
#Linear Regression model

from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X_train, y_train)

In [33]:
# Using model to predict 

from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

predictions = sc.inverse_transform(linreg.predict(X_test))
y_check = sc.inverse_transform(y_test)

print(np.concatenate((predictions.reshape(len(predictions),1), y_check.reshape(len(y_check),1)),1))

[[ 13343.97459754  31442.        ]
 [307271.07143925 312070.49582638]
 [309028.64472261 312070.49582638]
 ...
 [307096.23219114 312070.49582638]
 [  5340.3562914    1373.        ]
 [ 32685.73422097  34055.        ]]


In [23]:
# Checking accuracy

score = r2_score(predictions, y_check)
score

0.9930681598218419

# Polynomial Regression

In [52]:
# Polynomial Regression model

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 2)
X_poly_train = poly_reg.fit_transform(X_train)
X_poly_test = poly_reg.fit_transform(X_test)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly_train, y_train)
lin_reg_2.coef_

array([[ 1.83637801e+05, -1.71419975e+09,  2.03393056e+10,
        -2.66424418e+10,  2.77999878e+09,  1.60350646e+09,
         1.50880320e-02,  8.94381192e-04, -1.02922501e-02,
         1.29082739e-01,  9.61311237e-01,  3.23646727e+08,
         7.97674448e+09, -5.74949757e+09, -2.93084424e+08,
         8.34810108e+08,  5.83232655e+07, -1.02270491e+06,
        -1.20643454e+07, -8.40463887e+07, -9.69066488e+07,
        -8.48528327e+09,  2.43806410e+09,  5.27678866e+09,
         5.09588357e+09,  5.84224222e+07, -1.02444363e+06,
        -1.20848562e+07, -8.41892778e+07, -9.70714019e+07,
         1.27020517e+10, -3.82501557e+09, -3.26427640e+09,
         5.84224222e+07, -1.02444363e+06, -1.20848562e+07,
        -8.41892779e+07, -9.70714019e+07, -2.83934925e+09,
         1.05447026e+09,  5.80732936e+07, -1.01832162e+06,
        -1.20126379e+07, -8.36861681e+07, -9.64913095e+07,
        -1.51789470e+09,  5.77182794e+07, -1.01209641e+06,
        -1.19392021e+07, -8.31745771e+07, -9.59014380e+0

In [53]:
predictions2 = sc.inverse_transform(lin_reg_2.predict(X_poly_test))

print(np.concatenate((predictions.reshape(len(predictions),1), y_check.reshape(len(y_check),1)),1))

[[ 13343.97459754  31442.        ]
 [307271.07143925 312070.49582638]
 [309028.64472261 312070.49582638]
 ...
 [307096.23219114 312070.49582638]
 [  5340.3562914    1373.        ]
 [ 32685.73422097  34055.        ]]


In [54]:
score2 = r2_score(predictions2, y_check)
score2

0.9952884906412051

# SVR

In [55]:
# SVR Model

from sklearn.svm import SVR

regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [60]:
predictions3 = regressor.predict(X_test)
predictions3

array([-0.27857746, -0.08942306, -0.08555395, ..., -0.0967408 ,
       -0.27725795, -0.27637573])

In [61]:
score3 = r2_score(predictions3, y_test)
score3

0.6665490489167022