# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Data PreProcessing

In [2]:
# Importing the dataset
dataset = pd.read_csv('SwedishMotorInsurance.csv')

In [3]:
# Splitting the dataset into input and output datasets
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

y = y.reshape(-1, 1)

In [4]:
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(missing_values=0.0, strategy='mean') # initialising SimpleImputer class so that it can change 0.0 values using mean of other values
imputer.fit(X[:, 4:6]) # fitting 4th and 5th columns
X[:, 4:6] = imputer.transform(X[:, 4:6]) # using transform function to impute missing values and setting our dataset equal to newly imputed dataset

imputer.fit(y)
y = imputer.transform(y)

In [5]:
import sys
import numpy
numpy.set_printoptions(threshold=sys.maxsize)

y = np.array(y)

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [6]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.fit_transform(X_test)
X = sc.fit_transform(X)

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

# Linear Regression

In [8]:
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X_train, y_train)

In [28]:
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

predictions = linreg.predict(X_test)
predictions

array([[ 1.33573360e+04],
       [ 3.07283680e+05],
       [ 3.09035356e+05],
       [ 2.56123663e+05],
       [ 2.52887310e+05],
       [ 8.94072058e+04],
       [ 1.60068264e+04],
       [ 2.98420081e+05],
       [ 7.74181393e+02],
       [ 3.38125622e+04],
       [ 2.94466967e+05],
       [ 1.71811961e+04],
       [ 2.84702282e+05],
       [ 1.36764514e+04],
       [ 3.29415621e+04],
       [ 3.37485446e+04],
       [ 8.38929072e+04],
       [ 1.16098985e+05],
       [ 3.25271441e+04],
       [ 2.78213852e+05],
       [ 7.53598977e+05],
       [ 2.92810701e+05],
       [ 1.55013476e+04],
       [ 7.09438101e+04],
       [ 3.30928264e+04],
       [ 3.56826661e+04],
       [ 3.52289414e+04],
       [ 3.15213837e+05],
       [ 6.94985288e+04],
       [ 2.90278285e+05],
       [ 7.12922006e+04],
       [ 5.64600432e+05],
       [ 1.16068636e+03],
       [ 4.03287939e+05],
       [ 3.69856762e+04],
       [ 3.11463346e+05],
       [ 1.07979909e+06],
       [ 3.00570839e+05],
       [ 3.3

In [22]:
score = r2_score(predictions, y_test)
score

0.9930683753166462

# Polynomial Regression

In [20]:
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 5)
X_poly_train = poly_reg.fit_transform(X_train)
X_poly_test = poly_reg.fit_transform(X_test)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly_train, y_train)
lin_reg_2.coef_

array([[ 1.35846757e+01, -3.51366151e+02, -3.23394843e+02,
        -2.76686972e+02,  3.38478529e+02,  6.20188387e+02,
         2.96771906e+03, -2.73782237e+03,  9.84335326e+02,
         5.35020762e+04, -4.33729562e+03, -5.22367966e+02,
         3.38282813e+02,  3.15295611e+02,  4.66751138e+00,
        -1.38431645e+02, -1.24781832e+03,  1.18284101e+03,
         3.83728086e+02, -2.18132564e+04,  4.23251513e+03,
        -4.78511017e+02,  3.01714265e+02, -9.64916994e+00,
        -1.53094513e+02, -9.37652900e+02,  1.07477251e+03,
        -2.32885876e+02, -2.07477273e+04,  9.25916612e+03,
        -4.11207676e+02, -3.24249399e+01, -1.75671087e+02,
        -2.41743336e+02,  8.37395925e+02,  2.28172694e+02,
        -2.58489344e+04,  1.29952193e+04,  5.10884211e+02,
        -4.75996807e+02,  4.45033745e+02, -1.39818707e+03,
         1.24383482e+01, -1.02877407e+04,  1.16549082e+04,
         9.51471742e+02,  2.00657049e+03, -1.72400690e+03,
        -3.95779323e+02,  7.95573976e+04, -3.85296857e+0

In [17]:
predictions2 = lin_reg_2.predict(X_poly_test)
predictions2

array([[ 7.79183717e+03],
       [ 3.15048522e+05],
       [ 3.16384263e+05],
       [ 7.58317151e+05],
       [ 1.60950199e+05],
       [ 7.30467584e+04],
       [ 1.15014383e+04],
       [ 2.96293763e+05],
       [ 9.63819250e+03],
       [ 1.40414631e+04],
       [ 3.05406488e+05],
       [ 1.68826493e+04],
       [ 3.41737337e+05],
       [ 1.21645596e+04],
       [ 2.37050587e+04],
       [ 1.87890039e+04],
       [ 7.10809804e+04],
       [ 1.51345685e+05],
       [ 1.28192430e+04],
       [ 3.64374317e+05],
       [ 8.86685523e+05],
       [ 3.02195123e+05],
       [ 2.26223752e+04],
       [ 9.57780542e+04],
       [ 4.96330507e+04],
       [-1.35581766e+04],
       [ 2.45876930e+04],
       [ 3.35051774e+05],
       [ 7.02203395e+04],
       [ 3.11985296e+05],
       [ 1.27409725e+05],
       [ 1.39387362e+06],
       [ 2.43147981e+04],
       [ 5.31010102e+05],
       [ 1.29994980e+04],
       [ 3.40268812e+05],
       [ 2.06096205e+06],
       [ 3.13239181e+05],
       [-5.0

# SVR

In [12]:
from sklearn.svm import SVR

regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [16]:
from sklearn.metrics import accuracy_score

predictions3 = regressor.predict(X_test)
predictions3

array([69044.81813084, 69080.37745146, 69059.976542  , 69051.63275601,
       69052.37888976, 69033.22057913, 69017.89046553, 69057.55365921,
       69051.8092808 , 69081.44062005, 69067.98979694, 69031.48142487,
       69075.39300898, 69029.89386558, 69036.43697722, 69036.5066491 ,
       69068.36228242, 69050.20813353, 69027.00394591, 69082.34097521,
       69063.05776236, 69055.11777391, 69067.60118459, 69029.70593495,
       69030.94829044, 69075.27358505, 69042.5546472 , 69083.08718788,
       69059.47325125, 69066.98177073, 69061.50282547, 69082.18174962,
       69047.199833  , 69067.28238236, 69053.79804411, 69074.35553108,
       69096.66906941, 69057.51598526, 69074.32526684, 69067.73475585,
       69050.89523162, 69069.09961372, 69057.63424189, 69088.2658117 ,
       69033.22017537, 69042.26225324, 69035.34264878, 69062.56927922,
       69083.35300999, 69017.15977865, 69042.25709109, 69054.23651452,
       69061.67794197, 69054.71241414, 69050.04023235, 69078.77788002,
      