# Multiple Linear Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the patient table; the path is relative to the notebook's working directory.
dataset = pd.read_csv('df_patients_v20220321 - df_patients_v20220321.csv')

# Feature matrix: every column except the four dropped here, as a NumPy array.
X = dataset.drop(columns=['DocID', 'PtID', 'P8', 'P9']).values

# Target vector: the column at position 10 of the full (un-dropped) frame.
# NOTE(review): the column is picked by position, not by name -- confirm it is
# the intended target and that it is not also left among the features in X.
y = dataset.iloc[:, 10].values

In [5]:
# Sanity check of the raw feature matrix before any encoding is applied.
print(X)

[['lt60' 'no_op' 'medical' ... 11 'M' 'cardiovascular']
 ['lt60' 'no_op' 'medical' ... 5 'M' 'cardiovascular']
 ['ge60' 'no_op' 'medical' ... 5 'F' 'cardiovascular']
 ...
 ['lt60' 'emerg' 'surgical' ... 9 'F' 'gastrointestinal']
 ['lt60' 'emerg' 'medical' ... 12 'F' 'gastrointestinal']
 ['lt60' 'emerg' 'medical' ... 13 'M' 'gastrointestinal']]


## Encoding categorical data

### Encoding the Independent Variables

In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode every text-valued feature in a single pass.
#
# The categorical columns are found by inspecting the values instead of being
# addressed by hand-computed positions: ColumnTransformer emits the encoded
# columns first and the passthrough columns after them, so positions shift
# after each encoding step and depend on how many categories each column has.
categorical_columns = [
    col
    for col in range(X.shape[1])
    if any(isinstance(value, str) for value in X[:, col])
]

ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',   # keep the non-text columns unchanged
    sparse_threshold=0,        # always return a dense matrix so np.array() gives a 2-D array
)

# Resulting layout: dummy columns for each categorical feature (in their
# original left-to-right order), followed by the untouched remaining columns.
X = np.array(ct.fit_transform(X))

## Splitting the dataset into the Training set and Test set

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Multiple Linear Regression model on the Training set

In [12]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares model with scikit-learn's default settings,
# fitted on the training split only.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

LinearRegression()

## Predicting the Test set results

In [13]:
# Predict on the held-out rows.
y_pred = regressor.predict(X_test)

# Show two decimals when printing arrays (this changes NumPy's global print options).
np.set_printoptions(precision=2)

# Side-by-side view: left column = prediction, right column = observed value.
print(np.column_stack((y_pred, y_test)))

[[ 5.77  7.  ]
 [ 4.95  0.  ]
 [ 5.92 16.  ]
 [ 6.38  1.  ]
 [ 5.67  0.  ]
 [ 6.87 27.  ]
 [ 6.62  3.  ]
 [ 6.14  7.  ]
 [ 7.22 14.  ]
 [ 7.87  9.  ]
 [ 5.3   7.  ]
 [ 6.93  6.  ]
 [ 5.53  2.  ]
 [ 7.45  3.  ]
 [ 6.36  7.  ]
 [ 5.71  2.  ]
 [ 5.7   1.  ]
 [ 7.63  4.  ]
 [ 7.52  6.  ]
 [ 7.86  4.  ]
 [ 5.95  1.  ]
 [ 4.14  4.  ]
 [ 6.63 11.  ]
 [ 5.48  4.  ]
 [ 4.64  6.  ]
 [ 7.59  5.  ]
 [ 5.26  7.  ]
 [ 4.18  6.  ]
 [ 6.74 13.  ]
 [ 7.99  8.  ]
 [ 6.11  0.  ]
 [ 6.42  6.  ]
 [ 7.82  8.  ]
 [ 5.3   5.  ]
 [ 5.63  0.  ]
 [ 7.41  5.  ]
 [ 5.75 11.  ]
 [ 7.34 14.  ]
 [ 5.76  2.  ]
 [ 7.46  3.  ]
 [ 6.79  3.  ]
 [ 7.43  5.  ]
 [ 6.49  2.  ]
 [ 6.88  4.  ]
 [ 6.61  5.  ]
 [ 6.67  8.  ]
 [ 6.58  3.  ]
 [ 6.15  1.  ]
 [ 7.26  1.  ]
 [ 6.87  8.  ]
 [ 5.88 33.  ]
 [ 7.66 15.  ]
 [ 6.04  3.  ]
 [ 5.98  9.  ]
 [ 7.52  5.  ]
 [ 7.48 14.  ]
 [ 7.42  4.  ]
 [ 4.94  1.  ]
 [ 7.82  7.  ]
 [ 8.03  5.  ]
 [ 7.27  0.  ]
 [ 6.14 19.  ]
 [ 6.1   2.  ]
 [ 6.85 12.  ]
 [ 7.53  9.  ]
 [ 6.38  7.  ]
 [ 5.52 10

In [14]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the predictions on the held-out test split.
errors = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(errors)

40.17763518263893
