Fitting a Line
-
Problem
-
- You want to train a model that represents a linear relationship between the features and the target vector.

Solution
-
- Use a linear regression (in scikit-learn, LinearRegression):

In [1]:
# Load libraries
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

# NOTE(review): load_boston is reported as deprecated/removed in newer
# scikit-learn releases — confirm the environment's version still provides it.

# Load the dataset, keeping only the first two feature columns
boston = load_boston()
features = boston.data[:, :2]
target = boston.target

# Instantiate an ordinary least squares estimator
regression = LinearRegression()

# Fit it on the two features; the result of fit() is kept as `model`
model = regression.fit(features, target)


In [2]:
# View the fitted intercept (the constant term of the linear model)
model.intercept_

22.485628113468223

In [3]:
# View the fitted coefficients, one per feature column passed to fit()
model.coef_

array([-0.35207832,  0.11610909])

In [4]:
# First value in the target vector multiplied by 1000
# NOTE(review): the x1000 scaling presumably converts a target recorded in
# thousands into plain units — confirm against the dataset description
target[0]*1000

24000.0

In [5]:
# Predict the target for every observation, then show the prediction for the
# first observation multiplied by 1000 (same scaling as the actual value)
model.predict(features)[0]*1000

24573.366631705547

In [6]:
# First coefficient multiplied by 1000, i.e. expressed on the same x1000
# scale used for the target values
model.coef_[0]*1000

-352.07831564026736

# Exploring the Dataset

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [17]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

# Load the full dataset (the next cell uses every feature column)
boston = load_boston()

In [18]:
# Keep every feature column and the target vector for exploration
features = boston.data
target = boston.target

In [21]:
# Wrap the target vector in a DataFrame and preview its first rows
ta = pd.DataFrame(target)
ta.head()

Unnamed: 0,0
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [20]:
# Wrap the feature matrix in a DataFrame (columns are unnamed, so they are
# labelled 0..12) and preview its first rows
fe = pd.DataFrame(features)
fe.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


# Building a Model with Our Own Data

In [26]:
# Location of the Boston housing CSV. The original author's local path is kept
# as the default so existing runs behave the same; set the BOSTON_CSV
# environment variable to point at the file on any other machine.
import os

BOSTON_CSV = os.environ.get(
    "BOSTON_CSV",
    "E:/MS_AI_IUB_DATASETS_2022/machine learning dataset/boston.csv",
)

# Read the CSV into a DataFrame and preview its first rows
data = pd.read_csv(BOSTON_CSV)
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [27]:
# Select the RM column as the single predictor.
# Double brackets ([[...]]) return a DataFrame (2-D);
# single brackets ([...]) would return a Series (1-D).

X = data[['RM']]
X.head()

Unnamed: 0,RM
0,6.575
1,6.421
2,7.185
3,6.998
4,7.147


In [28]:
# Select the MEDV column as the target, kept as a one-column DataFrame
Y = data[['MEDV']]
Y.head()

Unnamed: 0,MEDV
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [29]:
from sklearn.model_selection import train_test_split

# Hold out part of the data: 70% of the rows go to training, the rest to
# testing. random_state is fixed so the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7, random_state=1)

In [30]:
# Report the dimensions of the full data and of each split.
# Each (features, target) pair is printed together; pairs are separated by a
# blank line.
shape_groups = [(X, Y), (X_train, Y_train), (X_test, Y_test)]
for position, (inputs, outputs) in enumerate(shape_groups):
    if position:
        print()
    print(inputs.shape)
    print(outputs.shape)

(506, 1)
(506, 1)

(354, 1)
(354, 1)

(152, 1)
(152, 1)


In [31]:
# Fit a fresh estimator on the training split. Creating it here avoids silently
# re-fitting whichever `model` object an earlier cell happened to leave behind,
# so this cell also works on a freshly restarted kernel.
model = LinearRegression()
model.fit(X_train, Y_train)

LinearRegression()

In [32]:
# Load libraries
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston

# Create a new linear regression estimator
regression = LinearRegression()

# Fit it on the training split only (X_train -> Y_train); the result of
# fit() is kept as `model` for the cells that follow
model = regression.fit(X_train, Y_train)


In [33]:
# View the intercept; it is displayed as a one-element array because the
# target was passed as a one-column DataFrame rather than a 1-D vector
model.intercept_

array([-30.57103241])

In [34]:
# View the feature coefficients (a single value here, for the RM column)
model.coef_

array([[8.46109164]])

In [42]:
# Predict the target for the whole training split, then show the prediction
# for its first row multiplied by 1000. Note this is the first row of
# X_train, not the first row of the original data.
model.predict(X_train)[0]*1000

array([19764.0017383])

In [43]:
# First (and only) row of the coefficient array multiplied by 1000
model.coef_[0]*1000

array([8461.09163712])

# Handling Interactive Effects

In [1]:
# Load libraries
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures

# Load the dataset and restrict it to the first two feature columns
boston = load_boston()
features = boston.data[:, :2]
target = boston.target

In [2]:
# Build the interaction transformer: no constant (bias) column, and only
# products of distinct features are added (no powers of a single feature).
# With the two input columns used here the result has three columns:
# the two original features and their product.
interaction = PolynomialFeatures(
    degree=3,
    interaction_only=True,
    include_bias=False,
)
features_interaction = interaction.fit_transform(features)

In [3]:
# Create linear regression
regression = LinearRegression()

# Fit the linear regression on the features plus their interaction term
model = regression.fit(features_interaction, target)


In [4]:
# View the two raw feature values of the first observation
features[0]

array([6.32e-03, 1.80e+01])

In [5]:
# Import library
import numpy as np

# Element-wise product of the first and second feature columns, giving one
# interaction value per observation
first_feature = features[:, 0]
second_feature = features[:, 1]
interaction_term = first_feature * second_feature

In [7]:
# Manually computed interaction value for the first observation
interaction_term[0]

0.11376

In [8]:
# View the transformed values of the first observation: the two original
# features followed by their product
features_interaction[0]

array([6.3200e-03, 1.8000e+01, 1.1376e-01])

# Fitting a Nonlinear Relationship

In [9]:
# Load library
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures

# Load the data and keep only the first feature column (as a 2-D,
# single-column array)
boston = load_boston()
features = boston.data[:, :1]
target = boston.target

# Expand x into the columns x, x^2 and x^3 (no constant column)
polynomial = PolynomialFeatures(include_bias=False, degree=3)
features_polynomial = polynomial.fit_transform(features)

# Fit a linear regression on the expanded features
regression = LinearRegression()
model = regression.fit(features_polynomial, target)

In [10]:
# View the first observation's single feature value, x
features[0]

array([0.00632])

In [11]:
# View the first observation raised to the second power, x^2
# (computed by hand to compare with the generated polynomial features)
features[0]**2

array([3.99424e-05])

In [12]:
# View the first observation raised to the third power, x^3
# (computed by hand to compare with the generated polynomial features)
features[0]**3

array([2.52435968e-07])

In [13]:
# View the first observation's generated values for x, x^2, and x^3
features_polynomial[0]


array([6.32000000e-03, 3.99424000e-05, 2.52435968e-07])

# Reducing Variance with Regularization

In [14]:
# Use a learning algorithm that includes a shrinkage penalty (also called
# regularization), such as ridge regression or lasso regression.

# Load libraries
from sklearn.linear_model import Ridge
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler

# Load data (all feature columns)
boston = load_boston()
features = boston.data
target = boston.target

# Create the scaler; the features are actually standardized in the next cell
scaler = StandardScaler()

In [15]:
# Standardize the features (fit the scaler and transform in one step)
features_standardized = scaler.fit_transform(features)

# Create ridge regression with an alpha value
regression = Ridge(alpha=0.5)

# Fit the ridge regression on the standardized features
model = regression.fit(features_standardized, target)

In [16]:
# Load library
from sklearn.linear_model import RidgeCV

# Candidate regularization strengths for RidgeCV to choose between
candidate_alphas = [0.1, 1.0, 10.0]
regr_cv = RidgeCV(alphas=candidate_alphas)

# Fit the cross-validated ridge regression on the standardized features
model_cv = regr_cv.fit(features_standardized, target)

# View the coefficients of the fitted model
model_cv.coef_

array([-0.91987132,  1.06646104,  0.11738487,  0.68512693, -2.02901013,
        2.68275376,  0.01315848, -3.07733968,  2.59153764, -2.0105579 ,
       -2.05238455,  0.84884839, -3.73066646])

In [17]:
# View the alpha value held by the fitted RidgeCV model
# (one of the candidates supplied when it was created)
model_cv.alpha_

1.0

# Reducing Features with Lasso Regression

In [18]:
# Use a lasso regression.

# Load libraries
from sklearn.linear_model import Lasso
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler

# Load data
boston = load_boston()

In [19]:
# Use all feature columns and the target vector
features = boston.data
target = boston.target

# Standardize features
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Create lasso regression with alpha value
regression = Lasso(alpha=0.5)

# Fit the lasso regression on the standardized features
model = regression.fit(features_standardized, target)

In [20]:
# View coefficients; several are exactly zero in the displayed output, so
# those features do not contribute to this model
model.coef_

array([-0.11526463,  0.        , -0.        ,  0.39707879, -0.        ,
        2.97425861, -0.        , -0.17056942, -0.        , -0.        ,
       -1.59844856,  0.54313871, -3.66614361])

In [21]:
# Create lasso regression with a high alpha (10 instead of 0.5)
regression_a10 = Lasso(alpha=10)
# Fit on the same standardized features and view the coefficients; in the
# displayed output every coefficient is zero at this penalty strength
model_a10 = regression_a10.fit(features_standardized, target)
model_a10.coef_

array([-0.,  0., -0.,  0., -0.,  0., -0.,  0., -0., -0., -0.,  0., -0.])