# 11.1, 11.2, 11.8 Sample Code

## 11.1 Cross-Validating Models

In [2]:
import pandas as pd
import numpy as np
import json
import sys
import warnings
from sklearn.datasets import make_regression
from sklearn.feature_selection import RFECV
from sklearn import datasets, linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.decomposition import NMF
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

In [3]:
# Load the digits dataset and separate the feature matrix from the labels.
digits = datasets.load_digits()
features, target = digits.data, digits.target

# Chain standardization and logistic regression into a single estimator so
# both steps are cross-validated together.
standardizer = StandardScaler()
logit = LogisticRegression()
pipeline = make_pipeline(standardizer, logit)

# Stratified 10-fold splitter; shuffling uses a fixed seed for repeatability.
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

# Score the pipeline once per fold.
cv_results = cross_val_score(
    pipeline,            # estimator to evaluate
    features,            # feature matrix
    target,              # target vector
    cv=kf,               # cross-validation splitter defined above
    scoring="accuracy",  # evaluation metric (a score, not a loss)
    n_jobs=-1,           # -1 asks scikit-learn to use every available core
)

# Mean accuracy first, then the individual fold scores.
print(cv_results.mean())
print(cv_results)
print("results")

0.9666076970825573
[0.96666667 0.97222222 0.98333333 0.95555556 0.96666667 0.95
 0.97777778 0.96089385 0.96648045 0.96648045]
results


## 11.2 Creating a Baseline Regression Model

In [4]:
# scikit-learn deprecated load_boston in 1.0 and removed it in 1.2, so the
# plain import raises ImportError on current releases. Older installs keep
# using the bundled loader; newer ones fall back to a small replacement that
# rebuilds the same arrays from the original StatLib file (needs network
# access). scikit-learn recommends fetch_california_housing for new work.
try:
    from sklearn.datasets import load_boston
except ImportError:
    def load_boston():
        """Return the Boston housing data as a Bunch with `data` and `target`.

        Minimal stand-in for the removed sklearn.datasets.load_boston; it
        provides only the two attributes this notebook reads.
        """
        import numpy as np
        import pandas as pd
        from sklearn.utils import Bunch

        # Every record spans two physical lines in the raw file: the first
        # holds 11 feature values, the second holds the remaining 2 features
        # followed by the target value.
        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        return Bunch(data=data, target=target)

from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split

In [5]:
# Load the Boston housing dataset
boston = load_boston()

In [6]:
# Pull the feature matrix and the target vector out of the loaded dataset
features = boston.data
target = boston.target

In [7]:
# Split into training and test sets; the fixed seed keeps the split repeatable
(features_train,
 features_test,
 target_train,
 target_test) = train_test_split(features, target, random_state=0)

In [8]:
# Baseline estimator configured with the 'mean' strategy
dummy = DummyRegressor(strategy="mean")

In [9]:
# Fit the baseline on the training split. The call is left as the cell's
# final expression so the fitted estimator's repr is displayed.
dummy.fit(features_train, target_train)

DummyRegressor()

In [10]:
# R-squared of the baseline on the held-out test split
print(dummy.score(X=features_test, y=target_test))

-0.001119359203955339


### To compare, train a linear regression model and evaluate its R-squared score

In [11]:
#load library
from sklearn.linear_model import LinearRegression

In [12]:
# Fit a linear regression on the same training split used for the baseline
ols = LinearRegression()
ols.fit(features_train, target_train)

LinearRegression()

In [13]:
# R-squared of the linear model on the held-out test split, for comparison
# with the baseline score above
print(ols.score(X=features_test, y=target_test))

0.6354638433202118


## 11.8 Evaluating Regression Models

In [14]:
#Load libraries
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

In [15]:
# Build a synthetic regression problem: 100 samples, 3 features (all of them
# informative), one target, noise level 50, seeded so the data is repeatable.
# Note that this rebinds `features` and `target` from the previous section.
features, target = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=3,
    n_targets=1,
    noise=50,
    coef=False,
    random_state=1,
)

In [16]:
# Create a fresh, unfitted linear regression estimator. This rebinds `ols`,
# replacing the model that was fitted on the Boston split in section 11.2.
ols = LinearRegression()

In [17]:
# Cross-validate the linear regression with the 'neg_mean_squared_error'
# scorer. The printed values are the negated MSE of each fold, so numbers
# closer to zero indicate a better fit.
print(
    "MSE:  ",
    cross_val_score(ols, features, target, scoring="neg_mean_squared_error"),
)

MSE:   [-1974.65337976 -2004.54137625 -3935.19355723 -1060.04361386
 -1598.74104702]


In [18]:
# Cross-validate the same estimator again, this time scoring each fold by
# R-squared
print(
    "R2:",
    cross_val_score(ols, features, target, scoring="r2"),
)

R2: [0.8622399  0.85838075 0.74723548 0.91354743 0.84469331]
