# K-fold cross-validation

In [8]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline

# Load the Iris dataset
iris = load_iris()
X = iris.data
Y = iris.target

# Step 1: Scale the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Create and fit the logistic regression model
model = make_pipeline(scaler, LogisticRegression(max_iter=1000))

# Step 3: Perform cross-validation
k_fold = KFold(n_splits=7)
scores = cross_val_score(model, X_scaled, Y, cv=k_fold)

# Step 4: Print the cross-validation scores
print("Cross Validation Score: {}".format(scores))
print("Average Cross Validation Score: {}".format(scores.mean()))


Cross Validation Score: [1.         1.         0.72727273 0.47619048 0.95238095 0.61904762
 0.71428571]
Average Cross Validation Score: 0.7841682127396414


# Holdout cross-validation

In [9]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
Y = iris.target

# Step 1: Scale the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

lr = LogisticRegression()

# Step 2: Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.25, random_state=42)

# Step 3: Fit the logistic regression model on the training data
lr.fit(x_train, y_train)

# Step 4: Make predictions on the testing data
results = lr.predict(x_test)

# Step 5: Calculate and print the accuracy
print("Training accuracy: {}".format(accuracy_score(lr.predict(x_train), y_train)))
print("Testing accuracy: {}".format(accuracy_score(results, y_test)))

Training accuracy: 0.9107142857142857
Testing accuracy: 0.9736842105263158


# Stratified k-fold cross-validation

In [16]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline

# Load the Iris dataset
iris = load_iris()
X = iris.data
Y = iris.target

# Step 1: Scale the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Create and fit the logistic regression model
model = make_pipeline(scaler, LogisticRegression(max_iter=1000))

# Step 3: Perform cross-validation
st_kf = StratifiedKFold(n_splits=3)
scores = cross_val_score(model, X_scaled, Y, cv=st_kf)

# Step 4: Print the cross-validation scores
print("Cross Validation Score: {}".format(scores))
print("Average Cross Validation Score: {}".format(scores.mean()))

Cross Validation Score: [0.88 0.94 0.92]
Average Cross Validation Score: 0.9133333333333332


# Leave-one-out cross-validation

In [19]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.pipeline import make_pipeline

# Load the Iris dataset
iris = load_iris()
X = iris.data
Y = iris.target

# Step 1: Scale the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

rfc = RandomForestClassifier(n_estimators=7, max_depth=4, n_jobs=-1)

# Step 2: Create and fit the random forest classifier model
model = make_pipeline(scaler, rfc)

# Step 3: Perform cross-validation
leave_one_out = LeaveOneOut()
scores = cross_val_score(model, X_scaled, Y, cv=leave_one_out)

# Step 4: Print the cross-validation scores
print("Cross Validation Score: {}".format(scores))
print("Average Cross Validation Score: {}".format(scores.mean()))

Cross Validation Score: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
Average Cross Validation Score: 0.94
