In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearnex import patch_sklearn, unpatch_sklearn

# Activate the Intel extension before the sklearn imports that follow, so the
# names imported below resolve to the patched (accelerated) implementations.
patch_sklearn()

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    f1_score,
    recall_score,
    precision_score,
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
# Load iris as pandas objects and keep three views around:
# the feature frame, the target series, and both glued together column-wise.
data = load_iris(as_frame=True)
featureDF, targetDF = data["data"], data["target"]
irisDF = pd.concat(objs=[featureDF, targetDF], axis="columns")


In [3]:
# Dtypes and non-null counts for the combined features + target frame.
irisDF.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    int32  
dtypes: float64(4), int32(1)
memory usage: 5.4 KB


In [4]:
# Dtypes and non-null counts for the feature columns only.
featureDF.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [5]:
# Dtype and non-null count for the target series.
targetDF.info()


<class 'pandas.core.series.Series'>
RangeIndex: 150 entries, 0 to 149
Series name: target
Non-Null Count  Dtype
--------------  -----
150 non-null    int32
dtypes: int32(1)
memory usage: 728.0 bytes


In [6]:
# Binary sub-problem: keep only the rows labelled 0 or 1, then split the
# resulting frame into its feature columns and its label column.
irisDF_two = irisDF.loc[lambda frame: frame["target"].isin([0, 1])]
featureDF_two = irisDF_two.drop(columns="target")
targetDF_two = irisDF_two.loc[:, "target"]


In [7]:
# Stage 1: hold out 20% of the two-class rows, preserving the class ratio.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    featureDF_two,
    targetDF_two,
    test_size=0.2,
    random_state=20240304,
    stratify=targetDF_two,
)
# Stage 2: carve the hold-out into a test part (70%) and a validation part (30%),
# again stratified on the label and using the same seed.
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout,
    y_holdout,
    test_size=0.3,
    random_state=20240304,
    stratify=y_holdout,
)


In [8]:
# Sanity check: shapes of every split, features first and then labels.
tuple(part.shape for part in (x_train, x_test, x_val, y_train, y_test, y_val))


((80, 4), (14, 4), (6, 4), (80,), (14,), (6,))

In [9]:
# Learn the standardisation statistics from the training split only, then
# apply that same transform to all three splits.
scaler = StandardScaler()
scaler.fit(x_train)
scaled_x_train, scaled_x_test, scaled_x_val = (
    scaler.transform(split) for split in (x_train, x_test, x_val)
)


In [10]:
# k-NN base classifier (default hyper-parameters) wrapped in a one-vs-rest
# meta-estimator and fitted on the scaled training split.
model = KNeighborsClassifier()
ovrModel = OneVsRestClassifier(model)
ovrModel = ovrModel.fit(scaled_x_train, y_train)


In [11]:
# Score the fitted classifier on the train split and then on the test split.
train_score, test_score = (
    ovrModel.score(features, labels)
    for features, labels in ((scaled_x_train, y_train), (scaled_x_test, y_test))
)
print(train_score, test_score)


1.0 1.0


In [12]:
# Predict class labels for the validation rows (already scaled with the
# train-fitted scaler) and display them.
y_pred = ovrModel.predict(scaled_x_val)
y_pred


array([0, 0, 1, 1, 0, 1])

In [13]:
# True labels of the validation rows, shown for comparison with y_pred.
y_val


19    0
9     0
0     1
6     1
8     0
2     1
Name: target, dtype: int32

In [14]:
# F1 on the validation rows, averaged over classes with support weighting.
f1_score(y_val, y_pred, average="weighted")


1.0

In [22]:
# Regression sub-problem restricted to class 0: predict petal length from the
# remaining three measurements.
# (An earlier variant used "petal width (cm)" as the only feature.)
irisDF_one = irisDF.loc[irisDF["target"] == 0]
targetDF_one = irisDF_one["petal length (cm)"]
featureDF_one = irisDF_one.drop(columns=["petal length (cm)", "target"])


In [23]:
# Stage 1: hold out 20% of the class-0 rows (no stratification; the target is continuous).
x_train2, x_holdout2, y_train2, y_holdout2 = train_test_split(
    featureDF_one,
    targetDF_one,
    test_size=0.2,
    random_state=20240304,
)
# Stage 2: split the hold-out into test (70%) and validation (30%) with the same seed.
x_test2, x_val2, y_test2, y_val2 = train_test_split(
    x_holdout2,
    y_holdout2,
    test_size=0.3,
    random_state=20240304,
)


In [29]:
# Sanity check: shapes of every regression split, features first and then targets.
tuple(
    part.shape
    for part in (x_train2, x_test2, x_val2, y_train2, y_test2, y_val2)
)


((40, 3), (7, 3), (3, 3), (40,), (7,), (3,))

In [31]:
# Fit a separate scaler on the regression training split and reuse it for
# the test and validation splits.
scaler2 = StandardScaler()
scaler2.fit(x_train2)
scaled_x_train2, scaled_x_test2, scaled_x_val2 = (
    scaler2.transform(split) for split in (x_train2, x_test2, x_val2)
)


In [32]:
# Ordinary least squares on the scaled regression training split.
model2 = LinearRegression()
model2 = model2.fit(scaled_x_train2, y_train2)


In [33]:
# R^2 of the linear model on the train split and then on the test split.
train_score2, test_score2 = (
    model2.score(features, targets)
    for features, targets in (
        (scaled_x_train2, y_train2),
        (scaled_x_test2, y_test2),
    )
)
print(train_score2, test_score2)


0.11871964138534441 -0.0018425378823494487


In [34]:
# Predict petal length for the scaled validation rows and display the values.
y_pred2 = model2.predict(scaled_x_val2)
y_pred2


array([1.41727553, 1.50800716, 1.44842476])

In [36]:
# Error metrics on the validation split. The target is "petal length (cm)",
# so MAE and RMSE are in cm and MSE is in cm^2.
mae = mean_absolute_error(y_val2, y_pred2)
mse = mean_squared_error(y_val2, y_pred2)
# Derive RMSE from the MSE computed above instead of calling
# mean_squared_error(..., squared=False): that flag is deprecated since
# scikit-learn 1.4 and removed in 1.6, so the old call breaks on current releases.
rmse = np.sqrt(mse)
print(mae, mse, rmse)


0.20876418275218409 0.06323554108159167 0.2514667792802693
