JupyterLab Notebook Examples:  
Verbose REFERENTIAL material to be utilized in a LINUX environment.  

GOTCHA - Be mindful of the Default Working Directory setting  

Install required Python components:  

        sudo dnf install pip  

        pip install scikit-learn --user

In [1]:
'''
'sklearn_TrainTestSplit_iris.ipynb'      

Extract 'X' and 'y' as type 'numpy.ndarray' from a
   'bunch' object when loading the 'iris' sklearn dataset.

Access the two-ndarray tuple returned
    when loading the 'iris' sklearn dataset.
    
Test ndarrays 'X' and 'Xn' for equality.
Test ndarrays 'y' and 'yn' for equality.

Access the Pandas DataFrame and Pandas Series returned in a tuple
    when loading the 'iris' sklearn dataset.

split ndarrays into training and testing sets using
    sklearn 'train_test_split'.

split Pandas DataFrames into training and testing sets using
    sklearn 'train_test_split'.

Delete all created DataFrames from memory.
'''
#
from IPython.display import display
from pathlib import Path
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import os
import pandas as pd
import time
#
# Load the 'iris' sklearn dataset as a 'bunch' object and pull the
#   feature / target ndarrays and the name collections out of it.
#
# 411 about named argument(s) follows:
#
# 'return_X_y=False' is the default; it is spelled out here only to make
#   the contrast with the later 'return_X_y=True' calls obvious.
# With it, load_iris() returns a 'bunch', an object of type
#   'sklearn.utils._bunch.Bunch', whose attributes are read below:
#     'bunch.data'          -> features ndarray 'X'
#     'bunch.target'        -> target ndarray 'y'
#     'bunch.feature_names' -> feature names
#     'bunch.target_names'  -> target names
# 'as_frame' is not passed, so it keeps its default.
#
bunch = load_iris(return_X_y=False)
X, y = bunch.data, bunch.target
feature_names, target_names = bunch.feature_names, bunch.target_names

# Report the type of each extracted object.
print(
    f"Type of 'bunch' is: {type(bunch)}",
    f"Type of 'X' is: {type(X)}",
    f"Type of 'y' is: {type(y)}",
    sep="\n",
)
# Show the feature names, the target names and a small sample of 'X'.
print(
    f"\n'Feature names:\n{feature_names}",
    f"'Target names:\n{target_names}",
    f"\nFirst 5 rows of X:\n{X[:5]}",
    sep="\n",
)

#
# Load the 'iris' sklearn dataset again, this time unpacking the
#   two-ndarray tuple that is returned.
#
# 411 about named argument(s) follows:
#
# 'return_X_y=True' combined with 'as_frame=False' (Default) makes
#   load_iris() return a tuple of two ndarrays (features, target).
#   Feature and Target names are NOT part of that tuple.
Xn, yn = load_iris(return_X_y=True)
print(
    f"\nType of 'Xn' is: {type(Xn)}",
    f"Type of 'yn' is: {type(yn)}",
    sep="\n",
)

# np.array_equal() is the reason numpy is imported EXPLICITLY.
# Compare the arrays taken from the 'bunch' with the tuple members.
X_matches_Xn = np.array_equal(X, Xn)
y_matches_yn = np.array_equal(y, yn)
print(f"\nTesting ndarrays 'X' and 'Xn' for equality: {X_matches_Xn}")
print(f"Testing ndarrays 'y' and 'yn' for equality: {y_matches_yn}")
print(f"\nFirst 5 rows of Xn:\n{Xn[:5]}")

#
# Load the 'iris' sklearn dataset a third time, now unpacking the
#   Pandas DataFrame and Pandas Series returned in a tuple.
#
# 411 about named argument(s) follows:
#
# 'as_frame=True' together with 'return_X_y=True' makes load_iris()
#   return a (DataFrame, Series) tuple: features in 'Xp', target in 'yp'.
#   The feature names travel along as the DataFrame's column labels.
Xp, yp = load_iris(as_frame=True, return_X_y=True)
print(
    f"\nType of 'Xp' is: {type(Xp)}",
    f"Type of 'yp' is: {type(yp)}",
    sep="\n",
)

# NOTE: the printed label reads 'df_Xp', but the frame shown is 'Xp'.
print("\ndf_Xp.head():")
display(Xp.head())

# Hold out part of the 'X' and 'y' ndarrays as a testing set.
# 'X' has 150 rows, so 'test_size=0.4' produces:
#   test data of 150 x 0.4 == 60 rows (the other 90 rows are training).
#
# Argument 'random_state' defaults to 'None'; an integer seed is passed
#   instead so the shuffle is reproducible between runs.
random_state = integer_random_seed = 1
test_size = 0.4
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)

# Report the shapes of the new 'X' objects, then of the new 'y' objects.
print(
    f"'X_train Shape': {X_train.shape}",
    f"'X_test Shape': {X_test.shape}",
    f"\n'Y_train Shape': {y_train.shape}",
    f"'Y_test Shape': {y_test.shape}",
    sep="\n",
)

# Hold out part of DataFrame 'Xp' and Series 'yp' as a testing set.
# 'Xp' has 150 rows, so 'test_size=0.4' produces:
#   test data of 150 x 0.4 == 60 rows (the other 90 rows are training).
#
# Argument 'random_state' defaults to 'None'; an integer seed is passed
#   instead so the shuffle is reproducible between runs.
random_state = integer_random_seed = 1
test_size = 0.4
Xp_train, Xp_test, yp_train, yp_test = train_test_split(
    Xp,
    yp,
    test_size=test_size,
    random_state=random_state,
)

# Report the shapes of the new 'Xp' objects, then of the new 'yp' objects.
print(
    f"\n'Xp_train Shape': {Xp_train.shape}",
    f"'Xp_test Shape': {Xp_test.shape}",
    f"\n'Yp_train Shape': {yp_train.shape}",
    f"'Yp_test Shape': {yp_test.shape}",
    sep="\n",
)

# Unbind the Pandas objects 'Xp' and 'yp' and the four pieces split
#   from them, so the names no longer keep that data alive.
#   Targets are removed left to right, same order as before.
del Xp, yp, Xp_train, Xp_test, yp_train, yp_test

# Print a timestamp for this run: local date and time, followed by the
#   time zone name ('%Z') and the UTC offset ('%z').
timestamp_format = '%Y-%m-%d %H:%M:%S %Z %z'
time_local = time.localtime()
time_string = time.strftime(timestamp_format, time_local)
# The empty first argument yields the leading blank line.
print('', time_string, sep='\n')
# EOF

Type of 'bunch' is: <class 'sklearn.utils._bunch.Bunch'>
Type of 'X' is: <class 'numpy.ndarray'>
Type of 'y' is: <class 'numpy.ndarray'>

'Feature names:
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
'Target names:
['setosa' 'versicolor' 'virginica']

First 5 rows of X:
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]

Type of 'Xn' is: <class 'numpy.ndarray'>
Type of 'yn' is: <class 'numpy.ndarray'>

Testing ndarrays 'X' and 'Xn' for equality: True
Testing ndarrays 'y' and 'yn' for equality: True

First 5 rows of Xn:
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]

Type of 'Xp' is: <class 'pandas.core.frame.DataFrame'>
Type of 'yp' is: <class 'pandas.core.series.Series'>

df_Xp.head():


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


'X_train Shape': (90, 4)
'X_test Shape': (60, 4)

'Y_train Shape': (90,)
'Y_test Shape': (60,)

'Xp_train Shape': (90, 4)
'Xp_test Shape': (60, 4)

'Yp_train Shape': (90,)
'Yp_test Shape': (60,)

2024-10-02 18:17:49 PDT -0700
