In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Split configuration, named here so the values are not buried in the call below.
TARGET_COLUMN = 'species'  # column holding the class label
TEST_SIZE = 0.2            # fraction of rows held out for the test set
RANDOM_STATE = 42          # fixed seed so the shuffled split is reproducible

# Load the iris sample dataset through seaborn
df = sns.load_dataset('iris')

# Separate features and target (df itself is left untouched)
features = df.drop(columns=TARGET_COLUMN)
target = df[TARGET_COLUMN]

# Encode the string class labels as integer codes; `encoder` is kept so the
# codes can later be mapped back with encoder.inverse_transform / encoder.classes_
encoder = LabelEncoder()
encoded_target = encoder.fit_transform(target)
assert len(encoded_target) == len(features), "features/target length mismatch"

# Split the dataset into training and testing sets.
# NOTE(review): the split is not stratified, so class proportions in the test
# set may differ from the full dataset. Passing stratify=encoded_target would
# fix that, but it changes which rows land in each partition.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    encoded_target,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Sanity checks: the partitions must cover every row, share no rows, and keep
# features aligned with targets.
assert len(X_train) + len(X_test) == len(features), "split lost or duplicated rows"
assert len(X_train) == len(y_train), "training features/target misaligned"
assert len(X_test) == len(y_test), "testing features/target misaligned"
assert set(X_train.index).isdisjoint(X_test.index), "train/test share rows"

# Print the shape of the resulting datasets
print(f"Training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Testing target shape: {y_test.shape}")

# Optional: Print a few rows of the training and testing sets
print("\nFirst few rows of the training features:")
print(X_train.head())

print("\nFirst few rows of the training target:")
print(y_train[:5])

print("\nFirst few rows of the testing features:")
print(X_test.head())

print("\nFirst few rows of the testing target:")
print(y_test[:5])


Training features shape: (120, 4)
Testing features shape: (30, 4)
Training target shape: (120,)
Testing target shape: (30,)

First few rows of the training features:
    sepal_length  sepal_width  petal_length  petal_width
22           4.6          3.6           1.0          0.2
15           5.7          4.4           1.5          0.4
65           6.7          3.1           4.4          1.4
11           4.8          3.4           1.6          0.2
42           4.4          3.2           1.3          0.2

First few rows of the training target:
[0 0 1 0 0]

First few rows of the testing features:
     sepal_length  sepal_width  petal_length  petal_width
73            6.1          2.8           4.7          1.2
18            5.7          3.8           1.7          0.3
118           7.7          2.6           6.9          2.3
78            6.0          2.9           4.5          1.5
76            6.8          2.8           4.8          1.4

First few rows of the testing target:
[1 0 2 1 1]
