<a href="https://colab.research.google.com/github/0xjdavis/0xjdavis/blob/main/AI_Makerspace_Intro_to_Optimization_(Through_Loss).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multi-class Classification on Iris Dataset with Cross-Entropy Loss

## 1. Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## 2. Loading and Exploring the Iris Dataset

In [None]:
# Fetch the Iris bunch and unpack the pieces reused throughout the notebook
iris = load_iris()
X, y = iris.data, iris.target
feature_names = iris.feature_names
class_names = iris.target_names

# Tabular view: one column per measurement plus the species name of every row
iris_df = pd.DataFrame(X, columns=feature_names)
iris_df['species'] = [class_names[label] for label in y]

# Summarise the dataset: dimensions and names first, then a preview and the class balance
for heading, content in (
    ("Iris Dataset Shape:", X.shape),
    ("Feature Names:", feature_names),
    ("Class Names:", class_names),
):
    print(heading, content)

print("\nSample of the dataset:")
print(iris_df.head())
print("\nClass distribution:")
print(iris_df['species'].value_counts())

Iris Dataset Shape: (150, 4)
Feature Names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Class Names: ['setosa' 'versicolor' 'virginica']

Sample of the dataset:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

  species  
0  setosa  
1  setosa  
2  setosa  
3  setosa  
4  setosa  

Class distribution:
species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64


## 3. Data Visualization

In [None]:
# Axis labels: strip the "(cm)" unit (and any underscores) from the feature names
axis_labels = {}
for col in feature_names:
    axis_labels[col] = col.replace('(cm)', '').replace('_', ' ')

# Pairwise scatter plots of all features, coloured and marked by species
fig = px.scatter_matrix(
    iris_df,
    dimensions=feature_names,
    color="species",
    symbol="species",
    title="Iris Dataset Scatter Matrix",
    labels=axis_labels,
    height=800,
)

# Hide the diagonal panels, then set the title and body font sizes
fig.update_traces(diagonal_visible=False)
fig.update_layout(title_font_size=20, font={"size": 12})

fig.show()

## 4. Data Preprocessing

In [None]:
# Hold out 30% of the rows for testing; stratifying on y keeps the class balance in both parts
split = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

# Standardize features: the scaler is fitted on the training rows only and
# then applied unchanged to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

Training set shape: (105, 4)
Testing set shape: (45, 4)


# Cross-Entropy Loss Visualization for Multi-class Classification

## Understanding Cross-Entropy Loss in Logistic Regression

### Introduction to Cross-Entropy Loss

Cross-entropy loss (also known as log loss) is a key cost function in classification tasks, particularly for multi-class problems. It measures the performance of a classification model whose output is a probability between 0 and 1 for each class. In multi-class classification like our Iris dataset example, the loss for a sample depends only on the probability assigned to its true class: it is close to 0 when that probability is near 1 and grows without bound as the probability approaches 0, so confident wrong predictions are penalized most heavily.

### Cross-Entropy Loss Formula

For multi-class classification with $K$ classes, the cross-entropy loss function is:

$$L = -\frac{1}{N} \sum_{i=1}^{N} \sum_{c=1}^{K} y_{ic} \log(p_{ic})$$

Where:
- $N$ is the number of samples
- $K$ is the number of classes
- $y_{ic}$ is 1 if sample $i$ belongs to class $c$ and 0 otherwise (one-hot encoding)
- $p_{ic}$ is the predicted probability that sample $i$ belongs to class $c$

Let's implement a function to calculate this:

In [None]:
def calculate_cross_entropy_loss(y_true, y_pred_prob):
    """
    Calculate cross-entropy loss for multi-class classification.

    Because the true labels are one-hot, only the probability assigned to each
    sample's true class contributes to the sum, so those entries are picked
    out directly instead of building a one-hot matrix.

    Parameters:
    y_true : array-like of shape (n_samples,)
        True class labels, given as integer column indices into y_pred_prob
    y_pred_prob : array-like of shape (n_samples, n_classes)
        Predicted probabilities for each class

    Returns:
    float : The cross-entropy loss averaged over the samples

    Raises:
    ValueError : If y_pred_prob is not 2-D or the sample counts differ
    """
    # Accept lists / pandas objects as well as arrays; labels are used as positions
    labels = np.asarray(y_true, dtype=np.intp)
    probs = np.asarray(y_pred_prob, dtype=float)

    if probs.ndim != 2:
        raise ValueError("y_pred_prob must have shape (n_samples, n_classes)")
    n_samples = labels.shape[0]
    if labels.ndim != 1 or n_samples != probs.shape[0]:
        raise ValueError("y_true and y_pred_prob must describe the same number of samples")

    # Clip with a small epsilon to prevent log(0)
    epsilon = 1e-15
    probs = np.clip(probs, epsilon, 1 - epsilon)

    # Probability each sample assigned to its own true class
    true_class_probs = probs[np.arange(n_samples), labels]
    loss = -np.sum(np.log(true_class_probs)) / n_samples

    return loss

### Visualizing Cross-Entropy Loss

#### How Loss Changes as Probabilities Change

Let's visualize how the cross-entropy loss changes when our prediction probabilities change:


In [None]:
def plot_binary_cross_entropy():
    """
    Build a figure of the single-example binary cross-entropy loss against
    the predicted probability P(y=1), with one curve per true class.

    Returns:
    plotly Figure : the two loss curves on a shared axis
    """
    # Probabilities stop short of 0 and 1 so both logarithms stay finite
    prob_grid = np.linspace(0.001, 0.999, 1000)

    # (legend label, loss values, line colour) for each true class
    curves = (
        ('True class = 1', -np.log(prob_grid), 'blue'),
        ('True class = 0', -np.log(1 - prob_grid), 'red'),
    )

    fig = go.Figure()
    for label, loss_curve, colour in curves:
        fig.add_trace(go.Scatter(
            x=prob_grid,
            y=loss_curve,
            mode='lines',
            name=label,
            line=dict(color=colour, width=2),
        ))

    fig.update_layout(
        title='Binary Cross-Entropy Loss vs. Predicted Probability',
        xaxis_title='Predicted Probability P(y=1)',
        yaxis_title='Loss',
        height=500,
        width=800,
        legend=dict(x=0.01, y=0.99),
        yaxis=dict(range=[0, 8]),
    )

    return fig

# Build and display the binary cross-entropy figure
binary_loss_fig = plot_binary_cross_entropy()
binary_loss_fig.show()

#### Cross-Entropy Loss During Training

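Now, let's train a logistic regression model and visualize how the cross-entropy loss changes during training. We approximate the training trajectory by refitting the model with an increasing iteration limit (`max_iter` = 1, 2, …, 100) and recording the training loss after each fit: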

In [None]:
def visualize_loss_during_training():
    """
    Visualize how cross-entropy loss changes during logistic regression training.

    Training progress is simulated: a fresh model is fitted for every
    iteration cap from 1 to 100, and the training loss and coefficients
    reached under that cap are recorded.

    Reads the notebook globals X_train_scaled, y_train and feature_names.

    Returns:
    plotly Figure : two stacked panels, the loss curve on top and one line per
        (class, feature) coefficient below
    """
    # Local imports keep the warning handling self-contained in this cell
    import warnings
    from sklearn.exceptions import ConvergenceWarning

    # Get the training data
    X = X_train_scaled
    y = y_train

    max_iter = 100  # Number of iterations to show
    iterations = list(range(1, max_iter + 1))
    loss_history = []
    weights_history = []

    # Train model with different max_iter values to simulate training progress
    for iteration_cap in iterations:
        temp_model = LogisticRegression(
            solver='lbfgs',
            C=10,
            max_iter=iteration_cap,
            random_state=42
        )

        # Stopping early is intentional here, so the "failed to converge"
        # warning emitted by each truncated fit is expected noise
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=ConvergenceWarning)
            temp_model.fit(X, y)

        # Training loss and coefficients reached under this iteration cap
        y_pred_prob = temp_model.predict_proba(X)
        loss_history.append(calculate_cross_entropy_loss(y, y_pred_prob))
        weights_history.append(temp_model.coef_.copy())

    # Create subplot with loss curve
    fig = make_subplots(rows=2, cols=1,
                         subplot_titles=('Cross-Entropy Loss During Training',
                                        'Model Coefficients Evolution'),
                         vertical_spacing=0.15,
                         row_heights=[0.4, 0.6])

    # Add loss curve
    fig.add_trace(
        go.Scatter(
            x=iterations,
            y=loss_history,
            mode='lines+markers',
            name='Training Loss',
            line=dict(color='blue', width=2)
        ),
        row=1, col=1
    )

    # Add coefficient evolution traces (one line per feature per class):
    # colour identifies the class, solid/dashed alternates between features
    classes = ['Setosa', 'Versicolor', 'Virginica']
    colors = ['red', 'green', 'blue']
    n_features = weights_history[0].shape[1]

    for class_idx, class_label in enumerate(classes):
        for feat_idx in range(n_features):
            weight_values = [weights[class_idx][feat_idx] for weights in weights_history]
            fig.add_trace(
                go.Scatter(
                    x=iterations,
                    y=weight_values,
                    mode='lines',
                    name=f'{class_label}: {feature_names[feat_idx]}',
                    line=dict(color=colors[class_idx],
                              width=1.5,
                              dash='solid' if feat_idx % 2 == 0 else 'dash')
                ),
                row=2, col=1
            )

    # Update layout
    fig.update_layout(
        height=800,
        width=900,
        title_text="Cross-Entropy Loss and Model Coefficients During Training",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5)
    )

    fig.update_xaxes(title_text="Training Iterations", row=1, col=1)
    fig.update_yaxes(title_text="Cross-Entropy Loss", row=1, col=1)
    fig.update_xaxes(title_text="Training Iterations", row=2, col=1)
    fig.update_yaxes(title_text="Coefficient Value", row=2, col=1)

    return fig

# Create the training visualization
training_vis_fig = visualize_loss_during_training()
training_vis_fig.show()


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to th

### Visualizing the Decision Process with Probabilities

Let's visualize how the softmax function (used in multinomial logistic regression) converts raw scores to probabilities:

In [None]:
def plot_softmax_visualization():
    """
    Visualize how the softmax function converts raw scores to probabilities.

    Fits a logistic regression model on the scaled training data, picks five
    test samples at random and draws, for each one, the raw class scores
    (left) next to the softmax probabilities (right). The left-hand title of
    every row also states the sample's true class.

    Reads the notebook globals X_train_scaled, y_train, X_test, X_test_scaled,
    y_test and class_names.

    Returns:
    plotly Figure : a grid with one row per example and two bar charts per row
    """
    # Train the logistic regression model (using default without multi_class param to avoid FutureWarning)
    clf = LogisticRegression(solver='lbfgs', C=10, max_iter=1000, random_state=42)
    clf.fit(X_train_scaled, y_train)

    # Get a few example points (drawn from NumPy's global RNG, so the
    # selection changes between runs unless that RNG is seeded beforehand)
    n_examples = 5
    sample_indices = np.random.choice(len(X_test), n_examples, replace=False)
    X_samples = X_test_scaled[sample_indices]
    y_samples = y_test[sample_indices]

    # Calculate raw scores (before softmax)
    raw_scores = X_samples @ clf.coef_.T + clf.intercept_

    # Calculate softmax probabilities
    def softmax(x):
        # Subtracting the row maximum leaves the result unchanged but avoids overflow in exp
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    probabilities = softmax(raw_scores)

    # make_subplots assigns titles in row-major order (left to right, then
    # top to bottom), so the two titles of each example must be adjacent
    subplot_titles = []
    for i in range(n_examples):
        true_class = class_names[y_samples[i]]
        subplot_titles.append(f"Example {i+1}: Raw Scores (True Class = {true_class})")
        subplot_titles.append(f"Example {i+1}: Probabilities (Softmax)")

    # Create a figure with subplots
    fig = make_subplots(
        rows=n_examples,
        cols=2,
        subplot_titles=subplot_titles,
        specs=[[{"type": "bar"}, {"type": "bar"}] for _ in range(n_examples)],
        vertical_spacing=0.05,
        horizontal_spacing=0.1
    )

    # Add the traces for each example
    for i in range(n_examples):
        # Raw scores
        fig.add_trace(
            go.Bar(
                x=class_names,
                y=raw_scores[i],
                name=f"Raw Scores {i+1}",
                marker_color=['lightblue', 'lightgreen', 'lightcoral'],
                showlegend=False
            ),
            row=i+1, col=1
        )

        # Probabilities
        fig.add_trace(
            go.Bar(
                x=class_names,
                y=probabilities[i],
                name=f"Probabilities {i+1}",
                marker_color=['blue', 'green', 'red'],
                showlegend=False,
                text=[f"{p:.2f}" for p in probabilities[i]],
                textposition="auto"
            ),
            row=i+1, col=2
        )

    # Update layout
    fig.update_layout(
        height=200 * n_examples,
        width=900,
        title_text="From Raw Scores to Probabilities with Softmax",
    )

    # Shared y-range for all score panels so bar heights are comparable across
    # examples; it always covers at least [-2, 2]
    score_range = [min(raw_scores.min() - 0.5, -2), max(raw_scores.max() + 0.5, 2)]
    for row in range(1, n_examples + 1):
        fig.update_yaxes(title_text="Score Value", range=score_range, row=row, col=1)
        fig.update_yaxes(title_text="Probability", range=[0, 1.05], row=row, col=2)

    return fig

# Create the softmax visualization
softmax_vis_fig = plot_softmax_visualization()
softmax_vis_fig.show()

### Cross-Entropy Loss Surface

Finally, let's visualize the cross-entropy loss surface for a simplified 2D projection:

In [None]:
def visualize_loss_surface():
    """
    Create a 3D visualization of the cross-entropy loss surface for a simplified 2D projection.

    A logistic regression model is fitted on the first two scaled features.
    The two coefficients of class 0 are then varied on a 30 x 30 grid spanning
    +/- 2 around their fitted values, with every other parameter held at its
    fitted value, and the training cross-entropy loss is evaluated at each
    grid point.

    Reads the notebook globals X_train_scaled, y_train and feature_names.

    Returns:
    plotly Figure : the loss surface with a marker at the fitted coefficients
    """
    # Train a simplified logistic regression model using only 2 features
    # This makes it easier to visualize the loss surface
    X_simple = X_train_scaled[:, :2]  # Use only first two features
    y_simple = y_train

    simple_model = LogisticRegression(solver='lbfgs', C=10, max_iter=1000, random_state=42)
    simple_model.fit(X_simple, y_simple)

    # Fitted coefficients of class 0: the centre of the grid
    w1_actual = simple_model.coef_[0, 0]
    w2_actual = simple_model.coef_[0, 1]

    # Create a range of values around the actual values
    w1_range = np.linspace(w1_actual - 2, w1_actual + 2, 30)
    w2_range = np.linspace(w2_actual - 2, w2_actual + 2, 30)
    w1_mesh, w2_mesh = np.meshgrid(w1_range, w2_range)

    # Loss of the fitted model, used as the height of the marker
    base_probs = simple_model.predict_proba(X_simple)
    base_loss = calculate_cross_entropy_loss(y_simple, base_probs)

    # Defined once, outside the grid loop
    def softmax(x):
        # Subtracting the row maximum leaves the result unchanged but avoids overflow in exp
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # One scratch copy is enough: both varied entries are overwritten at every
    # grid point and the remaining entries never change
    trial_coef = simple_model.coef_.copy()
    intercept = simple_model.intercept_

    # Calculate loss for each weight combination
    loss_values = np.zeros_like(w1_mesh)
    for i, j in np.ndindex(w1_mesh.shape):
        trial_coef[0, 0] = w1_mesh[i, j]
        trial_coef[0, 1] = w2_mesh[i, j]

        raw_scores = X_simple @ trial_coef.T + intercept
        loss_values[i, j] = calculate_cross_entropy_loss(y_simple, softmax(raw_scores))

    # Create 3D surface plot
    fig = go.Figure(data=[
        go.Surface(
            z=loss_values,
            x=w1_mesh,
            y=w2_mesh,
            colorscale='Viridis',
            colorbar=dict(title='Loss Value')
        )
    ])

    # Mark the actual model coefficient point
    fig.add_trace(
        go.Scatter3d(
            x=[w1_actual],
            y=[w2_actual],
            z=[base_loss],
            mode='markers',
            marker=dict(
                size=8,
                color='red',
            ),
            name='Optimal Point'
        )
    )

    # Update layout
    fig.update_layout(
        title='Cross-Entropy Loss Surface',
        scene=dict(
            xaxis_title='Weight 1 (Feature: ' + feature_names[0] + ')',
            yaxis_title='Weight 2 (Feature: ' + feature_names[1] + ')',
            zaxis_title='Loss',
            camera=dict(
                eye=dict(x=1.5, y=1.5, z=1.2)
            )
        ),
        width=800,
        height=700,
        margin=dict(l=0, r=0, b=0, t=40)
    )

    return fig

# Create the loss surface visualization
loss_surface_fig = visualize_loss_surface()
loss_surface_fig.show()

## 5. Model Training with Cross-Entropy Loss

In [None]:
# LogisticRegression minimises cross-entropy (log loss) for multi-class problems.
# No multi_class argument is passed, so the estimator's default multi-class
# handling applies.
hyperparams = {
    'solver': 'lbfgs',      # solver used for the optimisation
    'C': 10,                # inverse of regularization strength
    'max_iter': 1000,       # generous iteration cap to ensure convergence
    'random_state': 42,
}

# Build the estimator and train it on the standardized training data
model = LogisticRegression(**hyperparams).fit(X_train_scaled, y_train)

print("Model Coefficients Shape:", model.coef_.shape)
print("Model Intercepts:", model.intercept_)

Model Coefficients Shape: (3, 4)
Model Intercepts: [ 0.05090703  3.83730121 -3.88820824]


## 6. Model Evaluation

In [None]:
# Predict the held-out samples and derive every metric from those predictions
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=class_names)

# Confusion matrix (kept in `cm` for the heatmap in the next section)
cm = confusion_matrix(y_test, y_pred)

# Report overall accuracy, then the per-class precision / recall / F1 table
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(report)

Accuracy: 0.9111

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.82      0.93      0.88        15
   virginica       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45




## 7. Visualizing Confusion Matrix with Plotly

In [None]:
# Heatmap of the confusion matrix; each cell is annotated with its own count
heatmap = go.Heatmap(
    z=cm,
    x=class_names,
    y=class_names,
    colorscale='Blues',
    hoverongaps=False,
    text=cm,
    texttemplate="%{text}",
    textfont=dict(size=16),
)
fig = go.Figure(data=heatmap)

# Columns are predictions, rows are the true classes
fig.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted Label",
    yaxis_title="True Label",
    width=600,
    height=500,
)

fig.show()

## 8. Visualizing Decision Boundaries (2D Projection)

In [None]:
# Decision-boundary plot over a chosen pair of features
def plot_decision_boundaries(model, X, y, feature_idx1=0, feature_idx2=1):
    """
    Draw the model's predicted class over a 2D grid of two features, with the
    data points overlaid.

    The grid spans the observed range of the two chosen features, extended by
    1 on each side, in steps of 0.02. Every other feature is held at its mean
    over X. Grid points are passed through the notebook-level `scaler` before
    prediction; `class_names` and `feature_names` are also read from the
    notebook globals.

    Parameters:
    model : fitted classifier exposing predict()
    X : array of shape (n_samples, n_features), in the units `scaler` expects
    y : array of shape (n_samples,) with class indices matching class_names
    feature_idx1, feature_idx2 : column indices for the x and y axes

    Returns:
    plotly Figure : filled contour of predicted classes plus one scatter trace per class
    """
    step = 0.02  # grid resolution
    first_col = X[:, feature_idx1]
    second_col = X[:, feature_idx2]
    x_min, x_max = first_col.min() - 1, first_col.max() + 1
    y_min, y_max = second_col.min() - 1, second_col.max() + 1

    # Axis ticks, reused for both the mesh and the contour trace
    x_axis = np.arange(x_min, x_max, step)
    y_axis = np.arange(y_min, y_max, step)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # One feature row per grid point: the two plotted features vary ...
    n_features = X.shape[1]
    features = np.zeros((xx.size, n_features))
    features[:, feature_idx1] = xx.ravel()
    features[:, feature_idx2] = yy.ravel()

    # ... and all remaining features are pinned to their mean
    for col in range(n_features):
        if col != feature_idx1 and col != feature_idx2:
            features[:, col] = X[:, col].mean()

    # Scale the grid, then predict a class for every grid point
    grid_pred = model.predict(scaler.transform(features))
    Z = grid_pred.reshape(xx.shape)

    fig = go.Figure()

    # Filled regions of constant predicted class
    fig.add_trace(go.Contour(
        z=Z,
        x=x_axis,
        y=y_axis,
        colorscale='RdBu',
        showscale=False,
        line=dict(width=0),
        contours=dict(showlabels=False),
    ))

    # Overlay the actual observations, one trace per class
    for class_id, label in enumerate(class_names):
        members = y == class_id
        fig.add_trace(go.Scatter(
            x=X[members, feature_idx1],
            y=X[members, feature_idx2],
            mode='markers',
            name=label,
            marker=dict(
                size=10,
                line=dict(width=1, color='DarkSlateGrey'),
            ),
        ))

    x_label = feature_names[feature_idx1]
    y_label = feature_names[feature_idx2]
    fig.update_layout(
        title=f"Decision Boundaries using {x_label} and {y_label}",
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=600,
        width=800,
        legend=dict(y=0.99, x=0.01, yanchor='top'),
        margin=dict(l=65, r=50, b=65, t=90),
    )

    return fig

# Sepal length vs. sepal width
fig1 = plot_decision_boundaries(model, X, y, 0, 1)
fig1.show()

# Petal length vs. petal width
fig2 = plot_decision_boundaries(model, X, y, 2, 3)
fig2.show()

## 9. Visualizing Probabilities for Test Samples

In [None]:
# Class probabilities predicted for every test sample
y_proba = model.predict_proba(X_test_scaled)

# Only the first few test samples are drawn, one bar group per sample
n_samples = 10
fig = make_subplots(rows=1, cols=1, subplot_titles=["Prediction Probabilities for Test Samples"])

shown = min(n_samples, len(y_test))
for sample_no, (sample_probs, true_label) in enumerate(zip(y_proba[:shown], y_test[:shown]), start=1):
    bar = go.Bar(
        x=class_names,
        y=sample_probs,
        name=f"Sample {sample_no} (True: {class_names[true_label]})",
        text=[f"{prob:.2f}" for prob in sample_probs],
        textposition="auto",
    )
    fig.add_trace(bar)

# Grouped bars with a horizontal legend above the plot area
fig.update_layout(
    title="Prediction Probabilities for Test Samples",
    xaxis_title="Class",
    yaxis_title="Probability",
    height=500,
    width=800,
    barmode='group',
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

fig.show()

## 10. Learning Curves

In [None]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Let's implement a custom learning curve function to ensure proper class representation
def custom_learning_curve(estimator, X, y, train_sizes, random_state=None):
    """
    Generate a custom learning curve that ensures all classes are present
    in each training subset.

    For each of 5 stratified, shuffled folds and each requested training
    size, an equal number of samples is drawn from every class of the fold's
    training part, the estimator is fitted on that subset, and its accuracy
    is measured on the subset and on the fold's held-out part.

    Parameters:
    estimator : object with fit(X, y) and score(X, y)
        Fitted in place repeatedly; after the call it holds the last fit
    X : array of shape (n_samples, n_features)
        Must support integer-array indexing (e.g. a NumPy array)
    y : array of shape (n_samples,)
        Class labels; must support integer-array indexing
    train_sizes : iterable of float
        Fractions of each fold's training part to use
    random_state : int or None, default None
        Seed for the subset sampling. None keeps drawing from NumPy's global
        random state

    Returns:
    tuple : (actual_train_sizes, train_scores, test_scores), where
        actual_train_sizes has one subset size per entry of train_sizes
        (recorded on the first fold) and both score arrays have shape
        (n_sizes, n_folds)
    """
    min_per_class = 5  # never train on fewer samples than this per class, if available

    # Same sampling source as before unless the caller asks for a seeded one
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Create stratified indices for CV
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    fold_train_scores = []
    fold_test_scores = []
    actual_train_sizes = []

    for fold_no, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        X_train_full, X_test = X[train_idx], X[test_idx]
        y_train_full, y_test = y[train_idx], y[test_idx]

        # Positions of each class within this fold's training part; every
        # class listed here has at least one member, so every subset drawn
        # below contains all of them
        class_indices = [
            np.where(y_train_full == label)[0]
            for label in np.unique(y_train_full)
        ]
        n_classes = len(class_indices)

        size_train_scores = []
        size_test_scores = []

        for train_size in train_sizes:
            # Requested subset size, bounded below by min_per_class per class
            # and above by what the fold offers
            n_requested = max(int(len(X_train_full) * train_size), min_per_class * n_classes)
            actual_size = min(n_requested, len(X_train_full))
            per_class = max(min_per_class, actual_size // n_classes)

            # Stratified subset: the same count from every class, capped by availability
            train_subset_idx = np.concatenate([
                rng.choice(members, size=min(per_class, len(members)), replace=False)
                for members in class_indices
            ])

            X_train_subset = X_train_full[train_subset_idx]
            y_train_subset = y_train_full[train_subset_idx]

            # Train the model and score it on the subset and on the held-out part
            fitted = estimator.fit(X_train_subset, y_train_subset)
            size_train_scores.append(fitted.score(X_train_subset, y_train_subset))
            size_test_scores.append(fitted.score(X_test, y_test))

            # Only record size once (for the first fold)
            if fold_no == 0:
                actual_train_sizes.append(len(X_train_subset))

        fold_train_scores.append(size_train_scores)
        fold_test_scores.append(size_test_scores)

    # Rows were collected per fold; transpose to (n_sizes, n_folds)
    train_scores = np.array(fold_train_scores).T
    test_scores = np.array(fold_test_scores).T

    return np.array(actual_train_sizes), train_scores, test_scores

# Estimator whose learning curve is measured
model = LogisticRegression(solver='lbfgs', C=10, max_iter=1000, random_state=42)

# Five relative training sizes from 30% to 100% of each fold's training part
train_sizes_rel = np.linspace(0.3, 1.0, 5)
train_sizes, train_scores, test_scores = custom_learning_curve(model, X, y, train_sizes_rel)

# Per-size mean and spread across the folds
train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)

# x-coordinates of a closed band outline: left to right along the upper edge,
# then right to left along the lower edge
band_x = np.concatenate([train_sizes, train_sizes[::-1]])

# (legend name, mean, std, line colour, band fill colour) for each curve
curves = (
    ('Training score', train_mean, train_std, 'blue', 'rgba(0, 0, 255, 0.1)'),
    ('Cross-validation score', test_mean, test_std, 'red', 'rgba(255, 0, 0, 0.1)'),
)

fig = go.Figure()
for curve_name, mean_scores, std_scores, line_color, band_color in curves:
    # Mean score line
    fig.add_trace(go.Scatter(
        x=train_sizes,
        y=mean_scores,
        mode='lines+markers',
        name=curve_name,
        line=dict(color=line_color),
    ))

    # Shaded +/- one standard deviation band with an invisible outline
    upper_edge = mean_scores + std_scores
    lower_edge = mean_scores - std_scores
    fig.add_trace(go.Scatter(
        x=band_x,
        y=np.concatenate([upper_edge, lower_edge[::-1]]),
        fill='toself',
        fillcolor=band_color,
        line=dict(color='rgba(255, 255, 255, 0)'),
        showlegend=False,
        name=f'{curve_name} std',
    ))

# Axis titles, integer tick labels for the sizes, accuracy axis fixed to [0, 1.05]
fig.update_layout(
    title='Learning Curves (Logistic Regression with Cross-Entropy Loss)',
    xaxis_title='Training Set Size',
    yaxis_title='Accuracy Score',
    height=500,
    width=800,
    legend=dict(x=0.01, y=0.01),
    xaxis=dict(tickformat=',d'),
    yaxis=dict(rangemode='tozero', range=[0.0, 1.05]),
)

fig.show()