In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell is executed, so edits to imported packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [12]:
import numpy as np
from diveai.plotting import PlotBuilder, HeatmapPlotBuilder
from diveai.models import LogisticRegression
from diveai.metrics import accuracy_score, r2_score

# Logistic Regression

## One Feature

In [3]:
# Reproducible toy dataset: a single standard-normal feature column, and a
# binary label that is 1 wherever the feature plus Gaussian noise (scaled by
# 0.5) is positive. The draw order (features first, then noise) is what fixes
# the generated values under seed 0.
np.random.seed(0)
X = np.random.randn(100, 1)
noise = 0.5 * np.random.randn(100)
y = np.greater(X[:, 0] + noise, 0).astype(int)

In [4]:
# Scatter the raw samples split by label: class-0 features are drawn at
# target 0 and class-1 features at target 1.
pb = PlotBuilder(
    title="Synthetic Data for Logistic Regression",
    x_label="Feature",
    y_label="Target",
)
class_0 = X[y == 0].flatten()
class_1 = X[y == 1].flatten()

# One scatter trace per class; only the data, legend name, marker and colour differ.
for points, level, name, symbol, colour in (
    (class_0, np.zeros_like(class_0), 'Class 0', 'circle', 'navy'),
    (class_1, np.ones_like(class_1), 'Class 1', 'triangle-up', 'green'),
):
    pb.add_plot(points, level, plot_type='scatter', label=name, marker_symbol=symbol, color=colour, size=10)
pb.show()

FigureWidget({
    'data': [{'marker': {'color': 'navy', 'opacity': 1, 'size': 10, 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'Class 0',
              'type': 'scatter',
              'uid': '3299da5d-306e-4442-9931-888d907d1109',
              'x': array([ 0.40015721, -0.97727788, -0.15135721, -0.20515826, -0.85409574,
                          -2.55298982, -0.74216502, -1.45436567,  0.04575852,  0.15494743,
                          -0.88778575, -1.98079647, -0.34791215, -0.38732682, -0.30230275,
                          -1.04855297, -1.42001794, -1.70627019, -1.25279536, -1.61389785,
                          -0.21274028, -0.89546656, -0.51080514, -1.18063218, -0.02818223,
                           0.3024719 , -0.63432209, -0.36274117, -0.67246045, -0.81314628,
                          -1.7262826 ,  0.17742614, -1.63019835,  0.46278226, -0.90729836,
                          -1.23482582,  0.40234164, -0.68481009, -0.87079715, -0.57884966,
        

In [8]:
# Fit the classifier on the full synthetic dataset; the value returned by
# fit() is kept in `logs` so the recorded cost can be plotted afterwards.
model = LogisticRegression(learning_rate=0.1, iterations=2000)
logs = model.fit(X, y)

# Predict on the same samples used for training, so the score below is a
# training accuracy rather than a held-out estimate.
y_pred = model.predict(X)
y_true_flat, y_pred_flat = y.flatten(), y_pred.flatten()
accuracy = accuracy_score(y_true_flat, y_pred_flat)

# Report the flattened predictions and the resulting accuracy.
print("Predictions:", y_pred_flat)
print("Accuracy:", accuracy)

Predictions: [1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0 1 1
 1 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 1 0 0 1 0 1 1 1 1 0
 1 0 0 0 0 1 0 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1]
Accuracy: 0.86


In [6]:
# Cost values stored under the 'cost' key of the logs returned by fit().
cost_logs = logs['cost']

# Plot the cost against the iteration index (0 .. len(cost_logs) - 1).
pb = PlotBuilder(title="Cross Entropy Loss vs. Iterations", x_label="Iterations", y_label="Cross Entropy Loss")
# The trace label names the quantity actually plotted. It previously read
# 'MSE', which contradicted the title and y-axis of this cross-entropy curve.
pb.add_plot(np.arange(len(cost_logs)), cost_logs, plot_type='line', label='Cross Entropy Loss')
pb.show()

FigureWidget({
    'data': [{'line': {'color': 'red'},
              'marker': {'color': 'red', 'opacity': 1, 'size': 5},
              'mode': 'lines',
              'name': 'MSE',
              'type': 'scatter',
              'uid': 'd98c250d-3604-4993-80a3-a8ced6298310',
              'x': array([   0,    1,    2, ..., 1997, 1998, 1999], shape=(2000,)),
              'y': [0.6931471805599453, 0.6798097995671618, 0.6671513149038113,
                    ..., 0.3097984132651487, 0.30979835465153754,
                    0.3097982962421187]}],
    'layout': {'template': '...',
               'title': {'text': 'Cross Entropy Loss vs. Iterations'},
               'xaxis': {'title': {'text': 'Iterations'}},
               'yaxis': {'title': {'text': 'Cross Entropy Loss'}}}
})

In [7]:
# Predicted probability for every sample, reordered by ascending feature value
# so the probability curve is drawn left to right.
probabilities = model.predict_proba(X)
sorted_idx = np.argsort(X.flatten())
X_sorted, prob_sorted = X[sorted_idx], probabilities[sorted_idx]

class_0 = X[y == 0].flatten()
class_1 = X[y == 1].flatten()

pb = PlotBuilder(
    title="Logistic Regression Classification",
    x_label="Feature",
    y_label="Predicted Probability",
)

# True labels as scatter points: class 0 at height 0, class 1 at height 1.
for points, level, name, symbol, colour in (
    (class_0, np.zeros_like(class_0), 'Class 0', 'circle', 'navy'),
    (class_1, np.ones_like(class_1), 'Class 1', 'triangle-up', 'green'),
):
    pb.add_plot(points, level, plot_type='scatter', label=name, marker_symbol=symbol, color=colour, size=10)

# Fitted probability curve over the sorted feature values.
pb.add_plot(X_sorted.flatten(), prob_sorted.flatten(), plot_type='line', label='Probabilities', color='red')

# Decision boundary: the feature value at which the predicted probability is
# 0.5, computed as -bias / weight (assumes the model applies a sigmoid to
# weight * x + bias - TODO confirm against the model implementation).
# Falls back to 0 when the weight is exactly zero to avoid dividing by zero.
intercept = model.bias
coef = model.weights[0][0]
if coef != 0:
    boundary_x = -intercept / coef
else:
    boundary_x = 0

# Vertical line at the boundary, extended slightly beyond the [0, 1] range.
boundary_label = f'Decision Boundary (X={boundary_x:.2f})'
pb.add_plot([boundary_x, boundary_x], [-0.2, 1.2], plot_type='line', color='black', label=boundary_label)
pb.show()

FigureWidget({
    'data': [{'marker': {'color': 'navy', 'opacity': 1, 'size': 10, 'symbol': 'circle'},
              'mode': 'markers',
              'name': 'Class 0',
              'type': 'scatter',
              'uid': '26507cbe-ff60-4b56-aa8f-d3b271c0b56d',
              'x': array([ 0.40015721, -0.97727788, -0.15135721, -0.20515826, -0.85409574,
                          -2.55298982, -0.74216502, -1.45436567,  0.04575852,  0.15494743,
                          -0.88778575, -1.98079647, -0.34791215, -0.38732682, -0.30230275,
                          -1.04855297, -1.42001794, -1.70627019, -1.25279536, -1.61389785,
                          -0.21274028, -0.89546656, -0.51080514, -1.18063218, -0.02818223,
                           0.3024719 , -0.63432209, -0.36274117, -0.67246045, -0.81314628,
                          -1.7262826 ,  0.17742614, -1.63019835,  0.46278226, -0.90729836,
                          -1.23482582,  0.40234164, -0.68481009, -0.87079715, -0.57884966,
        

In [14]:
# Heatmap builder demo using a hard-coded 2x2 confusion matrix; the counts and
# the 'Cat'/'Dog' labels are illustrative and are not computed from the
# logistic regression model fitted earlier in the notebook.
class_labels = ['Cat', 'Dog']
cm = np.array([
    [45, 5],
    [3, 47],
])
hb = HeatmapPlotBuilder(colorscale='Greens', show_scale=True)
hb.add_confusion_matrix(cm, class_labels=class_labels)
hb.show()


FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgb(247,252,245)'], [0.125,
                             'rgb(229,245,224)'], [0.25, 'rgb(199,233,192)'],
                             [0.375, 'rgb(161,217,155)'], [0.5,
                             'rgb(116,196,118)'], [0.625, 'rgb(65,171,93)'], [0.75,
                             'rgb(35,139,69)'], [0.875, 'rgb(0,109,44)'], [1.0,
                             'rgb(0,68,27)']],
              'hoverinfo': 'x+y+z',
              'showscale': True,
              'text': array([[45,  5],
                             [ 3, 47]]),
              'texttemplate': '%{z}',
              'type': 'heatmap',
              'uid': 'af65c137-b73f-4e53-a319-f55485675e20',
              'x': [Cat, Dog],
              'y': [Cat, Dog],
              'z': array([[45,  5],
                          [ 3, 47]])}],
    'layout': {'margin': {'b': 100, 'l': 100, 'r': 50, 't': 30},
               'plot_bgcolor': 'white',
               'template': '...',
          