In [2]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (15, 10)
#import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [3]:
dataset = load_breast_cancer()

In [4]:
print(dataset.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, f

In [5]:
data = pd.DataFrame(dataset.data, columns=dataset.feature_names)
data['target'] = dataset.target

In [6]:
X, y = data.drop('target', axis=1), data[['target']]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=2020)

In [8]:
from sklearn.linear_model import LogisticRegressionCV

In [9]:
from sklearn.metrics import roc_auc_score
model = LogisticRegressionCV(max_iter=1000)
model.fit(X_train, y_train)
print(roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))

0.9906759906759908


In [10]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

In [18]:
class MyLogisticRegression:
    def __init__(self):
        self.n_iter = 300
        self.lambda_ = 0.1
    
    def fit(self, X, y):
        X = X.copy()
        X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)
        self.w = np.random.randn(X.shape[1], 1)
        #print(self.w.T)
        #print(self.w.shape)
        print(X.shape)
        for it in range(self.n_iter):
            y_pred = self.predict(X)
            grad = X.T @(y * (1 - y_pred) + (1 - y) * y_pred) / len(X)
            #grad = X.T @ (y_pred - y) / len(X)
            self.w = self.w - self.lambda_ * grad
            #print(self.w)
    
    def predict(self, X):
        z = X @ self.w
        return self.sigmoid(z)
    
    @staticmethod
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

In [19]:
lr = MyLogisticRegression()
lr.fit(X_train, y_train.values)
print(X_train.shape, lr.w.shape, "asd")

(381, 31)
(381, 30) (31, 1) asd


In [74]:
lr.w.shape,X_train.shape

((31, 1), (381, 30))

In [57]:
X_ = np.concatenate([np.ones((X_test.shape[0], 1)), X_test], axis=1)
y_pred = lr.predict(X_)

[[-47683132.55639368]
 [ -9668144.04674911]
 [-11426993.461706  ]
 [-23061201.66084594]
 [-11345776.04641483]
 [-13075449.02861086]
 [-29486230.94869654]
 [-15607711.74320944]
 [-27003722.37060621]
 [-29907494.60835598]
 [-13113312.78355277]
 [-14790804.40383798]
 [-23774584.34891165]
 [ -5504677.86411197]
 [ -6997430.95437616]
 [ -9747525.08569672]
 [-11799411.80172743]
 [-10368360.22877039]
 [-55903318.99129104]
 [-17197781.06966631]
 [-26156417.99157029]
 [-30969440.22195913]
 [-21385119.30260138]
 [ -8481324.23617788]
 [-15050208.66173423]
 [-12018757.74642284]
 [-19899529.08994845]
 [-11391850.46197907]
 [ -9031814.06456332]
 [-48723480.10016793]
 [-12343021.12820831]
 [-13419339.18113793]
 [ -9250159.56457245]
 [ -5731503.35108657]
 [-29796570.35845715]
 [-11065465.77730766]
 [-17172466.90575344]
 [-24402307.7632597 ]
 [ -7055983.5213661 ]
 [-11920494.99866407]
 [-14283056.01231953]
 [-11676725.49756278]
 [ -8417594.74885178]
 [ -9064376.35700438]
 [-27111825.04585406]
 [-1298896

In [58]:
y_pred

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [15]:
from sklearn.metrics import roc_auc_score

In [1]:
roc_auc_score(y_test, y_pred)

NameError: name 'roc_auc_score' is not defined