In [173]:
import numpy
import math

class LogisticRegression:
    """Binary classifier made of one sigmoid unit trained by batch gradient descent.

    The weights start at 1.0 and there is no bias term. Each epoch adds
    ``alpha * sum_i (y_i - h_i) * h_i * (1 - h_i) * x_i`` to the weights, where
    ``h_i`` is the sigmoid of the weighted sum of sample ``x_i``. The
    ``h_i * (1 - h_i)`` factor makes this the descent step for the squared
    error ``(y_i - h_i) ** 2 / 2`` rather than for the cross-entropy loss.
    """

    def __init__(self, n, epochs, alpha):
        """Create an untrained model.

        Args:
            n: number of features per sample.
            epochs: number of full passes over the training set.
            alpha: learning rate applied to the summed gradient of each epoch.
        """
        self.__w = numpy.full((1, n), 1, dtype=float)
        self.__n = n
        self.__epochs = epochs
        self.__alpha = alpha

    @staticmethod
    def __sigmoid(s):
        """Return 1 / (1 + exp(-s)) for a scalar or an array."""
        # exp(-s) overflows to inf for strongly negative s; the quotient is then
        # 0.0, which is the correct limit, so only the overflow warning is silenced.
        with numpy.errstate(over='ignore'):
            return 1 / (1 + numpy.exp(-1 * s))

    @staticmethod
    def __take_samples(x, y, m):
        """Return the first ``m`` rows of ``x`` and their labels as 1-D labels.

        ``m=None`` selects every row of ``x``; a non-positive ``m`` selects none.
        Labels may be 1-D or 2-D; for 2-D labels the first column is used.

        Raises:
            IndexError: if ``m`` exceeds the number of rows in ``x`` or ``y``.
        """
        features = numpy.asarray(x, dtype=float)
        labels = numpy.asarray(y)
        if m is None:
            m = len(features)
        m = max(m, 0)
        if m > len(features) or m > len(labels):
            raise IndexError("m exceeds the number of available samples")
        features, labels = features[:m], labels[:m]
        if m and labels.ndim > 1:
            labels = labels.reshape(m, -1)[:, 0]
        return features, labels

    def __h(self, x_i):
        """Return the sigmoid activation (a scalar) for one sample ``x_i``."""
        s = numpy.matmul(x_i, numpy.transpose(self.__w))
        return self.__sigmoid(s[0])

    def train_model(self, x_train, y_train, m=None):
        """Fit the weights in place.

        Args:
            x_train: array-like of shape (samples, n).
            y_train: numeric 0/1 labels, either 1-D or a single column.
            m: number of leading samples to use; defaults to all of them.
        """
        features, labels = self.__take_samples(x_train, y_train, m)
        if len(features) == 0:
            return  # nothing to learn from; the weights stay as they are
        targets = labels.astype(float)
        for _ in range(self.__epochs):
            # One matrix product per epoch instead of one Python iteration per sample.
            h = self.__sigmoid(numpy.matmul(features, self.__w[0]))
            grad = numpy.matmul((targets - h) * h * (1 - h), features)
            self.__w += (self.__alpha * grad)

    def test_model(self, x_test, y_test, m=None):
        """Return the fraction of samples whose prediction equals its label.

        Args:
            x_test: array-like of shape (samples, n).
            y_test: labels, either 1-D or a single column.
            m: number of leading samples to evaluate; defaults to all of them.

        Returns:
            A float in [0, 1]; 0.0 when there are no samples to evaluate.
        """
        features, labels = self.__take_samples(x_test, y_test, m)
        total = len(features)
        if total == 0:
            return 0.0
        match = sum(1 for x_i, y_i in zip(features, labels) if self.decide(x_i) == y_i)
        return match / total

    def decide(self, x):
        """Classify one sample: 1 if its activation is above 0.5, otherwise 0."""
        return 1 if self.__h(x) > 0.5 else 0

In [176]:
import pandas
import numpy
from sklearn.model_selection import train_test_split

# Load the dataset and discard columns that contain no values at all.
dataframe = pandas.read_csv('data.csv').dropna(how='all', axis='columns')

# x uses the six "*_mean" columns listed below because their values share the
# same order of magnitude.
FEATURE_COLUMNS = [
    'smoothness_mean', 'compactness_mean', 'concavity_mean',
    'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
]
# y uses the 'diagnosis' column to train the classifier, encoded as B -> 0, M -> 1.
DIAGNOSIS_CODES = {'B': 0, 'M': 1}

# Split and training settings.
TEST_SIZE = 0.30
RANDOM_STATE = 42
EPOCHS = 100
ALPHA = 0.03

# Select the wanted columns by name instead of dropping every other column.
x = dataframe[FEATURE_COLUMNS]
# map() sets any label missing from DIAGNOSIS_CODES to NaN, which the assertion
# below rejects. y stays a one-column DataFrame so the label arrays keep the
# shape (samples, 1).
y = dataframe['diagnosis'].map(DIAGNOSIS_CODES).to_frame()
assert y['diagnosis'].notna().all(), "unexpected value in the 'diagnosis' column"

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

m_train = len(x_train)
m_test = len(x_test)
n = len(x_train[0])

model = LogisticRegression(n, EPOCHS, ALPHA)
model.train_model(x_train, y_train, m_train)
rate = model.test_model(x_test, y_test, m_test)
print("rate:", rate)

rate: 0.8596491228070176
