<h2>Import bibliotek</h2>

In [1]:
from time import perf_counter, time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp

<h2>Zadanie 1.</h2>

<h3>Wczytanie zbiorów</h3>

In [2]:
# Column names: the first field of every row in the labels file
column_names = (
    pd.read_csv("things/dataset/breast-cancer.labels", header=None)
    .iloc[:, 0]
    .tolist()
)

# Training set: parsed without a header row, column names supplied explicitly
data_training = pd.read_csv(
    "things/dataset/breast-cancer-train.dat",
    header=None,
    names=column_names,
)

# Validation set: parsed exactly like the training set
data_validation = pd.read_csv(
    "things/dataset/breast-cancer-validate.dat",
    header=None,
    names=column_names,
)

<h3>Macierze danych</h3>

In [3]:
# Columns removed before building the matrices: the patient identifier and the diagnosis
non_feature_columns = ["patient ID", "Malignant/Benign"]

# Matrix built from the remaining columns of the training set
training_matrix = data_training.drop(columns=non_feature_columns).to_numpy()

# Matrix built from the remaining columns of the validation set
validation_matrix = data_validation.drop(columns=non_feature_columns).to_numpy()

<h3>Utworzenie wektora <b>b</b> z informacjami nt. typu raka dla zbiorów danych</h3>

In [4]:
# Training labels: +1 where the diagnosis column equals "M", -1 everywhere else
training_is_malignant = data_training["Malignant/Benign"].eq("M")
data_training_type_vector = np.where(training_is_malignant, 1, -1)

# Validation labels, encoded the same way
validation_is_malignant = data_validation["Malignant/Benign"].eq("M")
data_validation_type_vector = np.where(validation_is_malignant, 1, -1)

<h3>Funkcja spadku wzdłuż gradientu</h3>

In [5]:
# gradient descent function
def gradient_descent(A, x0, y, steps):
    """Minimise ``||A @ x - y||**2`` with fixed-step gradient descent.

    Every iteration applies ``x <- x - lr * 2 * (A.T @ A @ x - A.T @ y)``.
    The update is rewritten as the affine map ``x <- c1 @ x + c0`` so that
    both ``c1`` and ``c0`` are computed once, outside the loop.

    The step size is ``lr = 1 / (lambda_min + lambda_max)``, where the
    lambdas are the extreme eigenvalues of ``A.T @ A``.

    Parameters
    ----------
    A : 2-D array
        Data matrix.
    x0 : array
        Starting point; its length must match the number of columns of ``A``.
    y : array
        Target vector; its length must match the number of rows of ``A``.
    steps : int
        Number of iterations to run.

    Returns
    -------
    The iterate after ``steps`` updates (``x0`` itself when ``steps`` is 0).
    """
    AT = A.T
    ATA = AT @ A
    # A.T @ A is symmetric, so use the symmetric eigenvalue routine: it always
    # returns real eigenvalues, while the general np.linalg.eig may return a
    # complex array due to round-off, which would make lr and the result complex.
    eigenvalues = np.linalg.eigvalsh(ATA)
    lr = 1 / (np.min(eigenvalues) + np.max(eigenvalues))
    c1 = np.identity(AT.shape[0]) - lr * 2 * ATA
    c0 = lr * 2 * AT @ y
    xk = x0
    for _ in range(steps):
        xk = c1 @ xk + c0
    return xk

<h3>Funkcja wyświetlająca wynik</h3>

In [6]:
def print_result(val_matrix, weights, val_vector, tm):
    """Print the confusion matrix, accuracy and run time of a linear classifier.

    A sample is predicted as positive (+1) when ``val_matrix @ weights`` is
    strictly greater than zero and as negative (-1) otherwise.

    Parameters
    ----------
    val_matrix : 2-D array
        Validation data matrix.
    weights : array
        Weight vector multiplied with ``val_matrix``.
    val_vector : array
        True labels, compared against +1 and -1.
    tm : float
        Elapsed time in seconds, printed with six decimal places.

    Returns
    -------
    None. The report is written to standard output.
    """
    dot_product = val_matrix @ weights
    predicted_values = np.where(dot_product > 0, 1, -1)
    TP = np.sum((predicted_values == 1) & (val_vector == 1))
    TN = np.sum((predicted_values == -1) & (val_vector == -1))
    FP = np.sum((predicted_values == 1) & (val_vector == -1))
    FN = np.sum((predicted_values == -1) & (val_vector == 1))
    total = TP + TN + FP + FN
    # With nothing to score the accuracy is undefined; report nan without
    # triggering a division-by-zero warning.
    acc = (TP + TN) / total if total else float("nan")
    # print out result
    print("\t\t\tActually positive:\tActually negative:")
    print(f"Predicted positive:\tTP - {TP}\t\t\tFP - {FP}")
    print(f"Predicted negative:\tFN - {FN}\t\t\tTN - {TN}")
    # Format specs instead of nested "{...}".format(...) calls: reusing the same
    # quote character inside an f-string is a SyntaxError before Python 3.12.
    print(f"\tAccuracy: {100 * acc:.3f}%")
    print(f"\tTime: {tm:.6f}s")

<h3>Obliczenie wyników</h3>

In [7]:
# calculate weights using gradient descent
# perf_counter is used for timing because it is a high-resolution clock intended
# for measuring short intervals, unlike the wall-clock time()
t0 = perf_counter()
gd_weights = gradient_descent(
    training_matrix,
    # start from the zero vector, sized from the data instead of a hard-coded 30
    np.zeros(training_matrix.shape[1]),
    data_training_type_vector,
    5000,
)
t1 = perf_counter()
gd_time = t1 - t0
# calculate weights using least squares (solving the normal equations)
t0 = perf_counter()
ls_weights = np.linalg.solve(
    training_matrix.T @ training_matrix, training_matrix.T @ data_training_type_vector
)
t1 = perf_counter()
ls_time = t1 - t0

<h3>Wyświetlenie wyników</h3>

In [8]:
# Report both solvers on the validation set: gradient descent first, then least squares.
# Each entry is (heading, weight vector, elapsed seconds).
for heading, weights, elapsed in (
    ("Gradient descent:", gd_weights, gd_time),
    ("\nLeast squares:", ls_weights, ls_time),
):
    print(heading)
    print_result(validation_matrix, weights, data_validation_type_vector, elapsed)

Gradient descent:
			Actually positive:	Actually negative:
Predicted positive:	TP - 38			FP - 0
Predicted negative:	FN - 22			TN - 200
	Accuracy: 91.538%
	Time: 0.010647s

Least squares:
			Actually positive:	Actually negative:
Predicted positive:	TP - 58			FP - 6
Predicted negative:	FN - 2			TN - 194
	Accuracy: 96.923%
	Time: 0.000169s
