In [72]:
import pandas as pd
import numpy as np
import random
import math
import random
from logistic_regression import *

In [65]:
def generate_chaotic_vectors(M, N):
    vectors = []
    for x in range(M):
        x0 = random.uniform(1e-10, 1 - 1e-10)
        trajectory = [x0]
        for y in range(N - 1):
            trajectory.append(4 * trajectory[-1] * (1 - trajectory[-1]))
        vectors.append(trajectory)
    return vectors

In [66]:
def cosine_sign_projection(data, chaotic_vectors):
    projected_data = []

    for sample in data:
        projection = []
        for chaos in chaotic_vectors:
            chaos = [2 * x - 1 for x in chaos]
            dot = sum(s * c for s, c in zip(sample, chaos))
            norm_sample = math.sqrt(sum(s ** 2 for s in sample))
            norm_chaos = math.sqrt(sum(c ** 2 for c in chaos))

            if norm_sample == 0 or norm_chaos == 0:
                cos_sim = 0
            else:
                cos_sim = dot / (norm_sample * norm_chaos)

            projection.append(1 if cos_sim >= 0 else -1)

        projected_data.append(projection)

    return projected_data

In [68]:
def train_test_split(X, y, test_ratio=0.2):
    combined = list(zip(X, y))
    random.shuffle(combined)
    split_index = int(len(combined) * (1 - test_ratio))
    train_data = combined[:split_index]
    test_data = combined[split_index:]

    X_train, y_train = zip(*train_data)
    X_test, y_test = zip(*test_data)

    return list(X_train), list(y_train), list(X_test), list(y_test)

In [67]:
def calculate_metrics(y_true, y_pred):
    tp = sum(1 for yt, yp in zip(y_true, y_pred) if yt == yp == 1)
    tn = sum(1 for yt, yp in zip(y_true, y_pred) if yt == yp == 0)
    fp = sum(1 for yt, yp in zip(y_true, y_pred) if yt == 0 and yp == 1)
    fn = sum(1 for yt, yp in zip(y_true, y_pred) if yt == 1 and yp == 0)

    accuracy = (tp + tn) / len(y_true)
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0

    return {
        'accuracy': round(accuracy, 4),
        'precision': round(precision, 4),
        'recall': round(recall, 4),
        'f1_score': round(f1, 4)
    }

In [69]:
df = pd.read_csv(r"Breast Cancer Wisconsin (Diagnostic) Data Set.csv")

df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df = df.drop(columns=['id'])

df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})

y = df['diagnosis'].values
X = df.drop(columns=['diagnosis']).values

mean = X.mean(axis=0)
std = X.std(axis=0)
std[std == 0] = 1
X_norm = (X - mean) / std

X, y = X_norm.tolist(), y.tolist()
(len(X), len(X[0])), len(y), y[:5]

((569, 30), 569, [1, 1, 1, 1, 1])

In [70]:
chaotic_vectors = generate_chaotic_vectors(5, len(X[0]))
projected_X = cosine_sign_projection(X, chaotic_vectors)

In [78]:
X_train_normal, y_train_normal, X_test_normal, y_test_normal = train_test_split(X, y, 0.2)
X_train_chaotic, y_train_chaotic, X_test_chaotic, y_test_chaotic = train_test_split(projected_X, y, 0.2)
w_normal, b_normal = train_logistic_regression(X_train_normal, y_train_normal)
w_chaotic, b_chaotic = train_logistic_regression(X_train_chaotic, y_train_chaotic)

In [79]:
y_pred_normal = predict_logistic_regression(X_test_normal, w_normal, b_normal)
y_pred_chaotic = predict_logistic_regression(X_test_chaotic, w_chaotic, b_chaotic)

In [80]:
metrics_logistic = calculate_metrics(y_test_normal, y_pred_normal)
metrics_chaotic = calculate_metrics(y_test_chaotic, y_pred_chaotic)

In [81]:
print(metrics_chaotic)

{'accuracy': 0.886, 'precision': 0.8378, 'recall': 0.8158, 'f1_score': 0.8267}


In [82]:
print(metrics_logistic)

{'accuracy': 0.9649, 'precision': 0.9767, 'recall': 0.9333, 'f1_score': 0.9545}
