In [7]:
import pandas as pd
import numpy as np
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Incarcam dataset-ul
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)
X = breast_cancer_wisconsin_diagnostic.data.features
y = breast_cancer_wisconsin_diagnostic.data.targets

# Selectam doar caracteristicile relevante: radius1 si texture1
X_selected = X[['radius1', 'texture1']]

# Convertim etichetele: M (malign) = 1, B (benign) = 0
y = y['Diagnosis'].map({'M': 1, 'B': 0})

# Impartim datele in set de antrenare (80%) si testare (20%)
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Normalizăm datele
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Antrenam modelul de regresie logistică
model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

# Evalua m modelul pe setul de testare
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Acuratețea pe setul de testare:", accuracy)
print("\nRaport de clasificare:")
print(classification_report(y_test, y_pred, target_names=['Benign', 'Malign']))

# Facem predictia pentru leziunea data: raza = 18, textura = 10
new_lesion = np.array([[18, 10]])
new_lesion_scaled = scaler.transform(new_lesion)
prediction = model.predict(new_lesion_scaled)
prediction_proba = model.predict_proba(new_lesion_scaled)

# Afișăm rezultatul predicției
print("\nPredictie pentru leziunea cu raza = 18 si textura = 10:")
print("Clasa prezisa:", "Malign" if prediction[0] == 1 else "Benign")
print("Probabilitati (Benign, Malign):", prediction_proba[0])

Acuratețea pe setul de testare: 0.9035087719298246

Raport de clasificare:
              precision    recall  f1-score   support

      Benign       0.91      0.94      0.92        71
      Malign       0.90      0.84      0.87        43

    accuracy                           0.90       114
   macro avg       0.90      0.89      0.90       114
weighted avg       0.90      0.90      0.90       114


Predicție pentru leziunea cu raza = 18 și textura = 10:
Clasa prezisă: Malign
Probabilități (Benign, Malign): [0.31547388 0.68452612]




In [3]:
print(X.columns)
print(X.columns)


Index(['radius1', 'texture1', 'perimeter1', 'area1', 'smoothness1',
       'compactness1', 'concavity1', 'concave_points1', 'symmetry1',
       'fractal_dimension1', 'radius2', 'texture2', 'perimeter2', 'area2',
       'smoothness2', 'compactness2', 'concavity2', 'concave_points2',
       'symmetry2', 'fractal_dimension2', 'radius3', 'texture3', 'perimeter3',
       'area3', 'smoothness3', 'compactness3', 'concavity3', 'concave_points3',
       'symmetry3', 'fractal_dimension3'],
      dtype='object')


In [17]:
import pandas as pd
import numpy as np
from ucimlrepo import fetch_ucirepo

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def normalize_data(X):
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    X_normalized = (X - mean) / std
    return X_normalized, mean, std

def normalize_new_data(X, mean, std):
    return (X - mean) / std

class ManualLogisticRegression:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for epoch in range(self.epochs):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = sigmoid(linear_model)
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict_proba(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = sigmoid(linear_model)
        return np.vstack((1 - y_predicted, y_predicted)).T

    def predict(self, X):
        probabilities = self.predict_proba(X)[:, 1]
        return (probabilities >= 0.5).astype(int)

breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)
X = breast_cancer_wisconsin_diagnostic.data.features
y = breast_cancer_wisconsin_diagnostic.data.targets

X_selected = X[['radius1', 'texture1']].values

y = y['Diagnosis'].map({'M': 1, 'B': 0}).values

np.random.seed(42)
indices = np.arange(len(X_selected))
train_size = int(0.8 * len(X_selected))
train_indices = indices[:train_size]
test_indices = indices[train_size:]

X_train = X_selected[train_indices]
X_test = X_selected[test_indices]
y_train = y[train_indices]
y_test = y[test_indices]

X_train_scaled, mean, std = normalize_data(X_train)
X_test_scaled = normalize_new_data(X_test, mean, std)

model = ManualLogisticRegression(learning_rate=0.01, epochs=10000)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)
accuracy = np.mean(y_pred == y_test)
print("Accuracy on test set (manual):", accuracy)

def classification_metrics(y_true, y_pred):
    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1

precision_benign, recall_benign, f1_benign = classification_metrics(y_test == 0, y_pred == 0)
precision_malign, recall_malign, f1_malign = classification_metrics(y_test == 1, y_pred == 1)

print("\nClassification report (manual):")
print("Class Benign:")
print(f"  Precision: {precision_benign:.3f}")
print(f"  Recall: {recall_benign:.3f}")
print(f"  F1-score: {f1_benign:.3f}")
print("Class Malign:")
print(f"  Precision: {precision_malign:.3f}")
print(f"  Recall: {recall_malign:.3f}")
print(f"  F1-score: {f1_malign:.3f}")

new_lesion = np.array([[18, 10]])
new_lesion_scaled = normalize_new_data(new_lesion, mean, std)
prediction = model.predict(new_lesion_scaled)
prediction_proba = model.predict_proba(new_lesion_scaled)

print("\nPrediction for lesion with radius = 18 and texture = 10 (manual):")
print("Predicted class:", "Malign" if prediction[0] == 1 else "Benign")
print("Probabilities (Benign, Malign):", prediction_proba[0])

Accuracy on test set (manual): 0.8245614035087719

Classification report (manual):
Class Benign:
  Precision: 0.986
  Recall: 0.784
  F1-score: 0.873
Class Malign:
  Precision: 0.568
  Recall: 0.962
  F1-score: 0.714

Prediction for lesion with radius = 18 and texture = 10 (manual):
Predicted class: Malign
Probabilities (Benign, Malign): [0.34349151 0.65650849]
