In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import math
import copy
import pandas as pd
from ucimlrepo import fetch_ucirepo 

# Download the UCI repository dataset with id 17 (bound to the
# breast_cancer_wisconsin_diagnostic name below).
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Feature matrix, exposed by the fetched object as a pandas DataFrame.
X = breast_cancer_wisconsin_diagnostic.data.features

# Target frame with Diagnosis re-encoded as an integer label: 'M' -> 1, 'B' -> 0.
# assign() builds a new DataFrame, so the frame held by the fetched object is
# never written to (which also sidesteps SettingWithCopyWarning).
y = breast_cancer_wisconsin_diagnostic.data.targets.assign(
    Diagnosis=lambda targets: targets['Diagnosis'].map({'M': 1, 'B': 0})
)

# Place features and the encoded target side by side in one DataFrame.
df = pd.concat([X, y], axis="columns")

print(df.head())

   radius1  texture1  perimeter1   area1  smoothness1  compactness1  \
0    17.99     10.38      122.80  1001.0      0.11840       0.27760   
1    20.57     17.77      132.90  1326.0      0.08474       0.07864   
2    19.69     21.25      130.00  1203.0      0.10960       0.15990   
3    11.42     20.38       77.58   386.1      0.14250       0.28390   
4    20.29     14.34      135.10  1297.0      0.10030       0.13280   

   concavity1  concave_points1  symmetry1  fractal_dimension1  ...  texture3  \
0      0.3001          0.14710     0.2419             0.07871  ...     17.33   
1      0.0869          0.07017     0.1812             0.05667  ...     23.41   
2      0.1974          0.12790     0.2069             0.05999  ...     25.53   
3      0.2414          0.10520     0.2597             0.09744  ...     26.50   
4      0.1980          0.10430     0.1809             0.05883  ...     16.67   

   perimeter3   area3  smoothness3  compactness3  concavity3  concave_points3  \
0      184.

In [None]:
# Print the (rows, columns) dimensions of the combined frame, then show the
# number of missing values in each column as the cell's output.
print(df.shape)
df.isna().sum()

In [None]:
target_column = 'Diagnosis'

# Split the combined frame into the label column and the remaining predictors.
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Re-attach the label to the training features so both can be explored together.
train_data = pd.concat([x_train, y_train], axis="columns")

In [None]:
# Preview the first rows of the training frame. A notebook cell only renders
# its final expression automatically, so the preview must be passed to
# display() explicitly or its result is silently discarded.
display(train_data.head())

# Correlation of every numeric column with the target, strongest positive first.
# Diagnosis was encoded earlier in this notebook as 1 = malignant (M) and
# 0 = benign (B), so a positive value means the feature tends to be larger
# for malignant samples.
train_data.corr(numeric_only=True)[target_column].sort_values(ascending=False)