### Importing dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Device config

In [None]:
import torch

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

### Data Collection and Analysis

In [None]:
# Load the breast cancer dataset through scikit-learn's dataset loader.
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [None]:
# Dump the whole dataset object, then list the keys it exposes.
print(breast_cancer_dataset)
print("keys: " , breast_cancer_dataset.keys())

In [None]:
# Build a DataFrame from the feature matrix, one named column per feature.
data_frame = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [None]:
# Preview the first few rows of the feature table.
data_frame.head()

In [None]:
# Append the dataset's class labels as a 'target' column.
data_frame['target'] = breast_cancer_dataset['target']

In [None]:
# Only the last expression of a notebook cell is echoed, and info() returns
# None, so a bare `data_frame.shape` on the first line would never be shown.
# Print it explicitly instead.
print(data_frame.shape)
data_frame.info()

In [None]:
# Summary statistics of the DataFrame's columns.
data_frame.describe()

In [None]:
# Count how many samples carry each target label.
data_frame['target'].value_counts()

Target label encoding: \
1 -> Benign \
0 -> Malignant

### Splitting the data

In [None]:
# Labels are the raw 'target' values; features are every remaining column.
y = data_frame['target'].values
X = data_frame.drop(columns='target')

In [None]:
# Hold out 20% of the samples for testing. random_state pins the shuffle so
# the split is reproducible, and stratify=y requests a split stratified on
# the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

### Standardize the data

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### Distribution and mean of the first feature (before and after scaling)

In [None]:
# Column index of the feature inspected in this section (0 = first feature).
feature_number = 0

In [None]:
# Plot a histogram of the selected feature before scaling.
# The column is looked up through feature_number rather than a hard-coded 0,
# so this plot always shows the same feature as the scaled-data plot, which
# indexes by feature_number.
feature_name = breast_cancer_dataset.feature_names[feature_number]
plt.hist(X_train[feature_name], bins=20, color='blue', alpha=0.7)
plt.title("Distribution of " + feature_name)
plt.xlabel(feature_name)
plt.ylabel("Frequency")
plt.show()

In [None]:
# Mean of the selected raw (unscaled) training column. The column is looked
# up through feature_number rather than a hard-coded 0 so it stays in step
# with the scaled data, which is indexed by feature_number.
print("mean of the first feature before scaling",
      X_train[breast_cancer_dataset.feature_names[feature_number]].mean())

In [None]:
# Plot a histogram of the selected feature after scaling.
# X_train_scaled is the StandardScaler output (indexed as rows x features),
# so a single column slice replaces the element-by-element copy loop.
data = X_train_scaled[:, feature_number]

# Label with the same feature that was sliced, not a hard-coded index 0.
feature_name = breast_cancer_dataset.feature_names[feature_number]
plt.hist(data, bins=20, color='blue', alpha=0.7)
plt.title("Distribution of " + feature_name)
plt.xlabel(feature_name)
plt.ylabel("Frequency")
plt.show()

In [None]:
# X_train_scaled[feature_number] would select a *row* (one sample across all
# features). The feature's values live in a column, so slice
# [:, feature_number] to average that feature over every training sample.
print("mean of the first feature after scaling",
      X_train_scaled[:, feature_number].mean())

### Converting the data to tensors

In [None]:
# Convert every split to a float32 tensor and move it to the selected device.
# The four conversions are identical, so they are expressed once and unpacked
# in the same order as the source arrays.
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32).to(device=device)
    for array in (X_train_scaled, X_test_scaled, y_train, y_test)
)