In [None]:
# Setting up the environment

import svelte_widget
import xgboost
import shap
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf

# Pin the global TensorFlow and NumPy seeds so that code drawing from these
# global generators gives the same results on every fresh run.
tf.random.set_seed(42)
np.random.seed(42)

In [None]:
# Upload and set up data
#
# Produces:
#   df     - cleaned, integer-encoded frame (unscaled, rows with NaN removed)
#   stdDf  - MinMax-scaled copy of df (all columns, fresh 0..n-1 index)
#   stdX   - scaled feature columns (everything except 'Patient_Status')
#   stdY   - unscaled 'Patient_Status' target, index aligned with stdX

from sklearn.preprocessing import MinMaxScaler
from datetime import datetime

df = pd.read_csv("BRCA.csv")

# Deal with non-numerical variables

# The first column is dropped by position.
# NOTE(review): presumably a patient identifier -- confirm against the CSV.
df = df.drop(df.columns[0], axis=1)

# Encode every categorical column as integer codes. Series.map turns any value
# that is not a key of the dictionary into NaN, so rows holding an unlisted
# category are silently removed by the dropna() further down.
# NOTE(review): verify that the dictionaries cover every category present in
# the CSV, otherwise those patients are excluded from the analysis.
df['Gender'] = df['Gender'].map({"FEMALE": 0, "MALE": 1})
df['Tumour_Stage'] = df['Tumour_Stage'].map({"I": 0, "II": 1, "III": 2})
df['Histology'] = df['Histology'].map({"Infiltrating Ductal Carcinoma": 0, "Mucinous Carcinoma": 1})
df['ER status'] = df['ER status'].map({"Negative": 0, "Positive": 1})
df['PR status'] = df['PR status'].map({'Negative': 0, 'Positive': 1})
df['HER2 status'] = df['HER2 status'].map({"Negative": 0, "Positive": 1})
df['Surgery_type'] = df['Surgery_type'].map({"Modified Radical Mastectomy": 0, "Lumpectomy": 1, "Simple Mastectomy": 2})
df['Patient_Status'] = df['Patient_Status'].map({"Dead": 0, "Alive": 1})

# Temporal variables for now are not considered.
# Disabled date parsing, kept for reference:
# for i in range(0, df.shape[0]):
#     df.loc[i, 'Date_of_Surgery'] = datetime.strptime(df.loc[i, 'Date_of_Surgery'], '%d-%b-%y')
#     df.loc[i, 'Date_of_Last_Visit'] = datetime.strptime(df.loc[i, 'Date_of_Last_Visit'], '%d-%b-%y')
df = df.drop(['Date_of_Surgery', 'Date_of_Last_Visit'], axis=1)

# Remove every row that still contains a missing value (original gaps as well
# as the NaN produced by the encodings above).
df = df.dropna()

# Initialize the scaler
scaler = MinMaxScaler()

# Apply the scaler to every column in the dataframe.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed in a later cell, so the test rows influence the scaling
# ranges. Consider fitting on the training rows only.
stdDf = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

stdX = stdDf.drop(columns=['Patient_Status'])

# We do not want to scale Y, so widget will display real values
# for predictions and not scaled ones. The target is therefore read from the
# unscaled frame. Its index is reset because stdDf was rebuilt from a NumPy
# array (fresh 0..n-1 index) while df keeps its original row labels after
# dropna(); resetting keeps stdX and stdY aligned label-by-label.
stdY = df['Patient_Status'].reset_index(drop=True)

In [None]:
# Display the preprocessed frame: categorical columns encoded, date columns
# removed, rows with missing values dropped. Values here are NOT scaled.
df

In [None]:
# Defining and training the two models

from sklearn.linear_model import LinearRegression

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
stdX_train, stdX_test, stdy_train, stdy_test = train_test_split(
    stdX,
    stdY,
    test_size=0.2,
    random_state=42,
)

# Model1: XGBoost regressor fitted on the training split
stdmodel1 = xgboost.XGBRegressor(random_state=42)
stdmodel1.fit(stdX_train, stdy_train)

# Model2: ordinary least-squares linear regression on the same split
stdmodel2 = LinearRegression()
stdmodel2.fit(stdX_train, stdy_train)

In [None]:
# Instantiate the widget with the held-out features/targets and the bound
# predict methods of the XGBoost and linear-regression models, then show it
# as the cell output.
z = svelte_widget.ExampleWidget(
    stdX_test,
    stdy_test,
    stdmodel1.predict,
    stdmodel2.predict,
)
z

In [None]:
# Let's try naive Bayes

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Create the Gaussian Naive Bayes model and fit it on the training split
# (fit returns the fitted estimator itself).
modelNB = GaussianNB().fit(stdX_train, stdy_train)

# Predicted labels for the held-out rows
stdy_pred = modelNB.predict(stdX_test)

# Share of held-out rows whose predicted label matches the true one
accuracy = accuracy_score(stdy_test, stdy_pred)
print("Accuracy:", accuracy)

In [None]:
# Instantiate the widget again on the same held-out split, this time pairing
# the XGBoost model's predict method with the Naive Bayes one, and show it.
w = svelte_widget.ExampleWidget(
    stdX_test,
    stdy_test,
    stdmodel1.predict,
    modelNB.predict,
)
w