In [2]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime

# Load the dataset
data = pd.read_csv("/content/breast_cancer_survival.csv") # Replace "your_dataset.csv" with the path to your dataset file

# Dropping rows with missing values
data.dropna(inplace=True)

# Separate features and target variable
X = data.drop(columns=["Patient_Status"])
y = data["Patient_Status"]

# Encoding categorical variables
encoder = LabelEncoder()
X_encoded = X.copy()
for col in X.columns:
    if X[col].dtype == 'object':
            X_encoded[col] = encoder.fit_transform(X[col])

# One-hot encoding categorical variables
categorical_cols = ['Tumour_Stage', 'Histology', 'ER status', 'PR status', 'HER2 status', 'Surgery_type']
X_encoded = pd.get_dummies(X_encoded, columns=categorical_cols)

            # Convert date variables to numerical representation
date_columns = ['Date_of_Surgery', 'Date_of_Last_Visit']
for col in date_columns:
        X_encoded[col] = pd.to_datetime(X_encoded[col], errors='coerce')
        X_encoded[col] = (X_encoded[col] - datetime(1970, 1, 1)).dt.total_seconds()

                    # Splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)





In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

# Load the dataset
data = pd.read_csv("/content/breast_cancer_survival.csv") # Replace "your_dataset.csv" with the path to your dataset file

# Dropping rows with missing values
data.dropna(inplace=True)

# Separate features and target variable
X = data.drop(columns=["Patient_Status"])
y = data["Patient_Status"]

# Encoding categorical variables
encoder = LabelEncoder()
X_encoded = X.copy()
for col in X.columns:
    if X[col].dtype == 'object':
        X_encoded[col] = encoder.fit_transform(X[col])

# One-hot encoding categorical variables
categorical_cols = ['Tumour_Stage', 'Histology', 'ER status', 'PR status', 'HER2 status', 'Surgery_type']
X_encoded = pd.get_dummies(X_encoded, columns=categorical_cols)

# Convert date variables to numerical representation
date_columns = ['Date_of_Surgery', 'Date_of_Last_Visit']
for col in date_columns:
    X_encoded[col] = pd.to_datetime(X_encoded[col], errors='coerce')
    X_encoded[col] = (X_encoded[col] - datetime(1970, 1, 1)).dt.total_seconds()

# Splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Initializing Gradient Boosting classifier
gb_classifier = GradientBoostingClassifier(n_estimators=100, random_state=42)

# Training the Gradient Boosting model
gb_classifier.fit(X_train, y_train)

# Predicting on the test set using Gradient Boosting
y_pred_gb = gb_classifier.predict(X_test)

# Evaluating the Gradient Boosting model
accuracy_gb = accuracy_score(y_test, y_pred_gb)
print("Gradient Boosting Accuracy:", accuracy_gb)
print("\nGradient Boosting Classification Report:\n", classification_report(y_test, y_pred_gb))

# Encoding target variable y
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Initializing XGBoost classifier
xgb_classifier = XGBClassifier(n_estimators=100, random_state=42)

# Training the XGBoost model
xgb_classifier.fit(X_train, y_train_encoded)

# Predicting on the test set using XGBoost
y_pred_xgb = xgb_classifier.predict(X_test)

# Evaluating the XGBoost model
accuracy_xgb = accuracy_score(y_test_encoded, y_pred_xgb)
print("\nXGBoost Accuracy:", accuracy_xgb)
print("\nXGBoost Classification Report:\n", classification_report(y_test_encoded, y_pred_xgb))


Gradient Boosting Accuracy: 0.78125

Gradient Boosting Classification Report:
               precision    recall  f1-score   support

       Alive       0.80      0.96      0.88        51
        Dead       0.33      0.08      0.12        13

    accuracy                           0.78        64
   macro avg       0.57      0.52      0.50        64
weighted avg       0.71      0.78      0.72        64


XGBoost Accuracy: 0.765625

XGBoost Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.92      0.86        51
           1       0.33      0.15      0.21        13

    accuracy                           0.77        64
   macro avg       0.57      0.54      0.54        64
weighted avg       0.71      0.77      0.73        64

