Data manipulation using the provided clinical-data CSV file from The Cancer Imaging Archive (TCIA)

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

In [None]:
# Read the clinical CSV into a DataFrame and preview its first rows.
# NOTE(review): absolute '/content/...' path — presumably a Colab upload location; confirm before running elsewhere
clinical_csv_path = '/content/HCC-TACE-Seg_clinical_data-V2-final.csv'
clinical_data = pd.read_csv(clinical_csv_path)
clinical_data.head()

In [None]:
# One-hot encode the 'hepatitis' column, then discard the last indicator
# column (selected by position, whatever category that happens to be).
# NOTE(review): presumably the dropped category acts as the reference level — confirm
hepatitis_dummies = pd.get_dummies(clinical_data['hepatitis'])
hepatitis = hepatitis_dummies.drop(columns=hepatitis_dummies.columns[-1:])
hepatitis.head()

In [None]:
# One-hot encode the 'agegp' column and leave out the '<=40' indicator column.
age_group = pd.get_dummies(clinical_data['agegp']).drop(columns='<=40')
age_group.head()

In [None]:
# One-hot encode the 'chemotherapy' column; every indicator column is kept.
chemo = pd.get_dummies(data=clinical_data['chemotherapy'])
chemo.head()

In [None]:
# Append the three indicator frames to the clinical table as extra columns
# (rows are aligned on the shared index).
indicator_frames = [hepatitis, age_group, chemo]
clinical_data = pd.concat([clinical_data, *indicator_frames], axis='columns')
clinical_data.head()

In [None]:
# Remove columns that are no longer wanted: 'hepatitis', 'agegp' and
# 'chemotherapy' were one-hot encoded in earlier cells; 'TCIA_ID' and
# 'AFP_group' are dropped without a replacement.
source_columns_to_drop = ['TCIA_ID', 'hepatitis', 'agegp', 'AFP_group', 'chemotherapy']
clinical_data = clinical_data.drop(columns=source_columns_to_drop)
clinical_data.head()

In [None]:
# Count the missing values in each column.
# A bare `clinical_data.isna()` renders a True/False frame that pandas
# truncates under its default display options, so gaps outside the rows and
# columns that happen to be shown go unnoticed; the per-column totals are
# complete.
clinical_data.isna().sum()

In [None]:
# Remove the rows whose index labels are 105 and 106.
# NOTE(review): the labels are hard-coded — presumably these are the rows the
# preceding missing-value check showed to be empty; confirm against the CSV.
clinical_data = clinical_data.drop(index=[105, 106])

# Re-check missing values as per-column totals; the full True/False frame is
# truncated by pandas' default display options and cannot show whether any
# gaps remain in the rows that are not displayed.
clinical_data.isna().sum()

In [None]:
# Drop three more columns from the table before modelling.
extra_columns_to_drop = ['fhx_can', 'fhx_livc', 'Personal history of cancer']
clinical_data = clinical_data.drop(columns=extra_columns_to_drop)
clinical_data.head()
# NOTE(review): the list below is not used by any code in this cell; it looks
# like leftover chemotherapy indicator column names — confirm or remove.
# 'Cisplastin', 'Cisplatin, Mitomycin-C','Cisplatin, doxorubicin, Mitomycin-C','doxorubicin LC beads'

Model Build

In [None]:
# Separate the target column (Y) from the remaining feature columns (X).
target_column = 'Censored_0_progressed_1'
Y = clinical_data[target_column]
X = clinical_data.drop(columns=[target_column])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
train_test_parts = train_test_split(X, Y, random_state=1, test_size=0.3)
X_train, X_test, Y_train, Y_test = train_test_parts

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier with a raised iteration cap (2500) and a
# fixed seed.
logistic_regression_options = {'max_iter': 2500, 'random_state': 42}
log_reg_model = LogisticRegression(**logistic_regression_options)

In [None]:
# Fit the classifier on the training split; the fitted estimator is the
# cell's displayed value.
log_reg_model.fit(X=X_train, y=Y_train)

Model Testing and Analysis

In [None]:
# Reference labels of the held-out split, and the model's predictions for it.
y_true = Y_test
y_pred = log_reg_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

In [None]:
# Report accuracy first, then the binary-averaged precision / recall / F-score,
# each rounded to two decimals.
print("Accuracy:", np.round(accuracy_score(y_true, y_pred), 2))

# The fourth value returned (support) is not used.
precision, recall, fscore, _ = precision_recall_fscore_support(
    y_true, y_pred, average='binary'
)
for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F-Score:", fscore),
):
    print(metric_label, np.round(metric_value, 2))

# Build the confusion matrix for the held-out predictions and draw it as an
# annotated heatmap (rows: true labels, columns: predicted labels).
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

cf_matrix = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix")

# Use an explicit figure/axes pair instead of the implicit pyplot state so the
# title and labels are attached to this plot only.
fig, ax = plt.subplots()
# fmt='d' prints the cell counts as plain integers; the default annotation
# format ('.2g') switches to scientific notation once a count reaches 100.
sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix', fontsize=12)
ax.set_xlabel('Predicted', fontsize=12)
ax.set_ylabel('True', fontsize=12)
# Render the figure explicitly so the cell does not echo a Text(...) repr.
plt.show()