# THYROID CANCER RECURRANCE DETECTOR MODEL

### Import All Necessary Libraries

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from flask import Flask, request, jsonify
import streamlit as st
import warnings
warnings.filterwarnings("ignore")

### Import The Dataset

In [27]:
df = pd.read_csv(r"C:\Users\Hp\Documents\Zion Tech\Thyroid.csv")
df.head()

Unnamed: 0,Age,Gender,Smoking,Hx Smoking,Hx Radiothreapy,Thyroid Function,Physical Examination,Adenopathy,Pathology,Focality,Risk,T,N,M,Stage,Response,Recurred
0,27,F,No,No,No,Euthyroid,Single nodular goiter-left,No,Micropapillary,Uni-Focal,Low,T1a,N0,M0,I,Indeterminate,No
1,34,F,No,Yes,No,Euthyroid,Multinodular goiter,No,Micropapillary,Uni-Focal,Low,T1a,N0,M0,I,Excellent,No
2,30,F,No,No,No,Euthyroid,Single nodular goiter-right,No,Micropapillary,Uni-Focal,Low,T1a,N0,M0,I,Excellent,No
3,62,F,No,No,No,Euthyroid,Single nodular goiter-right,No,Micropapillary,Uni-Focal,Low,T1a,N0,M0,I,Excellent,No
4,62,F,No,No,No,Euthyroid,Multinodular goiter,No,Micropapillary,Multi-Focal,Low,T1a,N0,M0,I,Excellent,No


### Exploring The Dataset

In [28]:
df.shape

(383, 17)

In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 383 entries, 0 to 382
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Age                   383 non-null    int64 
 1   Gender                383 non-null    object
 2   Smoking               383 non-null    object
 3   Hx Smoking            383 non-null    object
 4   Hx Radiothreapy       383 non-null    object
 5   Thyroid Function      383 non-null    object
 6   Physical Examination  383 non-null    object
 7   Adenopathy            383 non-null    object
 8   Pathology             383 non-null    object
 9   Focality              383 non-null    object
 10  Risk                  383 non-null    object
 11  T                     383 non-null    object
 12  N                     383 non-null    object
 13  M                     383 non-null    object
 14  Stage                 383 non-null    object
 15  Response              383 non-null    ob

In [30]:
df.describe()

Unnamed: 0,Age
count,383.0
mean,40.866841
std,15.134494
min,15.0
25%,29.0
50%,37.0
75%,51.0
max,82.0


### Empty Cells and Duplicates

In [31]:
# Checking for empty cells and duplicates
print("Empty cell : ", df.isna().sum().sum())
print("Duplicates : ", df.duplicated().sum())

Empty cell :  0
Duplicates :  19


In [32]:
# Droping the duplicates
df = df.drop_duplicates()
df.shape

(364, 17)

### Encoding The Dataset

In [33]:
df.columns

Index(['Age', 'Gender', 'Smoking', 'Hx Smoking', 'Hx Radiothreapy',
       'Thyroid Function', 'Physical Examination', 'Adenopathy', 'Pathology',
       'Focality', 'Risk', 'T', 'N', 'M', 'Stage', 'Response', 'Recurred'],
      dtype='object')

In [34]:
for col in df.columns:
    print(col,"has:", df[col].nunique())

Age has: 65
Gender has: 2
Smoking has: 2
Hx Smoking has: 2
Hx Radiothreapy has: 2
Thyroid Function has: 5
Physical Examination has: 5
Adenopathy has: 6
Pathology has: 4
Focality has: 2
Risk has: 3
T has: 7
N has: 3
M has: 2
Stage has: 5
Response has: 4
Recurred has: 2


In [35]:
# Encoding these columns; Gender, Smoking, Hx Smoking, Hx Radiothreapy, T, N, M, Stage, Focality, Recurred using labelencoder
columns = ["Gender", "Smoking", "Hx Smoking", "Hx Radiothreapy", "T", "N", "M", "Stage", "Focality", "Recurred"]
for i in columns:
    encoder = LabelEncoder()
    df[i] = encoder.fit_transform(df[i])

df.head()

Unnamed: 0,Age,Gender,Smoking,Hx Smoking,Hx Radiothreapy,Thyroid Function,Physical Examination,Adenopathy,Pathology,Focality,Risk,T,N,M,Stage,Response,Recurred
0,27,0,0,0,0,Euthyroid,Single nodular goiter-left,No,Micropapillary,1,Low,0,0,0,0,Indeterminate,0
1,34,0,0,1,0,Euthyroid,Multinodular goiter,No,Micropapillary,1,Low,0,0,0,0,Excellent,0
2,30,0,0,0,0,Euthyroid,Single nodular goiter-right,No,Micropapillary,1,Low,0,0,0,0,Excellent,0
3,62,0,0,0,0,Euthyroid,Single nodular goiter-right,No,Micropapillary,1,Low,0,0,0,0,Excellent,0
4,62,0,0,0,0,Euthyroid,Multinodular goiter,No,Micropapillary,0,Low,0,0,0,0,Excellent,0


In [36]:
# Encoding the Risk column
df["Risk"].unique()

array(['Low', 'Intermediate', 'High'], dtype=object)

In [37]:
df["Risk"] = df["Risk"].map({"Low" : 0, "Intermediate" : 1, "High" : 2})
df.head()

Unnamed: 0,Age,Gender,Smoking,Hx Smoking,Hx Radiothreapy,Thyroid Function,Physical Examination,Adenopathy,Pathology,Focality,Risk,T,N,M,Stage,Response,Recurred
0,27,0,0,0,0,Euthyroid,Single nodular goiter-left,No,Micropapillary,1,0,0,0,0,0,Indeterminate,0
1,34,0,0,1,0,Euthyroid,Multinodular goiter,No,Micropapillary,1,0,0,0,0,0,Excellent,0
2,30,0,0,0,0,Euthyroid,Single nodular goiter-right,No,Micropapillary,1,0,0,0,0,0,Excellent,0
3,62,0,0,0,0,Euthyroid,Single nodular goiter-right,No,Micropapillary,1,0,0,0,0,0,Excellent,0
4,62,0,0,0,0,Euthyroid,Multinodular goiter,No,Micropapillary,0,0,0,0,0,0,Excellent,0


In [38]:
# Encoding the pathology and response columns
print(df["Pathology"].unique())
print(df["Response"].unique())

['Micropapillary' 'Papillary' 'Follicular' 'Hurthel cell']
['Indeterminate' 'Excellent' 'Structural Incomplete'
 'Biochemical Incomplete']


In [39]:
# Since the pathology response and Adenopathy columns are ordinal will can use orddinalencoder or mapping
encoder1 = OrdinalEncoder(categories = [["Papillary", "Follicular", "Hurthel cell", "Micropapillary"]])
df["Pathology"] = encoder1.fit_transform(df[["Pathology"]])
df["Pathology"]

0      3.0
1      3.0
2      3.0
3      3.0
4      3.0
      ... 
378    0.0
379    0.0
380    0.0
381    2.0
382    0.0
Name: Pathology, Length: 364, dtype: float64

In [40]:
# Encoding the response column by mapping
df["Response"] = df["Response"].map({"Biochemical Incomplete" : 0, "Structural Incomplete" : 1, "Indeterminate" : 2, "Excellent" : 3})
df["Response"]

0      2
1      3
2      3
3      3
4      3
      ..
378    0
379    1
380    1
381    1
382    1
Name: Response, Length: 364, dtype: int64

In [41]:
# Encoding the adenopathy column by mapping
df["Adenopathy"].unique()

array(['No', 'Right', 'Extensive', 'Left', 'Bilateral', 'Posterior'],
      dtype=object)

In [42]:
df["Adenopathy"] = df["Adenopathy"].map({"No" : 0, "Right": 1, "Left" : 1, "Posterior" : 2, "Bilateral" : 3, "Extensive" : 4})
df["Adenopathy"]

0      0
1      0
2      0
3      0
4      0
      ..
378    1
379    4
380    3
381    4
382    3
Name: Adenopathy, Length: 364, dtype: int64

In [43]:
# Emcoding the Physical Examination
df["Physical Examination"] = df["Physical Examination"].map({"Normal" : 0,
                                                            "Diffuse goiter" : 1,
                                                            "Single nodular goiter-left" : 2,
                                                            "Single nodular goiter-right" : 2,
                                                            "Multinodular goiter" : 3})

In [44]:
df.head()

Unnamed: 0,Age,Gender,Smoking,Hx Smoking,Hx Radiothreapy,Thyroid Function,Physical Examination,Adenopathy,Pathology,Focality,Risk,T,N,M,Stage,Response,Recurred
0,27,0,0,0,0,Euthyroid,2,0,3.0,1,0,0,0,0,0,2,0
1,34,0,0,1,0,Euthyroid,3,0,3.0,1,0,0,0,0,0,3,0
2,30,0,0,0,0,Euthyroid,2,0,3.0,1,0,0,0,0,0,3,0
3,62,0,0,0,0,Euthyroid,2,0,3.0,1,0,0,0,0,0,3,0
4,62,0,0,0,0,Euthyroid,3,0,3.0,0,0,0,0,0,0,3,0


In [45]:
# Encoding the thyroid function unsing get_dummies
df1 =pd.get_dummies(df["Thyroid Function"]).astype(int)
df1

Unnamed: 0,Clinical Hyperthyroidism,Clinical Hypothyroidism,Euthyroid,Subclinical Hyperthyroidism,Subclinical Hypothyroidism
0,0,0,1,0,0
1,0,0,1,0,0
2,0,0,1,0,0
3,0,0,1,0,0
4,0,0,1,0,0
...,...,...,...,...,...
378,0,0,1,0,0
379,0,0,1,0,0
380,0,0,1,0,0
381,1,0,0,0,0


### Merging The Dummies dataset (df1) with the dataset (df) and Droping of The Thyroid Function

In [46]:
df2 = pd.concat([df, df1], axis = 1)
df2

Unnamed: 0,Age,Gender,Smoking,Hx Smoking,Hx Radiothreapy,Thyroid Function,Physical Examination,Adenopathy,Pathology,Focality,...,N,M,Stage,Response,Recurred,Clinical Hyperthyroidism,Clinical Hypothyroidism,Euthyroid,Subclinical Hyperthyroidism,Subclinical Hypothyroidism
0,27,0,0,0,0,Euthyroid,2,0,3.0,1,...,0,0,0,2,0,0,0,1,0,0
1,34,0,0,1,0,Euthyroid,3,0,3.0,1,...,0,0,0,3,0,0,0,1,0,0
2,30,0,0,0,0,Euthyroid,2,0,3.0,1,...,0,0,0,3,0,0,0,1,0,0
3,62,0,0,0,0,Euthyroid,2,0,3.0,1,...,0,0,0,3,0,0,0,1,0,0
4,62,0,0,0,0,Euthyroid,3,0,3.0,0,...,0,0,0,3,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378,72,1,1,1,1,Euthyroid,2,1,0.0,1,...,2,1,4,0,1,0,0,1,0,0
379,81,1,1,0,1,Euthyroid,3,4,0.0,0,...,2,1,4,1,1,0,0,1,0,0
380,72,1,1,1,0,Euthyroid,3,3,0.0,0,...,2,1,4,1,1,0,0,1,0,0
381,61,1,1,1,1,Clinical Hyperthyroidism,3,4,2.0,0,...,2,0,3,1,1,1,0,0,0,0


In [47]:
# Dro[ping the thyroid function
df2.drop("Thyroid Function", axis = 1, inplace = True)
df2

Unnamed: 0,Age,Gender,Smoking,Hx Smoking,Hx Radiothreapy,Physical Examination,Adenopathy,Pathology,Focality,Risk,...,N,M,Stage,Response,Recurred,Clinical Hyperthyroidism,Clinical Hypothyroidism,Euthyroid,Subclinical Hyperthyroidism,Subclinical Hypothyroidism
0,27,0,0,0,0,2,0,3.0,1,0,...,0,0,0,2,0,0,0,1,0,0
1,34,0,0,1,0,3,0,3.0,1,0,...,0,0,0,3,0,0,0,1,0,0
2,30,0,0,0,0,2,0,3.0,1,0,...,0,0,0,3,0,0,0,1,0,0
3,62,0,0,0,0,2,0,3.0,1,0,...,0,0,0,3,0,0,0,1,0,0
4,62,0,0,0,0,3,0,3.0,0,0,...,0,0,0,3,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378,72,1,1,1,1,2,1,0.0,1,2,...,2,1,4,0,1,0,0,1,0,0
379,81,1,1,0,1,3,4,0.0,0,2,...,2,1,4,1,1,0,0,1,0,0
380,72,1,1,1,0,3,3,0.0,0,2,...,2,1,4,1,1,0,0,1,0,0
381,61,1,1,1,1,3,4,2.0,0,2,...,2,0,3,1,1,1,0,0,0,0


In [48]:
# Renaming the Columns
df2 = df2.rename(columns = {"Hx Smoking" : "Hx_Smoking",
                            "Physical Examination" : "Physical_Examination",
                            "Hx Radiothreapy" : "Hx_Radiothreapy",
                            "Clinical Hyperthyroidism" : "Clinical_Hyperthyroidism",
                            "Clinical Hypothyroidism" : "Clinical_Hypothyroidism",
                            "Subclinical Hyperthyroidism" : "Subclinical_Hyperthyroidism",
                            "Subclinical Hypothyroidism" : "Subclinical_Hypothyroidism"} )
df2.head(2)

Unnamed: 0,Age,Gender,Smoking,Hx_Smoking,Hx_Radiothreapy,Physical_Examination,Adenopathy,Pathology,Focality,Risk,...,N,M,Stage,Response,Recurred,Clinical_Hyperthyroidism,Clinical_Hypothyroidism,Euthyroid,Subclinical_Hyperthyroidism,Subclinical_Hypothyroidism
0,27,0,0,0,0,2,0,3.0,1,0,...,0,0,0,2,0,0,0,1,0,0
1,34,0,0,1,0,3,0,3.0,1,0,...,0,0,0,3,0,0,0,1,0,0


### Splitting The Dataset df2 to X_train, X_test, y_train, and y_train

In [49]:
#Selceting features (X) and label (y)
X = df2.drop("Recurred", axis = 1)
y = df2["Recurred"],
print(X)
print(" ")
print(y)

     Age  Gender  Smoking  Hx_Smoking  Hx_Radiothreapy  Physical_Examination  \
0     27       0        0           0                0                     2   
1     34       0        0           1                0                     3   
2     30       0        0           0                0                     2   
3     62       0        0           0                0                     2   
4     62       0        0           0                0                     3   
..   ...     ...      ...         ...              ...                   ...   
378   72       1        1           1                1                     2   
379   81       1        1           0                1                     3   
380   72       1        1           1                0                     3   
381   61       1        1           1                1                     3   
382   67       1        1           0                0                     3   

     Adenopathy  Pathology  Focality  R

In [50]:
#Splting The X and y to X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

ValueError: Found input variables with inconsistent numbers of samples: [364, 1]

In [51]:
# The above error indicates that there may be empty cell(s) or duplicate(s)
# Checking for empty cells and duplicates
print("Empty cell : ", df2.isna().sum().sum())
print("Duplicates : ", df2.duplicated().sum())

Empty cell :  0
Duplicates :  11


In [52]:
# Droping the duplicates
df2 = df2.drop_duplicates()
print("Duplicates : ", df2.duplicated().sum())

Duplicates :  0


In [53]:
#Selceting features (X) and label (y)
X = df2.drop("Recurred", axis = 1)
y = df2["Recurred"]

In [54]:
#Splting The X and y to X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

### Model Trainning

In [55]:
DTC = DecisionTreeClassifier()
DTC.fit(X_train, y_train)

In [56]:
ypred = DTC.predict(X_test)
ypred

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0])

In [57]:
#Evaluatiion
print(DTC.score(X_test, y_test))
print(accuracy_score(ypred, y_test))
print(f1_score(ypred, y_test))
print(confusion_matrix(ypred, y_test))

0.9528301886792453
0.9528301886792453
0.9152542372881356
[[74  2]
 [ 3 27]]


#### K Nieghbors Classifier

In [58]:
KNN = KNeighborsClassifier()
KNN.fit(X_train, y_train)

In [59]:
ypred = KNN.predict(X_test)
ypred

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0])

In [60]:
#Evaluatiion
print(KNN.score(X_test, y_test))
print(accuracy_score(ypred, y_test))
print(f1_score(ypred, y_test))
print(confusion_matrix(ypred, y_test))

0.8867924528301887
0.8867924528301887
0.7777777777777778
[[73  8]
 [ 4 21]]


#### SVM Classifier

In [61]:
SVM = SVC()
SVM.fit(X_train, y_train)

In [62]:
ypred = SVM.predict(X_test)
ypred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0])

In [63]:
#Evaluatiion
print(SVM.score(X_test, y_test))
print(accuracy_score(ypred, y_test))
print(f1_score(ypred, y_test))
print(confusion_matrix(ypred, y_test))

0.7830188679245284
0.7830188679245284
0.41025641025641024
[[75 21]
 [ 2  8]]


#### Random Forest Classifier

In [64]:
RFC = RandomForestClassifier()
RFC.fit(X_train, y_train)

In [65]:
ypred = RFC.predict(X_test)
ypred

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0])

In [66]:
#Evaluatiion
print(RFC.score(X_test, y_test))
print(accuracy_score(ypred, y_test))
print(f1_score(ypred, y_test))
print(confusion_matrix(ypred, y_test))

0.9811320754716981
0.9811320754716981
0.9655172413793104
[[76  1]
 [ 1 28]]


#### Gradient Boosting Classifier

In [67]:
GBC = GradientBoostingClassifier()
GBC.fit(X_train, y_train)

In [68]:
ypred = GBC.predict(X_test)
ypred

array([0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0])

In [69]:
#Evaluatiion
print(GBC.score(X_test, y_test))
print(accuracy_score(ypred, y_test))
print(f1_score(ypred, y_test))
print(confusion_matrix(ypred, y_test))

0.9716981132075472
0.9716981132075472
0.9491525423728814
[[75  1]
 [ 2 28]]


In [70]:
# From the evaluation, decision tree, random forest and gradient boosting performs better than the other two.
# let Tune the decision tree for better performance.
param_grid = {
              "max_depth" : [3, 5, 10],
              "min_samples_split" : [2, 5, 10],
              "min_samples_leaf" : [1, 5, 10] }

grid_search = GridSearchCV(estimator = DTC, param_grid = param_grid, cv = 5)

In [71]:
grid_search.fit(X_train, y_train)

In [72]:
print(grid_search.best_score_)
print(grid_search.best_estimator_)

0.9313469387755102
DecisionTreeClassifier(max_depth=3, min_samples_leaf=10)


In [73]:
model = grid_search.best_estimator_
model.fit(X_train, y_train)

### Saving The Model

In [75]:
joblib.dump(model, "Cancer Predictor.joblib")

['Cancer Predictor.joblib']

### Model Deployment

In [76]:
# Uploading the model
model = joblib.load("Cancer Predictor.joblib")

In [77]:

    
    st.title("Cancer Predictor")
    Age = st.text_input("Age")
    Gender = st.number_input("Gender", min_value = 0, max_value = 1)     
    Smoking = st.number_input("Smoking", min_value = 0, max_value = 1)
    Hx_Smoking = st.number_input("Hx_Smoking", min_value = 0, max_value = 1)
    Hx_Radiothreapy = st.number_input("Hx_Radiothreapy", min_value = 0, max_value = 1)
    Physical_Examination = st.number_input("Physical_Examination", min_value = 0, max_value = 5)
    Adenopathy = st.number_input("Adenopathy", min_value = 0, max_value = 5)
    Pathology = st.number_input("Pathology", min_value = 0, max_value = 3)
    Focality = st.number_input("Focality", min_value = 0, max_value = 1)
    Risk = st.number_input("Risk", min_value = 0, max_value = 2)
    T = st.number_input("T", min_value = 0, max_value = 6)
    N = st.number_input("N", min_value = 0, max_value = 2)
    M = st.number_input("M", min_value = 0, max_value = 1)
    Stage = st.number_input("Stage", min_value = 0, max_value = 4)
    Response = st.number_input("Response", min_value = 0, max_value = 3)
    Clinical_Hyperthyroidism = st.number_input("Clinical_Hyperthyroidism", min_value = 0, max_value = 1)
    Clinical_Hypothyroidism = st.number_input("Clinical_Hypothyroidism", min_value = 0, max_value = 1)
    Euthyroid = st.number_input("Euthyroid")
    Subclinical_Hyperthyroidism = st.number_input("Subclinical_Hyperthyroidism", min_value = 0, max_value = 1)
    Subclinical_Hypothyroidism = st.number_input("Subclinical_Hypothyroidism", min_value = 0, max_value = 1)

    if st.button("Prediction"):
        predict = model.predict([[Age, Gender, Smoking, Hx_Smoking, Hx_Radiothreapy, Physical_Examination, Adenopathy,
                                     Pathology, Focality, Risk, T, N, M, Stage, Response, Clinical_Hyperthyroidism,
                                     Clinical_Hypothyroidism, Euthyroid, Subclinical_Hyperthyroidism, Subclinical_Hypothyroidism]])
        st.success(predict[0])
        st.write("Prediction is 0 cancer is likely not to reoccur, but if 1 cancer is likely to reoccur")




2025-01-09 23:31:02.561 
  command:

    streamlit run C:\Users\Hp\AppData\Local\Programs\Python\Python313\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-01-09 23:31:02.607 Session state does not function when running a script without `streamlit run`


In [78]:
model.predict(X_test)

array([0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0])