In [301]:
import pickle
import pandas as pd

In [302]:
# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising,
# so only point this at a model file that comes from a trusted source.
# The path is a bare relative file name (no Windows-only ".\" prefix) so the
# same cell resolves the file in the working directory on any OS.
with open("CancerStagersModel.pkl", "rb") as f:
    model = pickle.load(f)  # load the saved model; df_test reads this global

In [303]:
def df_test(dataframe: pd.DataFrame) -> pd.Series:
    """Predict a stage label for every row of ``dataframe`` using the global ``model``.

    The columns listed in ``model.feature_names_in_`` are mandatory and must not
    contain missing values. When the frame also has a ``class_label`` column, the
    actual labels are compared with the predictions and the number of matches,
    the match percentage and a comparison table are printed.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Input rows; must contain every column in ``model.feature_names_in_``.

    Returns
    -------
    pd.Series
        Predicted labels as text, named ``'Predicted'``. The series is built from
        a plain list, so it carries a fresh 0..n-1 index (row order is that of
        ``dataframe``, but the original index labels are not kept).

    Raises
    ------
    ValueError
        If a mandatory column is absent, or if any mandatory column holds a
        missing value (only the offending columns are named in the message).
    KeyError
        If the model predicts a code that is not one of 1, 2, 3, 4.
    """
    features = list(model.feature_names_in_)

    # Check 1: every mandatory column must be present in the dataframe.
    if not all(col in dataframe.columns for col in features):
        raise ValueError(
            "Missing Mandatory Columns Detected. \n"
            "Please ensure your dataframe has the required columns:\n\n"
            f"{features}"
        )

    # Check 2: none of the mandatory columns may contain a missing value.
    na_counts = dataframe[features].isna().sum()
    # Filter first, then take the index, so only columns that really contain
    # NaN are reported (taking .index of the boolean Series lists every column).
    missing_cols = list(na_counts[na_counts > 0].index)
    if missing_cols:
        raise ValueError(f"Missing Data Detected in Columns {missing_cols}")

    # Both checks passed: run the model on the mandatory columns only.
    y_pred = model.predict(dataframe[features])

    # Text label -> numeric code. "healthy" and "screening stage cancer"
    # deliberately share code 1, i.e. they are treated as one class.
    dic = {"healthy": 1, "screening stage cancer": 1, "early stage cancer": 2,
           "mid stage cancer": 3, "late stage cancer": 4}

    # Numeric code -> display label. Labels that share a code are joined with
    # '/', which gives "healthy/screening stage cancer" for code 1.
    r_dic = {}
    for label, code in dic.items():
        r_dic[code] = f"{r_dic[code]}/{label}" if code in r_dic else label

    target = 'class_label'
    if target in dataframe.columns:
        # An actual class_label column is present, so actual and predicted
        # results can be compared.
        arr = dataframe[target].map(dic)  # merges healthy + screening stage into code 1
        df = pd.DataFrame({"Actual": dataframe[target].values,
                           "Modified Actual": arr.values,
                           "Predicted": y_pred})
        for col in ("Modified Actual", "Predicted"):
            # numeric codes (1,2,3,4) converted to meaningful information
            # (healthy/screening stage, early stage, mid stage, late stage)
            df[col] = df[col].map(r_dic)
        df['Match'] = df['Modified Actual'] == df['Predicted']
        # number of matches between predicted and modified actual results
        print(df['Match'].sum())
        # the same number as a percentage of the total number of results
        print(df['Match'].sum() / len(df) * 100)
        # the full comparison table
        print(df)

    return pd.Series([r_dic[code] for code in y_pred], name='Predicted')

In [304]:
# Default test path; can be changed to another csv file of a similar format.
# Forward slashes keep the relative path valid on Windows, Linux and macOS.
test_path = "NUS_IT/Test_Set.csv"
df = pd.read_csv(test_path)
# Draw up to 30 rows; the fixed random_state makes a fresh "Restart & Run All"
# pick the same rows every time, and the min() guard avoids the error raised
# by sample() when the file holds fewer than 30 rows.
df_sample = df.sample(n=min(30, len(df)), random_state=42)

In [305]:
# Stop pandas from wrapping wide frames across several blocks of lines, so the
# comparison table printed by df_test keeps each row on one line.
pd.set_option('expand_frame_repr', False)

# Run the prediction on the sampled rows and keep the returned labels.
pred = df_test(dataframe=df_sample)

25
83.33333333333334
                    Actual                 Modified Actual                       Predicted  Match
0       early stage cancer              early stage cancer              early stage cancer   True
1       early stage cancer              early stage cancer              early stage cancer   True
2       early stage cancer              early stage cancer              early stage cancer   True
3       early stage cancer              early stage cancer              early stage cancer   True
4   screening stage cancer  healthy/screening stage cancer  healthy/screening stage cancer   True
5                  healthy  healthy/screening stage cancer              early stage cancer  False
6        late stage cancer               late stage cancer               late stage cancer   True
7       early stage cancer              early stage cancer                mid stage cancer  False
8       early stage cancer              early stage cancer              early stage cancer   True

In [306]:
# Show the predicted labels that df_test returned for the sampled rows.
print(pred)

0                 early stage cancer
1                 early stage cancer
2                 early stage cancer
3                 early stage cancer
4     healthy/screening stage cancer
5                 early stage cancer
6                  late stage cancer
7                   mid stage cancer
8                 early stage cancer
9     healthy/screening stage cancer
10                  mid stage cancer
11                early stage cancer
12                 late stage cancer
13                early stage cancer
14                  mid stage cancer
15    healthy/screening stage cancer
16    healthy/screening stage cancer
17                  mid stage cancer
18                 late stage cancer
19                  mid stage cancer
20                  mid stage cancer
21                 late stage cancer
22                early stage cancer
23                  mid stage cancer
24    healthy/screening stage cancer
25                  mid stage cancer
26                  mid stage cancer
2