## Load test data and saved models

In [1]:
import joblib
import pandas as pd

# Read the held-out test set from disk.
df_test = pd.read_csv("Test_data/test_data.csv")

# Target labels come from `cancer_type`; the feature matrix is everything
# except the two label columns (`cancer_type` and `type`).
y = df_test["cancer_type"]
X = df_test.drop(["cancer_type", "type"], axis=1)
display(X, y)

# Restore the three persisted per-cancer models.
# NOTE(review): joblib artifacts are pickle-based, so loading one can execute
# arbitrary code -- only load files from a trusted source.
bladder_model, brain_model, breast_model = (
    joblib.load("Model_Saved/bladder.joblib"),
    joblib.load("Model_Saved/brain.joblib"),
    joblib.load("Model_Saved/breast.joblib"),
)

Unnamed: 0,1007_s_at,1053_at,117_at,121_at,1255_g_at,1294_at,1316_at,1320_at,1405_i_at,1431_at,...,AFFX-r2-Ec-bioD-3_at,AFFX-r2-Ec-bioD-5_at,AFFX-r2-P1-cre-3_at,AFFX-r2-P1-cre-5_at,AFFX-ThrX-3_at,AFFX-ThrX-5_at,AFFX-ThrX-M_at,AFFX-TrpnX-3_at,AFFX-TrpnX-5_at,AFFX-TrpnX-M_at
0,12.601370,8.873161,9.214310,9.690334,4.341822,7.924622,6.482649,6.499633,5.368997,6.634785,...,11.330031,10.967963,13.693713,13.721795,5.254671,4.749096,3.968936,3.548965,4.554688,4.584465
1,12.921025,8.620507,7.238575,8.245388,3.682762,8.027397,7.575340,5.517496,6.761186,5.180190,...,12.758212,12.202741,13.985927,13.684496,5.013313,4.618019,3.963455,3.741682,4.617641,4.374241
2,11.046641,8.458561,6.425059,8.770478,6.532255,7.132320,7.857738,5.823012,4.491771,7.252599,...,13.100610,12.697922,14.362506,14.061722,4.950459,4.612189,4.010441,3.866341,4.471244,4.766970
3,12.369828,8.863015,6.623091,8.319927,4.084849,8.024815,7.981879,6.000366,5.452527,5.422245,...,12.482283,11.903074,13.991402,13.792137,4.936046,4.616858,3.701871,3.678101,4.555495,4.379911
4,11.299893,8.286180,6.993723,8.770720,4.026220,7.585630,7.870659,5.819242,4.631284,5.078618,...,13.276996,12.869971,14.476079,14.273778,5.100812,4.268341,3.989762,3.511990,4.644976,4.522790
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,6.533818,3.095183,6.540493,6.361546,2.295276,4.636936,4.932149,2.955106,5.199770,2.495139,...,11.364050,10.985695,13.735517,13.364968,7.703983,3.172810,4.933504,2.236060,2.343445,2.220485
166,5.657621,3.159085,3.978410,7.174108,2.742771,4.937159,5.861396,3.510067,2.972037,2.756564,...,13.084227,12.915690,14.376607,14.033410,9.916877,3.746717,5.836157,2.485093,2.626120,2.635610
167,6.515137,2.979765,3.940801,7.209822,2.525715,4.822728,5.517274,3.495081,3.780623,2.694867,...,12.660566,12.586157,14.093004,13.682465,9.405094,3.729148,6.286082,2.287532,2.712309,2.546700
168,5.998170,3.165824,4.268922,7.198874,2.738943,4.721181,5.517444,3.193544,3.143453,2.709082,...,12.069660,12.057527,13.757772,13.404077,7.642801,3.296269,3.261484,2.447714,2.889734,2.746983


0        brain
1        brain
2       normal
3        brain
4       normal
        ...   
165    bladder
166    bladder
167     normal
168     normal
169     normal
Name: cancer_type, Length: 170, dtype: object

## Preprocessing test data and predicting class probabilities

In [9]:
# Ensure feature alignment for each model: keep exactly the columns each
# model reports in `feature_names_in_`, in that order.
X_bladder = X[bladder_model.feature_names_in_]
X_brain = X[brain_model.feature_names_in_]
X_breast = X[breast_model.feature_names_in_]

# Initialize a list to store results (one dict per successfully scored sample)
results = []

# Iterate over each sample in the test dataset.
# Samples are addressed by *position* for slicing, while the index *label* is
# kept only for reporting. Mixing the two (e.g. `y.iloc[label]`) is only
# correct when the frame has a default 0..n-1 index.
for position, index in enumerate(df_test.index):
    try:
        # Slice the pre-aligned frames instead of rebuilding each sample from
        # an `iterrows()` row: that row is object dtype because `df_test`
        # also holds string label columns, so the numeric dtypes were lost.
        # Double brackets keep a one-row DataFrame with its feature names.
        sample_bladder = X_bladder.iloc[[position]]
        sample_brain = X_brain.iloc[[position]]
        sample_breast = X_breast.iloc[[position]]

        # Get probability predictions for each model ([0] = the single row)
        bladder_proba = bladder_model.predict_proba(sample_bladder)[0]
        brain_proba = brain_model.predict_proba(sample_brain)[0]
        breast_proba = breast_model.predict_proba(sample_breast)[0]

        # Store the results
        result = {
            'sample_index': index,
            'bladder_proba': bladder_proba.tolist(),
            'brain_proba': brain_proba.tolist(),
            'breast_proba': breast_proba.tolist(),
            'true_label': y.iloc[position]
        }
        results.append(result)
    except Exception as e:
        # Best-effort scoring: report the failing sample and continue, so one
        # bad row does not discard the predictions for all the others.
        print(f"Error processing sample at index {index}: {e}")

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to a CSV file (the probability lists are written as their
# string representation, one list per cell)
results_df.to_csv("prediction_results.csv", index=False)

print("Predicted probabilities saved to 'prediction_results.csv'.")


Predicted probabilities saved to 'prediction_results.csv'.


In [10]:
# Split probabilities into separate columns for clarity.
# Each `<organ>_proba` list is expanded into `<organ>_proba_class_<i>` columns,
# where <i> is the position of the value in the list returned by
# `predict_proba` (presumably the order of the model's `classes_` -- confirm
# against the fitted models). Enumerating the list, rather than unpacking
# exactly two values, gives the same columns for binary models and does not
# raise if a model exposes a different number of classes.
results_cleaned = []
for result in results:
    cleaned = {'sample_index': result['sample_index']}
    for organ in ('bladder', 'brain', 'breast'):
        for class_position, probability in enumerate(result[f'{organ}_proba']):
            cleaned[f'{organ}_proba_class_{class_position}'] = probability
    # Added last so `true_label` stays the final column, after all probabilities.
    cleaned['true_label'] = result['true_label']
    results_cleaned.append(cleaned)

# Convert cleaned results to DataFrame
results_cleaned_df = pd.DataFrame(results_cleaned)

# Save the cleaned results to a new CSV file
results_cleaned_df.to_csv("prediction_results_cleaned.csv", index=False)

print("Cleaned predicted probabilities saved to 'prediction_results_cleaned.csv'.")


Cleaned predicted probabilities saved to 'prediction_results_cleaned.csv'.
