In [1]:
# Dependencies & Installs
import pandas as pd
import streamlit as st
import pickle

In [3]:
# Read the sample dataset into a pandas DataFrame. The CSV is expected to be
# cleaned and scaled before it reaches this notebook.
# The S3 copy is kept here for reference; the local file below is what is read.
# s3_file_path = "s3://esophageal-cancer-biochem-data/Joined_df_cleaned.csv"
# df = pd.read_csv(s3_file_path)

local_file_path = "Data_Cleaned/User_Samples/users.csv"
df = pd.read_csv(filepath_or_buffer=local_file_path)
# NOTE(review): the preview appears to contain an "Unnamed: 0" column, which
# usually means the CSV was written with its index — confirm whether the model
# expects that column.
df.head(n=5)

Unnamed: 0,Patient Group,O95445_AAL,P00450_AAL,P00734_AAL,P00736_AAL,P00738_AAL,P00747_AAL,P00748_AAL,P00751_AAL,P01008_AAL,...,P02748_LSPIYNLVPVK_Ratio_NPL,P04114_SPAFTDLHLR_Ratio_NPL,P06396_AVEVLPK_Ratio_NPL,P0C0L5_GSFEFPVGDAVSK_Ratio_NPL,P10643_LTPLYELVK_Ratio_NPL,Protein (μg/μL),Age at Collection,BMI (kg/m2),Gender_F,Gender_M
0,BE-LGD,235835.9696,446414.5379,50712.82578,126492.5,3367665.0,1460257.0,185795.7003,484956.8,269542.8,...,1.626764,4.462419,1.133528,0.649006,1.089551,76.593205,55.0,27.7,0,1
1,EAC,253204.992,548152.8704,53040.29111,117080.3,9780856.0,1276418.0,102165.9734,423352.7,206839.6,...,0.682581,4.710647,1.187228,0.409018,0.965052,67.596795,65.0,32.6,0,1
2,BE-LGD,223918.8373,668157.9861,49551.79843,114322.8,26163390.0,1378266.0,135028.6851,467838.1,139354.4,...,1.183182,3.761115,1.377637,0.62745,1.275043,74.833366,55.0,30.2,1,0
3,NSE,317408.86981,624661.826663,166048.185854,58798640.0,31518610.0,2006994.0,66677.865179,8918962.0,3406724.0,...,1.85858,11.655492,4.47199,0.8215,2.668353,72.455779,54.0,30.04,1,0
4,BE-HGD,251668.1616,345571.7982,67323.52052,137969.3,22201310.0,1482350.0,86503.25636,405331.8,275848.5,...,0.859171,6.339829,1.221672,0.5802,2.279625,80.726013,63.0,33.5,0,1


In [9]:
# Restore the fitted model from its pickle file.
# NOTE: unpickling can execute arbitrary code — only load model files that
# come from a trusted source.
with open('Models/Model_Saved/model_rf_LogisticRegression.pkl', mode='rb') as model_file:
    model_rf = pickle.load(model_file)

In [14]:
# Define Streamlit app
def _build_model_input(sample_row, age, sex, bmi, feature_names=None):
    """Combine one dataset row with the user's answers into a one-row model input.

    The blood-test columns are taken from ``sample_row``. Age, BMI and the
    one-hot gender flags are overwritten with the user's answers under the
    dataset's own column names, so the frame keeps the dataset's column names
    and order instead of introducing a raw string ``"sex"`` column.

    Args:
        sample_row: One-row DataFrame taken from the samples dataset.
        age: Age entered by the user.
        sex: ``"male"`` or ``"female"``, as chosen in the UI.
        bmi: BMI entered by the user.
        feature_names: Optional ordered column names the model was fitted on.
            When given, the result is restricted to and ordered by them.

    Returns:
        A one-row DataFrame to pass to the model's ``predict``.

    Raises:
        ValueError: If ``feature_names`` lists a column that is not available.
    """
    features = (
        sample_row
        .reset_index(drop=True)
        # "Patient Group" is the diagnosis label, not an input measurement.
        .drop(columns=["Patient Group"], errors="ignore")
        .assign(**{
            "Age at Collection": age,
            "BMI (kg/m2)": bmi,
            "Gender_F": int(sex == "female"),
            "Gender_M": int(sex == "male"),
        })
    )

    if feature_names is not None:
        missing = [name for name in feature_names if name not in features.columns]
        if missing:
            raise ValueError(f"Model expects columns missing from the input: {missing}")
        # Same columns, same order as at fit time.
        features = features[list(feature_names)]

    return features


def app():
    """Render the risk-assessment page and show a prediction on button click.

    Reads the module-level ``df`` (sample dataset) and ``model_rf`` (fitted
    model); both must be loaded before this function is called.
    """

    # Set app title
    st.title('Esophageal Cancer Risk Assessment app')

    # Add some text
    st.write("Welcome to our Esophageal Cancer Risk Assessment app! This app utilizes advanced machine learning algorithms to estimate your risk of developing esophageal cancer based on pre-screening and blood sample data. Esophageal cancer is a life-threatening disease affecting millions of people worldwide, and early diagnosis is crucial for improving survival rates. Traditional diagnostic methods, such as endoscopy, can be invasive and expensive. Our app aims to provide a faster, more affordable, and less invasive alternative by leveraging machine learning techniques like logistic regression, decision trees, and support vector machines. Using a dataset of biochemical data from patients with varying esophageal conditions, our models have been trained and evaluated to deliver accurate predictions. Get started by inputting your data to assess your esophageal cancer risk.")
    # NOTE(review): newer Streamlit releases deprecate `use_column_width` in
    # favour of `use_container_width` — switch once the minimum version allows.
    st.image("Images/ai-generated-image-dalle.png", use_column_width=True)

    # Get user input
    age = st.number_input("Enter your age", value=30, min_value=18, max_value=100)
    sex = st.selectbox("Select your sex", ["male", "female"])
    # Float arguments so fractional BMI values (e.g. 27.7) can be entered.
    bmi = st.number_input("Enter your BMI", value=25.0, min_value=0.0, max_value=50.0, step=0.1)
    diagnosed = st.selectbox("Have you been diagnosed?", ["No", "Barrett esophagus - no dysplasia", "Barrett esophagus - low dysplasia", "Barrett esophagus - high dysplasia", "Esophageal cancer"])

    # Translate the answer into the dataset's "Patient Group" code. The value is
    # not fed to the model: "Patient Group" is removed from the features.
    patient_group = {
        "No": "NSE",
        "Barrett esophagus - no dysplasia": "BE",
        "Barrett esophagus - low dysplasia": "BE-LGD",
        "Barrett esophagus - high dysplasia": "BE-HGD",
        "Esophageal cancer": "EAC",
    }.get(diagnosed, "")

    # If the user clicks the "Generate Blood Test Results" button, fetch a sample row from the dataset
    if st.button("Generate Blood Test Results"):
        sample_row = df.sample(n=1)
        model_input = _build_model_input(
            sample_row,
            age,
            sex,
            bmi,
            # Present on scikit-learn estimators fitted on a DataFrame; when
            # absent, the dataset's own column order is used.
            feature_names=getattr(model_rf, "feature_names_in_", None),
        )

        # Make a prediction using the model
        prediction = model_rf.predict(model_input)

        # Display the prediction
        # NOTE(review): assumes the model's positive class is encoded as 1 —
        # confirm against the labels used at training time.
        if prediction[0] == 1:
            st.write("You have a high risk of developing esophageal cancer.")
        else:
            st.write("You have a low risk of developing esophageal cancer.")
    else:
        st.write("Click the button to generate blood test results and make a prediction.")

# Run the Streamlit app
if __name__ == '__main__':
    app()