In [11]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor

In [12]:
# Load your data
@st.cache_data
def load_data():
    df = pd.read_excel("Research Raw Data.xlsx")
    column_names=["S#","Age","Gender",
              "PTA_500Hz","ASSR_500Hz","PTA_1KHz","ASSR_1KHz","PTA_2KHz","ASSR_2KHz","PTA_4KHz","ASSR_4KHz",
              "PTA_500Hz","ASSR_500Hz","PTA_1KHz","ASSR_1KHz","PTA_2KHz","ASSR_2KHz","PTA_4KHz","ASSR_4KHz"]

    df.columns=column_names
    df.drop("S#",axis=1,inplace=True)
    df1= df.iloc[:,:10]
    df2=df.iloc[:, [0,1,10,11,12,13,14,15,16,17]]
    df3=pd.concat([df1,df2],ignore_index=True)

    df3.replace('-',np.nan)
    df3 = df3[["Age","Gender",
              "PTA_500Hz","PTA_1KHz","PTA_2KHz","PTA_4KHz","ASSR_500Hz","ASSR_1KHz","ASSR_2KHz","ASSR_4KHz"]]
    exclude_cols = ['Gender']
    cols_to_convert = df.columns.difference(exclude_cols)
    df3[cols_to_convert] = df3[cols_to_convert].apply(pd.to_numeric, errors='coerce')

    df_cleaned = df3.dropna(thresh=df3.shape[1] - 2,ignore_index=True)
    

    return df_cleaned

df = load_data()

2024-10-03 01:20:36.576 No runtime found, using MemoryCacheStorageManager


In [13]:
def handle_missing_values(row):
    for i in range(len(row)):
       
        if i < 6:
            
            row_mean = row[2:].mean()  
            
            row[i] = row[i] if not pd.isna(row[i]) else 110
        else:
           
            row[i] = row[i] if not pd.isna(row[i]) else 100  # Using 100 as an example
            
    return row
df.iloc[:, :] = df.apply(handle_missing_values, axis=1)
df.to_excel("Data.xlsx")

  row[i] = row[i] if not pd.isna(row[i]) else 110
  row[i] = row[i] if not pd.isna(row[i]) else 110
  row[i] = row[i] if not pd.isna(row[i]) else 100  # Using 100 as an example
  row[i] = row[i] if not pd.isna(row[i]) else 100  # Using 100 as an example


In [14]:
# Normalize the data
columns_to_scale = ["Age","ASSR_500Hz", "ASSR_1KHz", "ASSR_2KHz", "ASSR_4KHz"]
scaler = StandardScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

# Prepare features and target
X = df[["Age", "Gender","ASSR_500Hz", "ASSR_1KHz", "ASSR_2KHz", "ASSR_4KHz"]]
y = df[["PTA_500Hz", "PTA_1KHz", "PTA_2KHz", "PTA_4KHz"]]

# Convert categorical variable
X = pd.get_dummies(X, columns=['Gender'], drop_first=True)
  


In [15]:
st.title("PTA Threshold Prediction App")

# Input features with default values
age = st.number_input("Age", min_value=0, max_value=120, value=10)
gender = st.selectbox("Gender", ["Male", "Female"], index=0)  # Default to "Male"
ASSR_500Hz = st.number_input("ASSR_500Hz", value=50)
ASSR_1KHz = st.number_input("ASSR_1KHz", value=60)
ASSR_2KHz = st.number_input("ASSR_2KHz", value=60)
ASSR_4KHz = st.number_input("ASSR_4KHz", value=70)

# Model selection
model_option = st.selectbox("Select a model", ["LinearRegression", "SVM", "RandomForest", "DecisionTree", "KNN"])

# Hyperparameter tuning options
params = {}
if model_option == "SVM":
    params = {
        'C': [st.slider("C (Regularization)", 0.01, 10.0, 1.0)],  # Wrap in a list
        'kernel': [st.selectbox("Kernel", ["linear", "poly", "rbf", "sigmoid"])]
    }
elif model_option == "RandomForest":
    params = {
        'n_estimators': [st.slider("Number of Trees", 10, 200, 100)],
        'max_depth': [st.slider("Max Depth", 1, 20, 10)]
    }
elif model_option == "DecisionTree":
    params = {
        'max_depth': [st.slider("Max Depth", 1, 20, 10)]
    }
elif model_option == "KNN":
    n_neighbors = st.slider("Number of Neighbors", 1, 30, 5)
    params = {
        'n_neighbors': [n_neighbors]  # Wrap in a list
    }

# Prepare input data for prediction
input_data = pd.DataFrame({
    "Age": [age],
    "Gender": [gender],
    "ASSR_500": [ASSR_500Hz],
    "ASSR_1KHz": [ASSR_1KHz],
    "ASSR_2KHz": [ASSR_2KHz],
    "ASSR_4KHz": [ASSR_4KHz]
})

# Normalize input data
input_data[columns_to_scale] = scaler.transform(input_data[columns_to_scale])
input_data = pd.get_dummies(input_data, columns=['Gender'], drop_first=True).reindex(columns=X.columns, fill_value=0)




KeyError: "['ASSR_500Hz'] not in index"

In [None]:
# Button to train and predict
if st.button("Predict"):
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize the selected model
    if model_option == "LinearRegression":
        model = LinearRegression()
    elif model_option == "SVM":
        model = MultiOutputRegressor(SVR())
    elif model_option == "RandomForest":
        model = RandomForestRegressor()
    elif model_option == "DecisionTree":
        model = DecisionTreeRegressor()
    elif model_option == "KNN":
        model = MultiOutputRegressor(KNeighborsRegressor())

    # Perform hyperparameter tuning if applicable
    if params:
        grid_search = GridSearchCV(model, param_grid=params, cv=5)
        grid_search.fit(X_train, y_train)
        model = grid_search.best_estimator_
    else:
        model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(input_data)

    # Display prediction result
    st.write(f"Predicted PTA Threshold (P-500): {y_pred[0]:.2f}")

    # Calculate and display model performance metrics
    y_test_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_test_pred)
    
    st.write(f"Mean Absolute Error: {mae:.2f}")