[Reference](https://towardsdatascience.com/from-raw-data-to-web-app-deployment-with-atom-and-streamlit-d8df381aa19f)

# Set up


In [8]:
!pip install streamlit
!pip install atom-ml

Collecting streamlit
  Downloading streamlit-1.3.0-py2.py3-none-any.whl (9.2 MB)
[K     |████████████████████████████████| 9.2 MB 4.3 MB/s 
Collecting validators
  Downloading validators-0.18.2-py3-none-any.whl (19 kB)
Collecting base58
  Downloading base58-2.1.1-py3-none-any.whl (5.6 kB)
Collecting pympler>=0.9
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[K     |████████████████████████████████| 164 kB 42.4 MB/s 
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.24-py3-none-any.whl (180 kB)
[K     |████████████████████████████████| 180 kB 55.2 MB/s 
Collecting blinker
  Downloading blinker-1.4.tar.gz (111 kB)
[K     |████████████████████████████████| 111 kB 61.3 MB/s 
Collecting watchdog
  Downloading watchdog-2.1.6-py3-none-manylinux2014_x86_64.whl (76 kB)
[K     |████████████████████████████████| 76 kB 5.3 MB/s 
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.7.1-py2.py3-none-any.whl (4.3 MB)
[K     |████████████████████████████████| 4.3 MB 50.5 MB/s 
Colle

Collecting atom-ml
  Downloading atom-ml-4.10.0.tar.gz (384 kB)
[K     |████████████████████████████████| 384 kB 5.2 MB/s 
Collecting pandas>=1.3.0
  Downloading pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
[K     |████████████████████████████████| 11.3 MB 59.6 MB/s 
[?25hCollecting pandas-profiling>=2.3.0
  Downloading pandas_profiling-3.1.0-py2.py3-none-any.whl (261 kB)
[K     |████████████████████████████████| 261 kB 70.0 MB/s 
[?25hCollecting explainerdashboard>=0.3.7
  Downloading explainerdashboard-0.3.7-py3-none-any.whl (305 kB)
[K     |████████████████████████████████| 305 kB 53.6 MB/s 
[?25hCollecting mlflow>=1.15.0
  Downloading mlflow-1.22.0-py3-none-any.whl (15.5 MB)
[K     |████████████████████████████████| 15.5 MB 47.9 MB/s 
Collecting scikit-optimize>=0.9.0
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[K     |████████████████████████████████| 100 kB 10.4 MB/s 
[?25hCollecting nltk>=3.6.2
  Downloading nlt

In [1]:
import pandas as pd
import streamlit as st
from atom import ATOMClassifier
# Configure the page with layout="wide" so the web app expands across the
# whole screen.
st.set_page_config(layout="wide")

pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.


# Pipeline menu


In [2]:
# Sidebar heading for the pipeline menu
st.sidebar.title("Pipeline")

# Data cleaning toggles: one unticked checkbox per (label, widget key) pair,
# created in the order listed and unpacked into the flags read by the run step.
st.sidebar.subheader("Data cleaning")
scale, encode, impute = (
    st.sidebar.checkbox(label, value=False, key=key)
    for label, key in (
        ("Scale", "scale"),
        ("Encode", "encode"),
        ("Impute", "impute"),
    )
)

2021-12-26 11:46:41.303 
  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]


In [3]:
# Model selection: `models` maps each model key to the current state of its
# sidebar checkbox. Every triple is (model key, checkbox label, ticked by
# default); the model key doubles as the widget key.
st.sidebar.subheader("Models")
models = {
    key: st.sidebar.checkbox(label, value=default, key=key)
    for key, label, default in (
        ("gnb", "Gaussian Naive Bayes", True),
        ("rf", "Random Forest", True),
        ("et", "Extra-Trees", False),
        ("xgb", "XGBoost", False),
        ("lgb", "LightGBM", False),
    )
}

# Data ingestion


In [4]:
st.header("Data")

# Keep the upload handle and the parsed table under separate names so that
# `data` is only ever None (nothing usable uploaded) or a DataFrame.
uploaded_file = st.file_uploader("Upload data:", type="csv")

data = None
if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An empty or malformed upload is reported in the app instead of
        # aborting the script with a traceback; `data` stays None.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
    else:
        # If a dataset is uploaded, show a preview
        st.text("Data preview:")
        st.dataframe(data.head())

# Model training and evaluation


In [5]:
st.header("Results")

if st.sidebar.button("Run"):
    # Keys of every model whose sidebar checkbox is ticked
    to_run = [key for key, value in models.items() if value]

    if data is None:
        # No dataset is available, so there is nothing to train on
        st.warning("Upload a dataset before running the pipeline.")
    elif not to_run:
        # Every model checkbox is unticked
        st.warning("Select at least one model before running the pipeline.")
    else:
        placeholder = st.empty()  # Empty to overwrite write statements
        placeholder.write("Initializing atom...")

        # Initialize atom
        atom = ATOMClassifier(data, verbose=2, random_state=1)

        # Apply only the data cleaning steps enabled in the sidebar
        if scale:
            placeholder.write("Scaling the data...")
            atom.scale()
        if encode:
            placeholder.write("Encoding the categorical features...")
            atom.encode(strategy="LeaveOneOut", max_onehot=10)
        if impute:
            placeholder.write("Imputing the missing values...")
            atom.impute(strat_num="median", strat_cat="most_frequent")

        placeholder.write("Fitting the models...")
        atom.run(models=to_run, metric="f1")

        # Display metric results
        placeholder.write(atom.evaluate())

        # Draw plots side by side. `st.columns` is the stable name of the
        # former `st.beta_columns`, which current Streamlit releases no longer
        # provide.
        col1, col2 = st.columns(2)
        col1.write(atom.plot_roc(title="ROC curve", display=None))
        col2.write(atom.plot_prc(title="PR curve", display=None))

else:
    st.write("No results yet. Click the run button!")