In [None]:
# Hentikan proses yang nyangkut (abaikan error kalau tidak ada)
!pkill -f "streamlit run" || true
!pkill -f cloudflared || true
!pkill -f "lt --port" || true

# Install paket
!pip -q install streamlit pandas joblib xgboost scikit-learn
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb > /dev/null
!npm -g install localtunnel > /dev/null


^C
^C
^C


In [None]:
from pathlib import Path
import pandas as pd, numpy as np, joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, MinMaxScaler, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score

PKL_PATH = Path("/content/satisfaction_pipeline.pkl")
DATA_PATH = "/content/Airlines Passanger.csv"
TARGET = "satisfaction"

# The fitted pipeline is cached on disk: training only runs when the pickle is absent.
# Delete the file to force a retrain.
if PKL_PATH.exists():
    print("✅ Model sudah ada:", PKL_PATH)
else:
    print("Melatih model cepat dan menyimpan ke .pkl …")

    # Feature groups; each group gets its own preprocessing below.
    NOMINAL_COLS = ["Gender", "Customer Type", "Type of Travel"]
    ORDINAL_COLS = ["Class"]
    RATING_COLS  = [
        "Inflight wifi service","Departure/Arrival time convenient","Ease of Online booking",
        "Gate location","Food and drink","Online boarding","Seat comfort",
        "Inflight entertainment","On-board service","Leg room service","Baggage handling",
        "Checkin service","Inflight service","Cleanliness"
    ]
    NUMERIC_COLS = ["Age","Flight Distance","Departure Delay in Minutes","Arrival Delay in Minutes"]
    FEATURE_COLS = NOMINAL_COLS + ORDINAL_COLS + RATING_COLS + NUMERIC_COLS

    # Class 1 is the dissatisfied group, so predict_proba(...)[:, 1] is P(dissatisfied).
    LABEL_TO_INT = {"neutral or dissatisfied": 1, "satisfied": 0}

    # Drop index-like columns when present; errors="ignore" skips the ones that are absent.
    df = pd.read_csv(DATA_PATH).drop(columns=["Unnamed: 0", "id"], errors="ignore")

    # Fail early with a readable message instead of deep inside the pipeline fit.
    assert TARGET in df.columns, f"Kolom target '{TARGET}' tidak ada di CSV."
    missing_cols = [c for c in FEATURE_COLS if c not in df.columns]
    assert not missing_cols, f"Kolom fitur tidak ada di CSV: {missing_cols}"

    X = df.drop(columns=TARGET)
    y = df[TARGET].map(LABEL_TO_INT)
    # Series.map turns every label missing from LABEL_TO_INT (and missing targets)
    # into NaN without raising, so check for leftovers explicitly.
    unmapped = df.loc[y.isna(), TARGET].unique().tolist()
    assert not unmapped, f"Label target tidak dikenal: {unmapped}"
    y = y.astype(int)

    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=11, stratify=y)

    # Preprocessor: turn every feature group into numeric columns.
    # With handle_unknown="ignore" and drop="first", a category not seen during fit
    # is encoded as all zeros, which is the same encoding as the dropped first category.
    nominal_pipe = Pipeline([("onehot", OneHotEncoder(handle_unknown="ignore", drop="first"))])
    # Explicit category order: Eco < Eco Plus < Business.
    ordinal_pipe = Pipeline([("ord", OrdinalEncoder(categories=[["Eco","Eco Plus","Business"]]))])
    rating_pipe  = Pipeline([("scale", MinMaxScaler())])

    # Delay columns are log1p-transformed before scaling.
    dep_pipe = Pipeline([("log1p", FunctionTransformer(np.log1p, feature_names_out="one-to-one")),
                         ("scale", MinMaxScaler())])
    # Only the arrival-delay branch imputes missing values (median).
    arr_pipe = Pipeline([("imp", SimpleImputer(strategy="median")),
                         ("log1p", FunctionTransformer(np.log1p, feature_names_out="one-to-one")),
                         ("scale", MinMaxScaler())])
    age_pipe, dist_pipe = MinMaxScaler(), MinMaxScaler()

    preprocessor = ColumnTransformer(
        transformers=[
            ("nominal", nominal_pipe, NOMINAL_COLS),
            ("ordinal", ordinal_pipe, ORDINAL_COLS),
            ("rating",  rating_pipe,  RATING_COLS),
            ("age",     age_pipe,     ["Age"]),
            ("dist",    dist_pipe,    ["Flight Distance"]),
            ("dep",     dep_pipe,     ["Departure Delay in Minutes"]),
            ("arr",     arr_pipe,     ["Arrival Delay in Minutes"]),
        ],
        remainder="drop",  # columns not listed above never reach the model
        verbose_feature_names_out=False
    )

    xgb = XGBClassifier(
        random_state=42,
        n_estimators=150,      # fast on a Colab CPU
        learning_rate=0.10,
        max_depth=4,
        subsample=0.9,
        colsample_bytree=0.9,
        tree_method="hist",
        eval_metric="logloss",
        n_jobs=-1,
    )

    clf = Pipeline([("prep", preprocessor), ("model", xgb)])
    clf.fit(Xtr, ytr)

    # Brief hold-out evaluation (optional)
    proba = clf.predict_proba(Xte)[:, 1]
    pred  = (proba >= 0.5).astype(int)
    print(classification_report(yte, pred, zero_division=0))
    print("AUC:", roc_auc_score(yte, proba))

    # Persist preprocessing and model together so the app only has to load one file.
    joblib.dump(clf, PKL_PATH.as_posix())
    print("Disimpan:", PKL_PATH)


Melatih model cepat dan menyimpan ke .pkl …
              precision    recall  f1-score   support

           0       0.96      0.93      0.95     11286
           1       0.95      0.97      0.96     14690

    accuracy                           0.96     25976
   macro avg       0.96      0.95      0.96     25976
weighted avg       0.96      0.96      0.96     25976

AUC: 0.9931462192174815
Disimpan: /content/satisfaction_pipeline.pkl


In [None]:
%%writefile /content/app.py
import streamlit as st
import pandas as pd
import joblib

# ---- Dataset column configuration ----
# Feature names, split into the groups that ensure_columns uses to pick a
# default fill value for a column missing from the input.
NOMINAL_COLS = [
    "Gender",
    "Customer Type",
    "Type of Travel",
]
ORDINAL_COLS = [
    "Class",
]
RATING_COLS = [
    "Inflight wifi service",
    "Departure/Arrival time convenient",
    "Ease of Online booking",
    "Gate location",
    "Food and drink",
    "Online boarding",
    "Seat comfort",
    "Inflight entertainment",
    "On-board service",
    "Leg room service",
    "Baggage handling",
    "Checkin service",
    "Inflight service",
    "Cleanliness",
]
NUMERIC_COLS = [
    "Age",
    "Flight Distance",
    "Departure Delay in Minutes",
    "Arrival Delay in Minutes",
]
# Column order handed to the model: nominal, ordinal, ratings, numeric.
ALL_FEATURES = [*NOMINAL_COLS, *ORDINAL_COLS, *RATING_COLS, *NUMERIC_COLS]
# Display text for each predicted class id.
LABEL_MAP = {
    0: "Satisfied",
    1: "Neutral or Dissatisfied",
}

st.set_page_config(
    page_title="Airline Satisfaction",
    page_icon="✈️",
    layout="centered",
)
st.title("✈️ Airlines Passenger Satisfaction Prediction")

@st.cache_resource
def load_model(path="/content/satisfaction_pipeline.pkl"):
    """Load the serialized model stored at ``path`` using joblib.

    The function is wrapped in ``st.cache_resource``.

    NOTE(review): ``joblib.load`` unpickles the file, so ``path`` must only
    ever point at a trusted artifact.
    """
    model = joblib.load(path)
    return model


# Show a spinner while the model loads, then confirm it is ready.
with st.spinner("Memuat model…"):
    clf = load_model()
st.success("Model siap ✅")

def ensure_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` holding exactly the ``ALL_FEATURES`` columns, in that order.

    A feature column missing from ``df`` is created with a constant default
    chosen by its group: 3 for rating columns, an empty string for nominal
    columns, "Eco" for ordinal columns and 0 for numeric columns. Columns of
    ``df`` that are not in ``ALL_FEATURES`` are left out of the result. The
    input frame is not modified.
    """
    # (group, default) pairs, checked in this order for every missing column.
    group_defaults = (
        (RATING_COLS, 3),
        (NOMINAL_COLS, ""),
        (ORDINAL_COLS, "Eco"),
        (NUMERIC_COLS, 0),
    )
    completed = df.copy()
    absent = [name for name in ALL_FEATURES if name not in completed.columns]
    for name in absent:
        for group, fill_value in group_defaults:
            if name in group:
                completed[name] = fill_value
                break
    return completed[ALL_FEATURES]

def predict_df(df_in: pd.DataFrame, threshold: float = 0.5) -> pd.DataFrame:
    """Score every row of ``df_in`` with the loaded model.

    The input is first passed through ``ensure_columns``. The result is that
    feature frame plus two columns:

    * ``pred_proba_dissatisfied``: column 1 of ``clf.predict_proba``.
    * ``prediction``: the ``LABEL_MAP`` text for class 1 when the probability
      is greater than or equal to ``threshold``, otherwise for class 0.
    """
    features = ensure_columns(df_in)
    dissatisfied_proba = clf.predict_proba(features)[:, 1]
    class_ids = (dissatisfied_proba >= threshold).astype(int)
    return features.assign(
        pred_proba_dissatisfied=dissatisfied_proba,
        prediction=[LABEL_MAP[class_id] for class_id in class_ids],
    )

def trained_categories(column: str, fallback: list) -> list:
    """Return the labels the model's one-hot encoder learned for ``column``.

    The step names looked up here ("prep" -> "nominal" -> "onehot") are the ones
    used by the pipeline built in this notebook's training cell. That encoder is
    created with ``handle_unknown="ignore"`` and ``drop="first"``, so a label it
    did not learn is encoded as all zeros, exactly like the dropped first
    category. A hard-coded option that differs from the training data even by
    letter case would therefore be scored as the baseline category without any
    error. Offering the learned labels rules that out.

    ``fallback`` is returned when the loaded model cannot be introspected this
    way or does not know ``column``.
    """
    try:
        encoder = clf.named_steps["prep"].named_transformers_["nominal"].named_steps["onehot"]
        position = list(encoder.feature_names_in_).index(column)
        learned = encoder.categories_[position]
    except (AttributeError, KeyError, IndexError, TypeError, ValueError):
        return list(fallback)
    # Keep only text labels (a missing-value category would not be a string).
    options = [str(label) for label in learned if isinstance(label, str)]
    return options or list(fallback)


st.subheader("Pengaturan Prediksi")
thresh = st.slider("Ambang (threshold) untuk kelas 'Neutral or Dissatisfied'", 0.05, 0.95, 0.50, 0.05)

st.subheader("Prediksi Satu Penumpang (Form)")
with st.form("single_form", clear_on_submit=False):
    c1, c2 = st.columns(2)
    with c1:
        # Nominal options come from the fitted encoder; the literals are only fallbacks.
        gender = st.selectbox("Gender", trained_categories("Gender", ["Female","Male"]), key="g")
        customer_type = st.selectbox(
            "Customer Type",
            trained_categories("Customer Type", ["Loyal Customer","disloyal customer"]),
            key="ct",
        )
        travel_type = st.selectbox(
            "Type of Travel",
            trained_categories("Type of Travel", ["Personal Travel","Business Travel"]),
            key="tt",
        )
        # These three labels match the explicit category list given to the ordinal encoder.
        travel_class = st.selectbox("Class", ["Eco","Eco Plus","Business"], key="cl")
        age = st.number_input("Age", 0, 120, 35, key="age")
        flight_distance = st.number_input("Flight Distance", 0, 20000, 800, key="fd")
    with c2:
        dep_delay = st.number_input("Departure Delay in Minutes", 0, 3000, 0, key="dep")
        arr_delay = st.number_input("Arrival Delay in Minutes", 0, 3000, 0, key="arr")
        # One 0-5 slider per rating column, defaulting to 3.
        ratings = {}
        for i, col in enumerate(RATING_COLS):
            ratings[col] = st.slider(col, 0, 5, 3, key=f"r{i}")

    if st.form_submit_button("Prediksi", use_container_width=True):
        # Build a one-row frame keyed by the dataset's column names.
        row = {
            "Gender": gender, "Customer Type": customer_type, "Type of Travel": travel_type,
            "Class": travel_class, "Age": age, "Flight Distance": flight_distance,
            "Departure Delay in Minutes": dep_delay, "Arrival Delay in Minutes": arr_delay,
            **ratings
        }
        res = predict_df(pd.DataFrame([row]), threshold=thresh)
        label = res.loc[0, "prediction"]
        p     = float(res.loc[0, "pred_proba_dissatisfied"])
        msg   = f"Prediksi: **{label}** (Prob. dissatisfied: {p:.2f}, threshold={thresh:.2f})"

        # Compare against LABEL_MAP so the check cannot drift from the label text.
        if label == LABEL_MAP[1]:
            st.error(msg, icon="❌")   # red background
        else:
            st.success(msg, icon="✅") # green background

st.subheader("Batch Prediction (Upload CSV)")
up = st.file_uploader("Upload CSV (tanpa kolom 'satisfaction')", type=["csv"])
if up is not None:
    # The upload is user-supplied. An empty or malformed file, or a value the
    # preprocessing rejects, raises ValueError or one of its subclasses (pandas'
    # EmptyDataError and ParserError are both ValueError subclasses). Show a
    # short message instead of a raw traceback.
    try:
        df_new = pd.read_csv(up)
        res = predict_df(df_new, threshold=thresh)
    except ValueError as exc:
        st.error(f"Gagal memproses CSV: {exc}")
    else:
        # Predictions first, then the feature values that were actually scored.
        st.dataframe(res[["prediction","pred_proba_dissatisfied"] + ALL_FEATURES], use_container_width=True)


Writing /content/app.py


In [None]:
# Jalankan Streamlit di background, log ke file
!streamlit run /content/app.py --server.headless true --server.address 0.0.0.0 --server.port 8501 > /content/streamlit.log 2>&1 &

# Tampilkan beberapa baris log awal (cek kalau ada error)
!sed -n '1,120p' /content/streamlit.log

# Buka tunnel; URL trycloudflare.com akan muncul di output
!cloudflared tunnel --url http://localhost:8501 --no-autoupdate


[90m2025-09-04T16:11:57Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2025-09-04T16:11:57Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
[90m2025-09-04T16:12:00Z[0m [32mINF[0m +--------------------------------------------------------------------------------------------+
[90m2025-09-04T16:12:00Z[0m [32mINF[0m |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
[90m2025