<a href="https://colab.research.google.com/github/JohriSumati-ops/BA-CUSTOMER-BOOKING-PREDICTOR-MODEL/blob/main/TASK2_BA_Customer_Booking_Model_BuildUp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Load data
df = pd.read_csv("customer_booking.csv", encoding="latin1")

# Separate features & target
X = df.drop("booking_complete", axis=1)
y = df["booking_complete"]

# Encode categorical variables.
# A fresh LabelEncoder is fitted for every column and its classes are recorded,
# because a single shared encoder only remembers the last column it was fitted
# on. The recorded classes are what inference code needs to turn a raw label
# back into the exact integer the model was trained with (code == list index).
cat_cols = X.select_dtypes(include="object").columns
category_classes = {}

for col in cat_cols:
    encoder = LabelEncoder()
    X[col] = encoder.fit_transform(X[col])
    category_classes[col] = encoder.classes_.tolist()

# Save feature names (column order the model is trained on)
feature_columns = X.columns.tolist()

# Train-test split (stratified so both splits keep the target class ratio)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train model
model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    class_weight="balanced"
)

model.fit(X_train, y_train)

# Evaluation on the held-out split
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:,1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_prob))

# Save model, per-column category classes & features.
# The features dump stays last so the cell's displayed result is unchanged.
joblib.dump(model, "booking_model.pkl")
joblib.dump(category_classes, "category_classes.pkl")
joblib.dump(feature_columns, "features.pkl")


Accuracy: 0.8525
ROC AUC: 0.7840077852636822


['features.pkl']

In [34]:
import streamlit as st
import pandas as pd
import joblib

# NOTE: this is a Streamlit script. The widgets render only when the code is
# saved to a file and launched with `streamlit run <file>.py`; executing the
# cell in the notebook kernel does not start the web app.

# Load the trained model and the column order it was trained on
model = joblib.load("booking_model.pkl")
features = joblib.load("features.pkl")


def load_category_codes():
    """Return ``{column: {label: code}}`` matching the training-time encoding.

    Training label-encodes every object-dtype feature column, which assigns
    integer codes in sorted order of the column's distinct values. The classes
    are read from ``category_classes.pkl`` when that artifact exists; otherwise
    they are rebuilt from the training CSV by applying the same rule.
    """
    try:
        classes_by_column = joblib.load("category_classes.pkl")
    except FileNotFoundError:
        raw = pd.read_csv("customer_booking.csv", encoding="latin1")
        categorical = raw.drop("booking_complete", axis=1).select_dtypes(include="object")
        classes_by_column = {
            col: sorted(categorical[col].unique()) for col in categorical.columns
        }
    return {
        col: {label: code for code, label in enumerate(labels)}
        for col, labels in classes_by_column.items()
    }


category_codes = load_category_codes()


def category_options(column, fallback):
    """Labels seen in training for ``column``, or ``fallback`` if it was not encoded."""
    return list(category_codes.get(column, {})) or fallback


def encode_category(column, label):
    """Translate a selected label into the integer code used during training."""
    codes = category_codes.get(column, {})
    if label not in codes:
        # Refuse to guess a code: a wrong integer silently yields a wrong prediction.
        st.error(f"'{label}' was not seen for '{column}' during training.")
        st.stop()
    return codes[label]


st.set_page_config(page_title="BA Booking Predictor", layout="centered")

st.title("✈️ Flight Booking Completion Predictor")
st.write("Predict whether a customer will complete their booking")

st.sidebar.header("Customer Details")

num_passengers = st.sidebar.slider("Passengers", 1, 9, 1)
purchase_lead = st.sidebar.slider("Purchase Lead (days)", 1, 365, 30)
length_of_stay = st.sidebar.slider("Length of Stay (days)", 1, 60, 7)
# Previously fixed at 12 with no way to change it; 12 stays the default.
flight_hour = st.sidebar.slider("Flight Hour (0-23)", 0, 23, 12)
flight_duration = st.sidebar.slider("Flight Duration (hrs)", 1.0, 20.0, 6.0)

# Offer the categories the model actually saw, so every choice has a valid code.
sales_channel = st.sidebar.selectbox(
    "Sales Channel", category_options("sales_channel", ["Internet", "Mobile"])
)
trip_type = st.sidebar.selectbox(
    "Trip Type", category_options("trip_type", ["RoundTrip", "OneWay"])
)

# Keep calendar order in the UI even though the codes follow sorted label order.
weekdays = ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]
seen_days = category_options("flight_day", weekdays)
flight_day = st.sidebar.selectbox(
    "Flight Day", [day for day in weekdays if day in seen_days] or seen_days
)

route = st.sidebar.selectbox(
    "Route", category_options("route", ["AKLDEL","AKLHND","AKLSYD"])
)
booking_origin = st.sidebar.selectbox(
    "Booking Origin", category_options("booking_origin", ["India","New Zealand","Australia"])
)

wants_extra_baggage = st.sidebar.selectbox("Extra Baggage", [0,1])
wants_preferred_seat = st.sidebar.selectbox("Preferred Seat", [0,1])
wants_in_flight_meals = st.sidebar.selectbox("In-flight Meals", [0,1])

# Encode the inputs exactly as training did (route and booking origin are no
# longer hard-coded to 0, and label codes come from the training classes).
input_dict = {
    "num_passengers": num_passengers,
    "sales_channel": encode_category("sales_channel", sales_channel),
    "trip_type": encode_category("trip_type", trip_type),
    "purchase_lead": purchase_lead,
    "length_of_stay": length_of_stay,
    "flight_hour": flight_hour,
    "flight_day": encode_category("flight_day", flight_day),
    "route": encode_category("route", route),
    "booking_origin": encode_category("booking_origin", booking_origin),
    "wants_extra_baggage": wants_extra_baggage,
    "wants_preferred_seat": wants_preferred_seat,
    "wants_in_flight_meals": wants_in_flight_meals,
    "flight_duration": flight_duration
}

# Fail loudly if the model expects a column this form does not collect.
missing_features = [name for name in features if name not in input_dict]
if missing_features:
    st.error(f"Model expects features the form does not provide: {missing_features}")
    st.stop()

# Select by the saved feature list so column order always matches training.
input_df = pd.DataFrame([input_dict])[features]

if st.button("Predict"):
    prediction = model.predict(input_df)[0]
    prob = model.predict_proba(input_df)[0][1]

    if prediction == 1:
        st.success(f"✅ Booking Likely to Complete ({prob:.2%})")
    else:
        st.error(f"❌ Booking Unlikely to Complete ({prob:.2%})")


