In [None]:
#WITHOUT PIPELINE
# Manual preprocessing: every transformer is fit on the training split only
# and then applied to the test split by hand, so no test-set statistics leak
# into the scaler or the encoder.

import numpy as np
import pandas as pd
from scipy.sparse import hstack
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression

# Feature columns, named once so the selection, the scaler and the encoder
# below cannot drift apart.
numeric_cols = ["Daily Time Spent on Site", "Age", "Area Income", "Daily Internet Usage"]
categorical_cols = ["Gender"]

# Load data (already in your notebook as df)
X = df[numeric_cols + categorical_cols]
y = df["Clicked on Ad"]

# Train-test split (stratify=y keeps the class proportions of the target
# the same in both splits; random_state pins the shuffle for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 1. Scale numeric columns: learn mean/std on train, reuse them on test
scaler = StandardScaler()
X_train_num = scaler.fit_transform(X_train[numeric_cols])
X_test_num  = scaler.transform(X_test[numeric_cols])

# 2. Encode categorical column (Gender)
#    drop="if_binary" keeps a single indicator column for a two-level feature;
#    handle_unknown="ignore" encodes categories unseen during fit as zeros
#    instead of raising.
encoder = OneHotEncoder(drop="if_binary", handle_unknown="ignore")
X_train_cat = encoder.fit_transform(X_train[categorical_cols])
X_test_cat  = encoder.transform(X_test[categorical_cols])

# 3. Combine numeric + categorical
#    The encoder output is sparse by default, hence scipy's hstack rather than
#    numpy's. CSR is requested explicitly: hstack would otherwise return COO,
#    which does not support row slicing.
X_train_final = hstack([X_train_num, X_train_cat], format="csr")
X_test_final  = hstack([X_test_num, X_test_cat], format="csr")

# 4. Fit model
model = LogisticRegression()
model.fit(X_train_final, y_train)

# 5. Predict
y_pred = model.predict(X_test_final)


In [None]:
#WITH PIPELINE

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Column groups: each group is routed to its own transformer below.
numeric_cols = [
    "Daily Time Spent on Site",
    "Age",
    "Area Income",
    "Daily Internet Usage",
]
categorical_cols = ["Gender"]

# Preprocessing stage: standardise the numeric group, one-hot encode the
# categorical group.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(drop="if_binary", handle_unknown="ignore"), categorical_cols),
    ]
)

# End-to-end estimator: the preprocessing stage feeds a logistic regression.
clf = Pipeline(
    steps=[
        ("pre", preprocessor),
        ("model", LogisticRegression()),
    ]
)

# Fitting the pipeline fits the preprocessing on the training data and then
# the model; predicting re-applies the fitted preprocessing to the test data.
y_pred = clf.fit(X_train, y_train).predict(X_test)
