### Predictive model for  Coupon acceptance

In [155]:
#Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [156]:
data = pd.read_csv(r"D:\Dataset\ML\Classification\Ds_Data.csv")
data.head()

Unnamed: 0,destination,passanger,weather,temperature,coupon,expiration,gender,age,maritalStatus,has_children,...,CoffeeHouse,CarryAway,RestaurantLessThan20,Restaurant20To50,toCoupon_GEQ5min,toCoupon_GEQ15min,toCoupon_GEQ25min,direction_same,direction_opp,Accept(Y/N?)
0,No Urgent Place,Alone,Sunny,55,Restaurant(<20),1d,Female,21,Unmarried partner,1,...,never,,4~8,1~3,1,0,0,0,1,1
1,No Urgent Place,Friend(s),Sunny,80,Coffee House,2h,Female,21,Unmarried partner,1,...,never,,4~8,1~3,1,0,0,0,1,0
2,No Urgent Place,Friend(s),Sunny,80,Carry out & Take away,2h,Female,21,Unmarried partner,1,...,never,,4~8,1~3,1,1,0,0,1,1
3,No Urgent Place,Friend(s),Sunny,80,Coffee House,2h,Female,21,Unmarried partner,1,...,never,,4~8,1~3,1,1,0,0,1,0
4,No Urgent Place,Friend(s),Sunny,80,Coffee House,1d,Female,21,Unmarried partner,1,...,never,,4~8,1~3,1,1,0,0,1,0


In [157]:
data.isnull().sum()

destination                 0
passanger                   0
weather                     0
temperature                 0
coupon                      0
expiration                  0
gender                      0
age                         0
maritalStatus               0
has_children                0
education                   0
occupation                  0
income                      0
car                     12576
Bar                       107
CoffeeHouse               217
CarryAway                 151
RestaurantLessThan20      130
Restaurant20To50          189
toCoupon_GEQ5min            0
toCoupon_GEQ15min           0
toCoupon_GEQ25min           0
direction_same              0
direction_opp               0
Accept(Y/N?)                0
dtype: int64

In [158]:
data.shape

(12684, 25)

In [159]:
data.drop(columns = ['car','toCoupon_GEQ5min'],inplace = True)

#### Duplicate value treatment

In [160]:
data.duplicated().sum()

291

In [161]:
data.drop_duplicates(inplace = True)

In [162]:
data.duplicated().sum()

0

In [163]:
data.columns

Index(['destination', 'passanger', 'weather', 'temperature', 'coupon',
       'expiration', 'gender', 'age', 'maritalStatus', 'has_children',
       'education', 'occupation', 'income', 'Bar', 'CoffeeHouse', 'CarryAway',
       'RestaurantLessThan20', 'Restaurant20To50', 'toCoupon_GEQ15min',
       'toCoupon_GEQ25min', 'direction_same', 'direction_opp', 'Accept(Y/N?)'],
      dtype='object')

In [164]:
x = data.iloc[:,data.columns != 'Accept(Y/N?)']
y = data[['Accept(Y/N?)']]

In [165]:
data.columns

Index(['destination', 'passanger', 'weather', 'temperature', 'coupon',
       'expiration', 'gender', 'age', 'maritalStatus', 'has_children',
       'education', 'occupation', 'income', 'Bar', 'CoffeeHouse', 'CarryAway',
       'RestaurantLessThan20', 'Restaurant20To50', 'toCoupon_GEQ15min',
       'toCoupon_GEQ25min', 'direction_same', 'direction_opp', 'Accept(Y/N?)'],
      dtype='object')

In [166]:
numeric = ['temperature',]
ordinal = ['age','income','Bar', 'CoffeeHouse', 'CarryAway',
'RestaurantLessThan20', 'Restaurant20To50']
nominal = ['destination','passanger','weather','coupon','gender','maritalStatus',
           'has_children','education','occupation','toCoupon_GEQ15min','toCoupon_GEQ25min',
           'direction_same','direction_opp','expiration']

In [167]:
len(ordinal)

7

In [206]:
data['expiration'].value_counts()

expiration
1d    6938
2h    5455
Name: count, dtype: int64

In [169]:
cust_order1 = ['below21','21','26','31','36','41','46','50plus']
cust_order2 = ['Less than $12500','$12500 - $24999','$25000 - $37499','$37500 - $49999','$50000 - $62499','$62500 - $74999',
              '$75000 - $87499','$87500 - $99999','$100000 or More']
cust_order3 = ['never','less1','1~3','4~8','gt8']

In [170]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing  import StandardScaler
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder

In [171]:
# Define preprocessing pipelines
numeric_pipeline = Pipeline([
    ('imputer',SimpleImputer(strategy = 'median')),
    ('scaler',StandardScaler())
])

nominal_pipeline = Pipeline([
    ('imputer',SimpleImputer(strategy = 'most_frequent')),
    ('encoder',OneHotEncoder(handle_unknown = "ignore"))
     ])
ordinal_pipeline1 = Pipeline([
    ('imputer',SimpleImputer(strategy = 'most_frequent')),
    ('encoder',OrdinalEncoder(categories = [cust_order1]))
     ])#age

ordinal_pipeline2 = Pipeline([
    ('imputer',SimpleImputer(strategy = 'most_frequent')),
    ('encoder',OrdinalEncoder(categories = [cust_order2]))
     ])#income

ordinal_pipeline3 = Pipeline([
    ('imputer',SimpleImputer(strategy = 'most_frequent')),
    ('encoder',OrdinalEncoder(categories = [cust_order3]*5))
     ])#'Bar', 'CoffeeHouse', 'CarryAway','RestaurantLessThan20', 'Restaurant20To50'

In [172]:
from sklearn.compose import ColumnTransformer

In [173]:
preprocessor = ColumnTransformer([
    ("numeric", numeric_pipeline, numeric),
    ("nominal", nominal_pipeline, nominal),
    ("ordinal1", ordinal_pipeline1, ['age']),  # Single brackets for single column
    ("ordinal2", ordinal_pipeline2, ['income']),  # Single brackets for single column
    ("ordinal3", ordinal_pipeline3, ['Bar', 'CoffeeHouse', 'CarryAway', 'RestaurantLessThan20', 'Restaurant20To50'])  # No extra brackets
])

In [174]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [175]:
# **Define Multiple Models**
models = {
    "LogisticRegression": LogisticRegression(class_weight="balanced", random_state=42),
    "DecisionTree": DecisionTreeClassifier(class_weight="balanced", random_state=42),
    "RandomForest": RandomForestClassifier(class_weight="balanced", random_state=42),
    "AdaBoost": AdaBoostClassifier(n_estimators=50, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss"),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "SVM": SVC(class_weight="balanced", probability=True, random_state=42)
    
}

In [176]:
from sklearn.model_selection import train_test_split

In [177]:
# **Train-Test Split**
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

In [178]:
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.combine import SMOTETomek
from sklearn.metrics import classification_report

In [179]:
# **Loop through models and evaluate**
for name, model in models.items():
    print(f"\n🔹 Training model: {name}")

    # Full pipeline (Preprocessing → SMOTETomek → Model)
    model_pipeline = ImbPipeline([
        ("preprocessor", preprocessor),
        ("resampling", SMOTETomek(random_state=42)),  # Handles class imbalance
        ("classifier", model)
    ])

    # **Train model**
    model_pipeline.fit(xtrain, ytrain)

    # **Predictions & Evaluation**
    ypred = model_pipeline.predict(xtest)
    print(classification_report(ytest, ypred))


🔹 Training model: LogisticRegression
              precision    recall  f1-score   support

           0       0.60      0.67      0.63      1076
           1       0.72      0.67      0.69      1403

    accuracy                           0.67      2479
   macro avg       0.66      0.67      0.66      2479
weighted avg       0.67      0.67      0.67      2479


🔹 Training model: DecisionTree
              precision    recall  f1-score   support

           0       0.60      0.63      0.62      1076
           1       0.71      0.68      0.69      1403

    accuracy                           0.66      2479
   macro avg       0.65      0.65      0.65      2479
weighted avg       0.66      0.66      0.66      2479


🔹 Training model: RandomForest
              precision    recall  f1-score   support

           0       0.71      0.64      0.68      1076
           1       0.74      0.80      0.77      1403

    accuracy                           0.73      2479
   macro avg       0.73   

In [180]:
mod_pipeline = ImbPipeline([
        ("preprocessor", preprocessor),
        ("resampling", SMOTETomek(random_state=42)),  # Handles class imbalance
        ("classifier", XGBClassifier(use_label_encoder=False, eval_metric="logloss"))
    ])

In [181]:
 # **Train model**
mod_pipeline.fit(xtrain, ytrain)

In [182]:
# **Predictions & Evaluation**
ypred = model_pipeline.predict(xtest)
print(classification_report(ytest, ypred))

              precision    recall  f1-score   support

           0       0.66      0.71      0.68      1076
           1       0.76      0.72      0.74      1403

    accuracy                           0.71      2479
   macro avg       0.71      0.71      0.71      2479
weighted avg       0.72      0.71      0.71      2479



In [183]:
import joblib

In [187]:
preprocessor = mod_pipeline.named_steps['preprocessor']
model = mod_pipeline.named_steps['classifier']

In [188]:
joblib.dump(preprocessor,'preprocessor.pkl')

['preprocessor.pkl']

In [189]:
joblib.dump(model,'model.pkl')

['model.pkl']

In [207]:
pwd

'C:\\Users\\91909\\0 Real Project\\Predictive model for Coupon acceptance'

In [209]:
!pip install Flask

