# Marketing Strategy Prediction

## Import Libraries and Data

In [4]:
# Import Data

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
import matplotlib

import matplotlib.mlab as mlab
import seaborn as sb

%matplotlib inline

In [5]:
# List every file found under the Kaggle input directory (paths are Kaggle-specific)

import os
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

/kaggle/input/marketing-strategy-personalised-offer/sample.csv
/kaggle/input/marketing-strategy-personalised-offer/train_data.csv
/kaggle/input/marketing-strategy-personalised-offer/test_data.csv


In [6]:
# Read the training CSV from the Kaggle input directory and preview its first 5 rows

train_data = pd.read_csv("/kaggle/input/marketing-strategy-personalised-offer/train_data.csv")
train_data.head(5)

Unnamed: 0,offer expiration,income_range,no_visited_Cold drinks,travelled_more_than_15mins_for_offer,Restaur_spend_less_than20,Marital Status,restaurant type,age,Prefer western over chinese,travelled_more_than_25mins_for_offer,...,restuarant_opposite_direction_house,has Children,visit restaurant with rating (avg),temperature,Restaur_spend_greater_than20,Travel Time,Climate,drop location,Prefer home food,Offer Accepted
0,2days,₹100000 or More,4~8,1,less1,Married partner,4 star restaurant,36,0,0,...,0,0,4,67,less1,22,Spring,Location B,0,No
1,2days,₹87500 - ₹99999,4~8,0,4~8,Married partner,Take-away restaurant,50plus,0,0,...,0,1,3,89,1~3,18,Summer,Location B,0,Yes
2,2days,₹87500 - ₹99999,less1,1,1~3,Single,Cold drinks,26,1,0,...,1,1,4,67,less1,7,Winter,Location A,1,Yes
3,10hours,₹37500 - ₹49999,less1,0,1~3,Single,Take-away restaurant,46,1,0,...,0,1,3,89,1~3,7,Summer,Location C,0,No
4,2days,₹100000 or More,never,1,1~3,Single,4 star restaurant,21,0,1,...,1,0,3,40,less1,7,Summer,Location C,0,No


In [None]:
# (rows, columns) of the training frame
train_data.shape

In [9]:
# Smallest value present in the 'temperature' column of the training data
train_data['temperature'].min()

40

In [None]:
# Read the test CSV from the Kaggle input directory and preview its first rows
test_data = pd.read_csv("/kaggle/input/marketing-strategy-personalised-offer/test_data.csv")
test_data.head()

In [None]:
# (rows, columns) of the test frame
test_data.shape

In [None]:
# Keep the training column index in `columns` and print it for reference
columns = train_data.columns
print(columns)

## Replace missing values with most common value

In [None]:
# Drop the 'car' column before imputation. Rebinding (instead of inplace=True)
# together with errors='ignore' keeps this cell safe to re-run: a second run
# no longer raises KeyError because the column is already gone.
train_data = train_data.drop(columns='car', errors='ignore')

In [None]:
# Fill missing entries column by column with that column's most frequent value
train_most_common = {
    column: train_data[column].value_counts().index[0]
    for column in train_data.columns
}
train_data = train_data.fillna(value=train_most_common)

In [None]:
# Drop the 'car' column before imputation. Rebinding (instead of inplace=True)
# together with errors='ignore' keeps this cell safe to re-run: a second run
# no longer raises KeyError because the column is already gone.
test_data = test_data.drop(columns='car', errors='ignore')

In [None]:
# Fill missing entries column by column with that column's most frequent value
# (the most frequent value is computed from the test data itself)
test_most_common = {
    column: test_data[column].value_counts().index[0]
    for column in test_data.columns
}
test_data = test_data.fillna(value=test_most_common)

## Dealing with Categorical Variables - Tackle Each column (Train Data)

### Convert 'Offer Expiration' column to numerical

In [None]:
# Distinct raw values of 'offer expiration' (used to decide the encoding below)
train_data['offer expiration'].unique()

In [None]:
# Encode the offer validity as a number in one vectorised step.
# A single Series.replace avoids per-row chained assignment (`df[col][i] = v`),
# which pandas does not guarantee to write back (and never does under
# Copy-on-Write), and removes the hard-coded row count.
# Values not listed in the mapping are left unchanged.
# NOTE(review): '2days' is encoded as 24 although two days are 48 hours; kept
# as-is so the train and test encodings stay identical - confirm intent.
offer_expiration_codes = {'2days': 24, '10hours': 10}
train_data['offer expiration'] = train_data['offer expiration'].replace(offer_expiration_codes)

In [None]:
# Preview the first rows to check the 'offer expiration' encoding
train_data.head()

### Convert 'income_range' column to numerical

In [None]:
# Distinct raw values of 'income_range' (used to decide the encoding below)
train_data['income_range'].unique()

In [None]:
# Encode income brackets as ordinal codes 1 (lowest) .. 9 (highest).
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
income_range_codes = {
    'Less than ₹12500': 1,
    '₹12500 - ₹24999': 2,
    '₹25000 - ₹37499': 3,
    '₹37500 - ₹49999': 4,
    '₹50000 - ₹62499': 5,
    '₹62500 - ₹74999': 6,
    '₹75000 - ₹87499': 7,
    '₹87500 - ₹99999': 8,
    '₹100000 or More': 9,
}
train_data['income_range'] = train_data['income_range'].replace(income_range_codes)


In [None]:
# Preview the first rows to check the 'income_range' encoding
train_data.head()

### Convert 'no_visited_Cold drinks' column to numerical



In [None]:
# Distinct raw values of 'no_visited_Cold drinks' (used to decide the encoding below)
train_data['no_visited_Cold drinks'].unique()

In [None]:
# Encode the visit-frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
cold_drinks_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
train_data['no_visited_Cold drinks'] = train_data['no_visited_Cold drinks'].replace(cold_drinks_codes)

In [None]:
# Preview the first rows to check the 'no_visited_Cold drinks' encoding
train_data.head()

### 'travelled_more_than_15mins_for_offer' is already numerical

In [None]:
# Distinct values of 'travelled_more_than_15mins_for_offer'; no encoding step follows for this column
train_data['travelled_more_than_15mins_for_offer'].unique()

### Convert 'Restaur_spend_less_than20' column to numerical

In [None]:
# Distinct raw values of 'Restaur_spend_less_than20' (used to decide the encoding below)
train_data['Restaur_spend_less_than20'].unique()

In [None]:
# Encode the frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
spend_less_20_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
train_data['Restaur_spend_less_than20'] = train_data['Restaur_spend_less_than20'].replace(spend_less_20_codes)

In [None]:
# Preview the first rows to check the 'Restaur_spend_less_than20' encoding
train_data.head()

### Convert 'Marital Status' column to numerical

In [None]:
# Distinct raw values of 'Marital Status' (used to decide the encoding below)
train_data['Marital Status'].unique()

In [None]:
# Map each marital status to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
marital_status_codes = {
    'Single': 0,
    'Unmarried partner': 1,
    'Married partner': 2,
    'Divorced': 3,
    'Widowed': 4,
}
train_data['Marital Status'] = train_data['Marital Status'].replace(marital_status_codes)

In [None]:
# Preview the first rows to check the 'Marital Status' encoding
train_data.head()

### Convert 'Restaurant' column to numerical

In [None]:
# Distinct raw values of 'restaurant type' (used to decide the encoding below)
train_data['restaurant type'].unique()

In [None]:
# Map each restaurant type to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
restaurant_type_codes = {
    'Cold drinks': 0,
    'Take-away restaurant': 1,
    'Restaurant with pub': 2,
    '2 star restaurant': 3,
    '4 star restaurant': 4,
}
train_data['restaurant type'] = train_data['restaurant type'].replace(restaurant_type_codes)

In [None]:
# Preview the first rows to check the 'restaurant type' encoding
train_data.head()

### Convert 'age' column to numerical

In [None]:
# Distinct raw values of 'age' (used to decide the encoding below)
train_data['age'].unique()

In [None]:
# Turn the two open-ended age labels into numbers, then make the whole column
# numeric. The remaining entries are digit strings such as '36'; without
# pd.to_numeric they would stay strings next to the integers 20 and 50.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count.
# pd.to_numeric raises ValueError if an unexpected non-numeric label remains.
age_codes = {'below21': 20, '50plus': 50}
train_data['age'] = pd.to_numeric(train_data['age'].replace(age_codes))

In [None]:
# Preview the first rows to check the 'age' encoding
train_data.head()

### Convert 'no_visited_bars' column to numerical

In [None]:
# Distinct raw values of 'no_visited_bars' (used to decide the encoding below)
train_data['no_visited_bars'].unique()

In [None]:
# Encode the visit-frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
visited_bars_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
train_data['no_visited_bars'] = train_data['no_visited_bars'].replace(visited_bars_codes)

### Convert 'gender' column to numerical

In [None]:
# Distinct raw values of 'gender' (used to decide the encoding below)
train_data['gender'].unique()

In [None]:
# Encode gender as 0 ('Female') / 1 ('Male').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
gender_codes = {'Female': 0, 'Male': 1}
train_data['gender'] = train_data['gender'].replace(gender_codes)

### drop 'car' column

### Convert 'Customer Type' column to numerical

In [None]:
# Distinct raw values of 'Customer type' (used to decide the encoding below)
train_data['Customer type'].unique()

In [None]:
# Map each customer type to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
customer_type_codes = {
    'Individual': 0,
    'With Family': 1,
    'With Kids': 2,
    'With Colleagues': 3,
}
train_data['Customer type'] = train_data['Customer type'].replace(customer_type_codes)

### Convert 'Qualification' column to numerical

In [None]:
# Distinct raw values of 'Qualification' (used to decide the encoding below)
train_data['Qualification'].unique()

In [None]:
# Encode the education level as ordinal codes 1 ('Some High School') .. 6 (graduate degree).
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
qualification_codes = {
    'Some High School': 1,
    'High School Graduate': 2,
    'Some college - no degree': 3,
    'Associates degree': 4,
    'Bachelors degree': 5,
    'Graduate degree (Masters or Doctorate)': 6,
}
train_data['Qualification'] = train_data['Qualification'].replace(qualification_codes)

### Convert 'no_Take-aways' column to numerical

In [None]:
# Distinct raw values of 'no_Take-aways' (used to decide the encoding below)
train_data['no_Take-aways'].unique()

In [None]:
# Encode the frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
take_aways_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
train_data['no_Take-aways'] = train_data['no_Take-aways'].replace(take_aways_codes)

### Convert 'Job' column to numerical

In [None]:
# Distinct raw values of 'Job/Job Industry' (used to decide the encoding below)
train_data['Job/Job Industry'].unique()

In [None]:
# Map each job / industry label to an integer code (0..24).
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
# NOTE(review): these codes impose an arbitrary order on a nominal category;
# the same numbering must be used for train and test data.
job_codes = {
    'Unemployed': 0,
    'Arts Design Entertainment Sports & Media': 1,
    'Sales & Related': 2,
    'Student': 3,
    'Business & Financial': 4,
    'Computer & Mathematical': 5,
    'Office & Administrative Support': 6,
    'Management': 7,
    'Healthcare Support': 8,
    'Life Physical Social Science': 9,
    'Installation Maintenance & Repair': 10,
    'Legal': 11,
    'Community & Social Services': 12,
    'Education&Training&Library': 13,
    'Construction & Extraction': 14,
    'Healthcare Practitioners & Technical': 15,
    'Transportation & Material Moving': 16,
    'Retired': 17,
    'Architecture & Engineering': 18,
    'Production Occupations': 19,
    'Farming Fishing & Forestry': 20,
    'Protective Service': 21,
    'Personal Care & Service': 22,
    'Food Preparation & Serving Related': 23,
    'Building & Grounds Cleaning & Maintenance': 24,
}
train_data['Job/Job Industry'] = train_data['Job/Job Industry'].replace(job_codes)

### Convert 'Restaur_spend_greater_than20' column to numerical

In [None]:
# Distinct raw values of 'Restaur_spend_greater_than20' (used to decide the encoding below)
train_data['Restaur_spend_greater_than20'].unique()

In [None]:
# Encode the frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
spend_greater_20_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
train_data['Restaur_spend_greater_than20'] = train_data['Restaur_spend_greater_than20'].replace(spend_greater_20_codes)

### Convert 'Climate' column to numerical

In [None]:
# Distinct raw values of 'Climate' (used to decide the encoding below)
train_data['Climate'].unique()

In [None]:
# Map each climate label to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
climate_codes = {'Spring': 1, 'Summer': 2, 'Winter': 3}
train_data['Climate'] = train_data['Climate'].replace(climate_codes)

### Convert 'Drop Location' column to numerical

In [None]:
# Distinct raw values of 'drop location' (used to decide the encoding below)
train_data['drop location'].unique()

In [None]:
# Map each drop location to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
drop_location_codes = {'Location A': 1, 'Location B': 2, 'Location C': 3}
train_data['drop location'] = train_data['drop location'].replace(drop_location_codes)

In [None]:
# Preview the first rows of the training data after the encoding steps above
train_data.head()

## Dealing with Categorical Variables - Tackle Each column (Test Data)

### Convert 'Offer Expiration' column to numerical

In [None]:
# Distinct raw values of 'offer expiration' in the test data (used to decide the encoding below)
test_data['offer expiration'].unique()

In [None]:
# Encode the offer validity as a number in one vectorised step.
# A single Series.replace avoids per-row chained assignment (`df[col][i] = v`),
# which pandas does not guarantee to write back (and never does under
# Copy-on-Write), and removes the hard-coded row count.
# Values not listed in the mapping are left unchanged.
# NOTE(review): '2days' is encoded as 24 although two days are 48 hours; kept
# as-is so the train and test encodings stay identical - confirm intent.
offer_expiration_codes = {'2days': 24, '10hours': 10}
test_data['offer expiration'] = test_data['offer expiration'].replace(offer_expiration_codes)

In [None]:
# Preview the first rows to check the 'offer expiration' encoding
test_data.head()

### Convert 'income_range' column to numerical

In [None]:
# Distinct raw values of 'income_range' in the test data (used to decide the encoding below)
test_data['income_range'].unique()

In [None]:
# Encode income brackets as ordinal codes 1 (lowest) .. 9 (highest).
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
income_range_codes = {
    'Less than ₹12500': 1,
    '₹12500 - ₹24999': 2,
    '₹25000 - ₹37499': 3,
    '₹37500 - ₹49999': 4,
    '₹50000 - ₹62499': 5,
    '₹62500 - ₹74999': 6,
    '₹75000 - ₹87499': 7,
    '₹87500 - ₹99999': 8,
    '₹100000 or More': 9,
}
test_data['income_range'] = test_data['income_range'].replace(income_range_codes)


In [None]:
# Preview the first rows to check the 'income_range' encoding
test_data.head()

### Convert 'no_visited_Cold drinks' column to numerical



In [None]:
# Distinct raw values of 'no_visited_Cold drinks' in the test data (used to decide the encoding below)
test_data['no_visited_Cold drinks'].unique()

In [None]:
# Encode the visit-frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
cold_drinks_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
test_data['no_visited_Cold drinks'] = test_data['no_visited_Cold drinks'].replace(cold_drinks_codes)

In [None]:
# Preview the first rows to check the 'no_visited_Cold drinks' encoding
test_data.head()

### 'travelled_more_than_15mins_for_offer' is already numerical

In [None]:
# Distinct values of 'travelled_more_than_15mins_for_offer' in the test data; no encoding step follows for this column
test_data['travelled_more_than_15mins_for_offer'].unique()

### Convert 'Restaur_spend_less_than20' column to numerical

In [None]:
# Distinct raw values of 'Restaur_spend_less_than20' in the test data (used to decide the encoding below)
test_data['Restaur_spend_less_than20'].unique()

In [None]:
# Encode the frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
spend_less_20_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
test_data['Restaur_spend_less_than20'] = test_data['Restaur_spend_less_than20'].replace(spend_less_20_codes)

In [None]:
# Preview the first rows to check the 'Restaur_spend_less_than20' encoding
test_data.head()

### Convert 'Marital Status' column to numerical

In [None]:
# Distinct raw values of 'Marital Status' in the test data (used to decide the encoding below)
test_data['Marital Status'].unique()

In [None]:
# Map each marital status to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
marital_status_codes = {
    'Single': 0,
    'Unmarried partner': 1,
    'Married partner': 2,
    'Divorced': 3,
    'Widowed': 4,
}
test_data['Marital Status'] = test_data['Marital Status'].replace(marital_status_codes)

In [None]:
# Preview the first rows to check the 'Marital Status' encoding
test_data.head()

### Convert 'Restaurant' column to numerical

In [None]:
# Distinct raw values of 'restaurant type' in the test data (used to decide the encoding below)
test_data['restaurant type'].unique()

In [None]:
# Map each restaurant type to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
restaurant_type_codes = {
    'Cold drinks': 0,
    'Take-away restaurant': 1,
    'Restaurant with pub': 2,
    '2 star restaurant': 3,
    '4 star restaurant': 4,
}
test_data['restaurant type'] = test_data['restaurant type'].replace(restaurant_type_codes)

In [None]:
# Preview the first rows to check the 'restaurant type' encoding
test_data.head()

### Convert 'age' column to numerical

In [None]:
# Distinct raw values of 'age' in the test data (used to decide the encoding below)
test_data['age'].unique()

In [None]:
# Turn the two open-ended age labels into numbers, then make the whole column
# numeric. The remaining entries are expected to be digit strings such as '36';
# without pd.to_numeric they would stay strings next to the integers 20 and 50.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count.
# pd.to_numeric raises ValueError if an unexpected non-numeric label remains.
age_codes = {'below21': 20, '50plus': 50}
test_data['age'] = pd.to_numeric(test_data['age'].replace(age_codes))

In [None]:
# Preview the first rows to check the 'age' encoding
test_data.head()

### Convert 'no_visited_bars' column to numerical

In [None]:
# Distinct raw values of 'no_visited_bars' in the test data (used to decide the encoding below)
test_data['no_visited_bars'].unique()

In [None]:
# Encode the visit-frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
visited_bars_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
test_data['no_visited_bars'] = test_data['no_visited_bars'].replace(visited_bars_codes)

### Convert 'gender' column to numerical

In [None]:
# Distinct raw values of 'gender' in the test data (used to decide the encoding below)
test_data['gender'].unique()

In [None]:
# Encode gender as 0 ('Female') / 1 ('Male').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
gender_codes = {'Female': 0, 'Male': 1}
test_data['gender'] = test_data['gender'].replace(gender_codes)

### drop 'car' column

### Convert 'Customer Type' column to numerical

In [None]:
# Distinct raw values of 'Customer type' in the test data (used to decide the encoding below)
test_data['Customer type'].unique()

In [None]:
# Map each customer type to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
customer_type_codes = {
    'Individual': 0,
    'With Family': 1,
    'With Kids': 2,
    'With Colleagues': 3,
}
test_data['Customer type'] = test_data['Customer type'].replace(customer_type_codes)

### Convert 'Qualification' column to numerical

In [None]:
# Distinct raw values of 'Qualification' in the test data (used to decide the encoding below)
test_data['Qualification'].unique()

In [None]:
# Encode the education level as ordinal codes 1 ('Some High School') .. 6 (graduate degree).
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
qualification_codes = {
    'Some High School': 1,
    'High School Graduate': 2,
    'Some college - no degree': 3,
    'Associates degree': 4,
    'Bachelors degree': 5,
    'Graduate degree (Masters or Doctorate)': 6,
}
test_data['Qualification'] = test_data['Qualification'].replace(qualification_codes)

### Convert 'no_Take-aways' column to numerical

In [None]:
# Distinct raw values of 'no_Take-aways' in the test data (used to decide the encoding below)
test_data['no_Take-aways'].unique()

In [None]:
# Encode the frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
take_aways_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
test_data['no_Take-aways'] = test_data['no_Take-aways'].replace(take_aways_codes)

### Convert 'Job' column to numerical

In [None]:
# Distinct raw values of 'Job/Job Industry' in the test data (used to decide the encoding below)
test_data['Job/Job Industry'].unique()

In [None]:
# Map each job / industry label to an integer code (0..24).
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
# NOTE(review): these codes impose an arbitrary order on a nominal category;
# the same numbering must be used for train and test data.
job_codes = {
    'Unemployed': 0,
    'Arts Design Entertainment Sports & Media': 1,
    'Sales & Related': 2,
    'Student': 3,
    'Business & Financial': 4,
    'Computer & Mathematical': 5,
    'Office & Administrative Support': 6,
    'Management': 7,
    'Healthcare Support': 8,
    'Life Physical Social Science': 9,
    'Installation Maintenance & Repair': 10,
    'Legal': 11,
    'Community & Social Services': 12,
    'Education&Training&Library': 13,
    'Construction & Extraction': 14,
    'Healthcare Practitioners & Technical': 15,
    'Transportation & Material Moving': 16,
    'Retired': 17,
    'Architecture & Engineering': 18,
    'Production Occupations': 19,
    'Farming Fishing & Forestry': 20,
    'Protective Service': 21,
    'Personal Care & Service': 22,
    'Food Preparation & Serving Related': 23,
    'Building & Grounds Cleaning & Maintenance': 24,
}
test_data['Job/Job Industry'] = test_data['Job/Job Industry'].replace(job_codes)

### Convert 'Restaur_spend_greater_than20' column to numerical

In [None]:
# Distinct raw values of 'Restaur_spend_greater_than20' in the test data (used to decide the encoding below)
test_data['Restaur_spend_greater_than20'].unique()

In [None]:
# Encode the frequency buckets as ordinal codes 0 ('never') .. 4 ('gt8').
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
spend_greater_20_codes = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}
test_data['Restaur_spend_greater_than20'] = test_data['Restaur_spend_greater_than20'].replace(spend_greater_20_codes)

### Convert 'Climate' column to numerical

In [None]:
# Distinct raw values of 'Climate' in the test data (used to decide the encoding below)
test_data['Climate'].unique()

In [None]:
# Map each climate label to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
climate_codes = {'Spring': 1, 'Summer': 2, 'Winter': 3}
test_data['Climate'] = test_data['Climate'].replace(climate_codes)

### Convert 'Drop Location' column to numerical

In [None]:
# Distinct raw values of 'drop location' in the test data (used to decide the encoding below)
test_data['drop location'].unique()

In [None]:
# Map each drop location to an integer code.
# One vectorised Series.replace is used instead of per-row chained assignment
# (`df[col][i] = v`), which is not guaranteed to write back to the frame and
# depended on a hard-coded row count. Unlisted values are left unchanged.
drop_location_codes = {'Location A': 1, 'Location B': 2, 'Location C': 3}
test_data['drop location'] = test_data['drop location'].replace(drop_location_codes)

In [None]:
# Preview the first rows of the test data after the encoding steps above
test_data.head()

## Split Data

In [None]:
# Split the training frame into the model inputs (X_train) and the label column (y_train)
train_feature_columns = [
    'offer expiration', 'income_range', 'no_visited_Cold drinks',
    'travelled_more_than_15mins_for_offer', 'Restaur_spend_less_than20',
    'Marital Status', 'restaurant type', 'age',
    'Prefer western over chinese', 'travelled_more_than_25mins_for_offer',
    'travelled_more_than_5mins_for_offer', 'no_visited_bars', 'gender',
    'restuarant_same_direction_house', 'Cooks regularly',
    'Customer type', 'Qualification', 'is foodie', 'no_Take-aways',
    'Job/Job Industry', 'restuarant_opposite_direction_house',
    'has Children', 'visit restaurant with rating (avg)', 'temperature',
    'Restaur_spend_greater_than20', 'Travel Time', 'Climate',
    'drop location', 'Prefer home food',
]
X_train = train_data[train_feature_columns]
y_train = train_data['Offer Accepted']

In [None]:
# Keep only the model input columns of the test frame, in the same order as for training
test_feature_columns = [
    'offer expiration', 'income_range', 'no_visited_Cold drinks',
    'travelled_more_than_15mins_for_offer', 'Restaur_spend_less_than20',
    'Marital Status', 'restaurant type', 'age',
    'Prefer western over chinese', 'travelled_more_than_25mins_for_offer',
    'travelled_more_than_5mins_for_offer', 'no_visited_bars', 'gender',
    'restuarant_same_direction_house', 'Cooks regularly',
    'Customer type', 'Qualification', 'is foodie', 'no_Take-aways',
    'Job/Job Industry', 'restuarant_opposite_direction_house',
    'has Children', 'visit restaurant with rating (avg)', 'temperature',
    'Restaur_spend_greater_than20', 'Travel Time', 'Climate',
    'drop location', 'Prefer home food',
]
X_test = test_data[test_feature_columns]

# Logistic Regression

In [None]:
# Work on copies so the logistic-regression preprocessing does not touch the shared split
X_train_lr, y_train_lr, X_test_lr = X_train.copy(), y_train.copy(), X_test.copy()

In [None]:
# Standardization: learn the scaling from the training features only,
# then apply that same scaling to both the training and the test features

from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train_lr)

X_train_lr = sc.transform(X_train_lr)
X_test_lr = sc.transform(X_test_lr)

In [None]:
# Importing the required Libraries

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [None]:
# Define a logistic regression with scikit-learn's default settings and fit it
# on the standardized training features

lr = LogisticRegression()
lr.fit(X_train_lr, y_train_lr)

In [None]:
# Predict a class label for every row of the standardized test features

y_pred_lr = lr.predict(X_test_lr)

In [None]:
# Peek at the first predictions. `lr.predict` was used above, so each entry is
# a single class label (the values of 'Offer Accepted'), not a probability
# pair; `lr.predict_proba` would be needed to get per-class probabilities.
# Slicing (instead of indexing 0..9) also works when fewer than 10 predictions exist.

for predicted_label in y_pred_lr[:10]:
  print(predicted_label)

## CSV File for LogReg

In [None]:
# Copy the test frame so the submission columns are added without modifying test_data
test_data_lr = test_data.copy()

In [None]:
# Preview the copied test data before the 'id' and prediction columns are added

test_data_lr.head()

In [None]:
# Add a sequential 'id' column (0 .. number of rows - 1). The length is taken
# from the frame itself so the cell keeps working if the test set size changes.

test_data_lr['id'] = list(range(len(test_data_lr)))

In [None]:
# Store the logistic-regression predictions as column 'Offer Accepted' of test_data_lr

test_data_lr['Offer Accepted'] = y_pred_lr

In [None]:
# Preview test_data_lr to confirm the 'id' and 'Offer Accepted' columns are present

test_data_lr.head()

In [None]:
# Write the id / prediction pairs to a CSV file (without the index column) and display them

output = test_data_lr.loc[:, ['id', 'Offer Accepted']]
output.to_csv('/kaggle/working/MSP_LogReg.csv', index=False)
output

# Random Forest

In [None]:
# Work on copies of the shared split for the random-forest model
X_train_rf, y_train_rf, X_test_rf = X_train.copy(), y_train.copy(), X_test.copy()

In [None]:
# Importing the required libraries

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

In [None]:
# Define and fit a random forest on the (unscaled) training features.
# A fixed random_state makes the fitted forest - and therefore the exported
# predictions - reproducible across kernel restarts; the value itself is arbitrary.

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_rf, y_train_rf)

In [None]:
# Predict a class label for every row of the test features with the random forest

y_pred_rf = rf.predict(X_test_rf)

In [None]:
# Display the random-forest predictions
y_pred_rf

## CSV File for RandFor

In [None]:
# Copy the test frame so the submission columns are added without modifying test_data
test_data_rf = test_data.copy()

In [None]:
# Preview the copied test data before the 'id' and prediction columns are added

test_data_rf.head()

In [None]:
# Add a sequential 'id' column (0 .. number of rows - 1). The length is taken
# from the frame itself so the cell keeps working if the test set size changes.

test_data_rf['id'] = list(range(len(test_data_rf)))

In [None]:
# Store the random-forest predictions as column 'Offer Accepted' of test_data_rf

test_data_rf['Offer Accepted'] = y_pred_rf

In [None]:
# Preview test_data_rf to confirm the 'id' and 'Offer Accepted' columns are present

test_data_rf.head()

In [None]:
# Write the id / prediction pairs to a CSV file (without the index column) and display them

output = test_data_rf.loc[:, ['id', 'Offer Accepted']]
output.to_csv('/kaggle/working/MSP_RandFor.csv', index=False)
output

# AdaBoost

In [None]:
# Work on copies of the shared split for the AdaBoost model
X_train_adb, y_train_adb, X_test_adb = X_train.copy(), y_train.copy(), X_test.copy()

In [None]:
# Importing required libraries

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fit AdaBoost with 10 shallow decision trees (max_depth=4, min_samples_split=10)
# as base learners and a learning rate of 0.6.
# A fixed random_state makes the fitted ensemble - and therefore the exported
# submission - reproducible across kernel restarts; the value itself is arbitrary.

adb = AdaBoostClassifier(DecisionTreeClassifier(min_samples_split=10,max_depth=4),n_estimators=10,learning_rate=0.6,random_state=42)
adb.fit(X_train_adb, y_train_adb)

In [None]:
# Predict a class label for every row of the test features with AdaBoost

y_pred_adb = adb.predict(X_test_adb)

In [None]:
# Display the AdaBoost predictions
y_pred_adb

## CSV File for AdaBoost

In [None]:
# Copy the test frame so the submission columns are added without modifying test_data
test_data_adb = test_data.copy()

In [None]:
# Preview the copied test data before the 'id' and prediction columns are added

test_data_adb.head()

In [None]:
# Add a sequential 'id' column (0 .. number of rows - 1). The length is taken
# from the frame itself so the cell keeps working if the test set size changes.

test_data_adb['id'] = list(range(len(test_data_adb)))

In [None]:
# Store the AdaBoost predictions as column 'Offer Accepted' of test_data_adb

test_data_adb['Offer Accepted'] = y_pred_adb

In [None]:
# Preview test_data_adb to confirm the 'id' and 'Offer Accepted' columns are present

test_data_adb.head()

In [None]:
# Write the id / prediction pairs to the submission CSV (without the index column) and display them

output = test_data_adb.loc[:, ['id', 'Offer Accepted']]
output.to_csv('/kaggle/working/submission.csv', index=False)
output