# Deep Learning Assignment: Online Shoppers Purchase Intention

## 1. Dataset Information
- **Dataset Link**: [Online Shoppers Purchasing Intention (UCI)](https://archive.ics.uci.edu/ml/datasets/Online+Shoppers+Purchasing+Intention+Dataset)  
- **Source**: UCI Machine Learning Repository  
- **Records**: 12,330 sessions  
- **Features**: 17 (mix of numerical + categorical)  
- **Problem Type**: Binary classification (target column `Revenue`: purchase vs. no purchase)  
- **Context**: Predict whether a visitor will make a purchase based on browsing behavior.


In [6]:
# Load the raw dataset; the CSV is expected next to the notebook (relative path).
import pandas as pd

data = pd.read_csv("online_shoppers_intention.csv")

# Preview the first rows to confirm the columns parsed as expected.
data.head()

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,0,0.0,0,0.0,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,0,0.0,0,0.0,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,0,0.0,0,0.0,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,False


In [7]:
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder

# Remove the Administrative / Informational page counts and durations so they
# are not carried forward as model inputs.
#
# errors="ignore" keeps this cell re-runnable: `data` is rebound to the reduced
# frame, so executing the cell a second time would otherwise raise KeyError
# because the four columns are already gone.
data = data.drop(
    columns=[
        'Administrative',
        'Administrative_Duration',
        'Informational',
        'Informational_Duration',
    ],
    errors="ignore",
)
data.head()

Unnamed: 0,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,False


In [11]:
# Encode the boolean columns as 0/1 integers (False -> 0, True -> 1).
# `Revenue` is converted here as well, so it is numeric from this point on.
for bool_col in ('Weekend', 'Revenue'):
    data[bool_col] = data[bool_col].astype(int)
data

Unnamed: 0,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,1,1,1,1,Returning_Visitor,0,0
1,2,64.000000,0.000000,0.100000,0.000000,0.0,Feb,2,2,1,2,Returning_Visitor,0,0
2,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,4,1,9,3,Returning_Visitor,0,0
3,2,2.666667,0.050000,0.140000,0.000000,0.0,Feb,3,2,2,4,Returning_Visitor,0,0
4,10,627.500000,0.020000,0.050000,0.000000,0.0,Feb,3,3,1,4,Returning_Visitor,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12325,53,1783.791667,0.007143,0.029031,12.241717,0.0,Dec,4,6,1,1,Returning_Visitor,1,0
12326,5,465.750000,0.000000,0.021333,0.000000,0.0,Nov,3,2,1,8,Returning_Visitor,1,0
12327,6,184.250000,0.083333,0.086667,0.000000,0.0,Nov,3,2,1,13,Returning_Visitor,1,0
12328,15,346.000000,0.000000,0.021053,0.000000,0.0,Nov,2,2,3,11,Returning_Visitor,0,0


In [12]:
## One-hot encode VisitorType: one indicator column per visitor category
onehot_encoder_vist = OneHotEncoder()
visit_encoder = onehot_encoder_vist.fit_transform(data[['VisitorType']])

# fit_transform returned a sparse matrix here, hence .toarray() before building
# the frame. Reusing data.index keeps the rows aligned with `data` for the
# column-wise concat that follows.
vistor_encoder_df = pd.DataFrame(
    data=visit_encoder.toarray(),
    index=data.index,
    columns=onehot_encoder_vist.get_feature_names_out(['VisitorType']),
)

vistor_encoder_df

Unnamed: 0,VisitorType_New_Visitor,VisitorType_Other,VisitorType_Returning_Visitor
0,0.0,0.0,1.0
1,0.0,0.0,1.0
2,0.0,0.0,1.0
3,0.0,0.0,1.0
4,0.0,0.0,1.0
...,...,...,...
12325,0.0,0.0,1.0
12326,0.0,0.0,1.0
12327,0.0,0.0,1.0
12328,0.0,0.0,1.0


In [13]:
## Replace the raw VisitorType column with its one-hot indicator columns
# Guard: this cell rebinds `data` without VisitorType, so running it again
# would raise KeyError on the drop. When the column is already gone the
# replacement has been applied, and the cell simply displays `data`.
if 'VisitorType' in data.columns:
    data = pd.concat(
        [data.drop(columns='VisitorType'), vistor_encoder_df],
        axis=1,
    )
data

Unnamed: 0,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,Weekend,Revenue,VisitorType_New_Visitor,VisitorType_Other,VisitorType_Returning_Visitor
0,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,1,1,1,1,0,0,0.0,0.0,1.0
1,2,64.000000,0.000000,0.100000,0.000000,0.0,Feb,2,2,1,2,0,0,0.0,0.0,1.0
2,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,4,1,9,3,0,0,0.0,0.0,1.0
3,2,2.666667,0.050000,0.140000,0.000000,0.0,Feb,3,2,2,4,0,0,0.0,0.0,1.0
4,10,627.500000,0.020000,0.050000,0.000000,0.0,Feb,3,3,1,4,1,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12325,53,1783.791667,0.007143,0.029031,12.241717,0.0,Dec,4,6,1,1,1,0,0.0,0.0,1.0
12326,5,465.750000,0.000000,0.021333,0.000000,0.0,Nov,3,2,1,8,1,0,0.0,0.0,1.0
12327,6,184.250000,0.083333,0.086667,0.000000,0.0,Nov,3,2,1,13,1,0,0.0,0.0,1.0
12328,15,346.000000,0.000000,0.021053,0.000000,0.0,Nov,2,2,3,11,0,0,0.0,0.0,1.0


In [21]:
## One-hot encoding for Month
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Guard: this cell replaces `Month` with indicator columns and rebinds `data`,
# so a second execution would raise KeyError on data[['Month']]. Skipping the
# work when the column is already gone makes the cell safe to re-run.
if "Month" in data.columns:
    # Step 1: Create the encoder; sparse_output=False yields a dense array,
    # so no .toarray() call is needed below.
    onehot_encoder_month = OneHotEncoder(sparse_output=False)

    # Step 2: Fit + transform the actual Month column
    month_encoded = onehot_encoder_month.fit_transform(data[['Month']])

    # Step 3: Wrap the result in a DataFrame, reusing data.index so the rows
    # stay aligned for the concat.
    month_encoded_df = pd.DataFrame(
        month_encoded,
        columns=onehot_encoder_month.get_feature_names_out(['Month']),
        index=data.index
    )

    # Step 4: Concatenate with the original data (drop the old Month column)
    data = pd.concat([data.drop(columns="Month"), month_encoded_df], axis=1)

data.head()


Unnamed: 0,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,OperatingSystems,Browser,Region,TrafficType,...,Month_Aug,Month_Dec,Month_Feb,Month_Jul,Month_June,Month_Mar,Month_May,Month_Nov,Month_Oct,Month_Sep
0,1,0.0,0.2,0.2,0.0,0.0,1,1,1,1,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,64.0,0.0,0.1,0.0,0.0,2,2,1,2,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0.0,0.2,0.2,0.0,0.0,4,1,9,3,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,2.666667,0.05,0.14,0.0,0.0,3,2,2,4,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10,627.5,0.02,0.05,0.0,0.0,3,3,1,4,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
