## Import libraries

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
%matplotlib inline

### Dataset

In [2]:
# Load the online shoppers dataset from its CSV file.
# NOTE(review): this is an absolute path under /content; adjust it when the
# notebook runs somewhere the file lives elsewhere.
DATASET_PATH = "/content/online_shoppers_intention.csv"
df = pd.read_csv(DATASET_PATH)
# Preview the first rows
df.head()

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,0,0.0,0,0.0,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,0,0.0,0,0.0,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,0,0.0,0,0.0,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,False


In [3]:
# Shape of the loaded frame as (number of rows, number of columns)
df.shape

(12330, 18)

In [4]:
# Column summary: per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12330 entries, 0 to 12329
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Administrative           12330 non-null  int64  
 1   Administrative_Duration  12330 non-null  float64
 2   Informational            12330 non-null  int64  
 3   Informational_Duration   12330 non-null  float64
 4   ProductRelated           12330 non-null  int64  
 5   ProductRelated_Duration  12330 non-null  float64
 6   BounceRates              12330 non-null  float64
 7   ExitRates                12330 non-null  float64
 8   PageValues               12330 non-null  float64
 9   SpecialDay               12330 non-null  float64
 10  Month                    12330 non-null  object 
 11  OperatingSystems         12330 non-null  int64  
 12  Browser                  12330 non-null  int64  
 13  Region                   12330 non-null  int64  
 14  TrafficType           

In [11]:
# Label encoding
# Fit a separate LabelEncoder for each column and keep them in a dict.
# Sharing one encoder across columns refits it every time, so only the last
# column's mapping survives and the other columns can no longer be decoded
# with inverse_transform.
columns_to_encoded = ['Month','VisitorType','Weekend','Revenue']
label_encoders = {}
for column in columns_to_encoded:
    label_encoders[column] = LabelEncoder()
    # Replace the column in place with its integer codes.
    df[column] = label_encoders[column].fit_transform(df[column])

# Backward-compatible alias: the previously shared encoder ended up fitted on
# the last column in the list, so `label_encoder` keeps pointing at that one.
label_encoder = label_encoders[columns_to_encoded[-1]]

In [13]:
# Split the frame into the shared feature matrix (X) and the three targets.
# The three target columns are removed from X so no target is used as a feature.
target_columns = ['Revenue', 'Weekend', 'Informational_Duration']
X = df.drop(columns=target_columns)
y_revenue, y_weekend, informational_duration = (df[name] for name in target_columns)

In [14]:
# One seeded split call for the features and all three targets, so every
# train/test piece refers to the same rows. 20% of the rows are held out.
(
    X_train,
    X_test,
    y_revenue_train,
    y_revenue_test,
    y_weekend_train,
    y_weekend_test,
    informational_duration_train,
    informational_duration_test,
) = train_test_split(
    X,
    y_revenue,
    y_weekend,
    informational_duration,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Initialize the ensemble model.
# A fixed random_state makes the forest's bootstrap sampling and feature
# selection repeatable, so Restart & Run All reproduces the same predictions.
model = RandomForestClassifier(random_state=42)

# Train the model on the revenue target
model.fit(X_train, y_revenue_train)

In [16]:
# Make predictions for revenue on the held-out test features
revenue_predictions = model.predict(X_test)
# Display the predicted class labels
revenue_predictions

array([0, 0, 0, ..., 0, 0, 0])

In [17]:
# Train a dedicated model for predicting weekend.
# Refitting the shared `model` here would overwrite the classifier fitted on
# the revenue target, so re-running the revenue prediction cell afterwards
# would silently return weekend predictions. A separate, seeded estimator
# keeps both fitted models available and the results repeatable.
weekend_model = RandomForestClassifier(random_state=42)
weekend_model.fit(X_train, y_weekend_train)

# Make predictions for weekend
weekend_predictions = weekend_model.predict(X_test)
weekend_predictions

array([0, 0, 0, ..., 0, 0, 0])

In [18]:
# Regressor for the continuous Informational_Duration target.
# A fixed random_state makes the fitted forest, and therefore the reported
# error, repeatable across runs.
model_regressor = RandomForestRegressor(random_state=42)
# Train the model
model_regressor.fit(X_train,informational_duration_train)

In [19]:
# Make predictions for informational duration on the held-out test features
informational_duration_predictions = model_regressor.predict(X_test)

In [20]:
# Display the regressor's predictions for the test rows
informational_duration_predictions

array([  0.        , 290.53588334,   0.        , ...,  71.26866667,
       421.53089286,   0.        ])

In [22]:
# Evaluate revenue predictions (fraction of test rows classified correctly).
# NOTE(review): if the classes are imbalanced, accuracy should be compared
# against the majority-class baseline before drawing conclusions.
revenue_accuracy = accuracy_score(y_revenue_test, revenue_predictions)
print(f'Revenue Accuracy: {revenue_accuracy}')
# Evaluate weekend predictions
weekend_accuracy = accuracy_score(y_weekend_test, weekend_predictions)
print(f'Weekend Accuracy: {weekend_accuracy}')
# Evaluate informational duration predictions.
# The printed label names the metric explicitly; the previous label read like
# a duration rather than an error. RMSE is added because it is in the same
# units as the target and so is easier to interpret than the squared error.
informational_duration_mse = mean_squared_error(informational_duration_test, informational_duration_predictions)
informational_duration_rmse = np.sqrt(informational_duration_mse)
print(f'Informational Duration MSE: {informational_duration_mse}')
print(f'Informational Duration RMSE: {informational_duration_rmse}')

Revenue Accuracy: 0.8909164639091647
Weekend Accuracy: 0.764801297648013
Information Duration: 13942.154427371037
