In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Step 1: Load the dataset and select relevant columns
df = pd.read_csv('online_shoppers_intention.csv')
selected_columns = ['Administrative', 'Informational', 'ProductRelated', 'Weekend', 'Informational_Duration', 'Revenue']
df = df[selected_columns]

# Step 2: Feature Engineering (if required)

# Step 3: Data Split
X = df.drop(['Revenue', 'Weekend'], axis=1)
y_revenue = df['Revenue']
y_weekend = df['Weekend']
y_info_duration = df['Informational_Duration']

X_train, X_test, y_revenue_train, y_revenue_test, y_weekend_train, y_weekend_test, y_info_duration_train, y_info_duration_test = \
    train_test_split(X, y_revenue, y_weekend, y_info_duration, test_size=0.2, random_state=42)

# Step 4: Ensemble Learning Algorithm
revenue_model = RandomForestClassifier()
weekend_model = RandomForestClassifier()
info_duration_model = RandomForestRegressor()

# Step 5: Model Training
revenue_model.fit(X_train, y_revenue_train)
weekend_model.fit(X_train, y_weekend_train)
info_duration_model.fit(X_train, y_info_duration_train)

# Step 6: Predictions
revenue_predictions = revenue_model.predict(X_test)
weekend_predictions = weekend_model.predict(X_test)
info_duration_predictions = info_duration_model.predict(X_test)

# Evaluation (if necessary)
revenue_accuracy = accuracy_score(y_revenue_test, revenue_predictions)
weekend_accuracy = accuracy_score(y_weekend_test, weekend_predictions)
info_duration_error = mean_squared_error(y_info_duration_test, info_duration_predictions)

print(f"Revenue Accuracy: {revenue_accuracy:.4f}")
print(f"Weekend Accuracy: {weekend_accuracy:.4f}")
print(f"Informational Duration Error: {info_duration_error:.4f}")


Revenue Accuracy: 0.8147
Weekend Accuracy: 0.7279
Informational Duration Error: 7.1663
