In [77]:
import pandas as pd
import joblib

# Load the trained model and encoders
model_v3_grid_search_rbf = joblib.load('svm_model.pkl')
month_encoder = joblib.load('month_encoder.pkl')
visitor_type_encoder = joblib.load('visitor_type_encoder.pkl')

# Print encoder classes to debug
print("Month Encoder Classes:", month_encoder.classes_)
print("Visitor Type Encoder Classes:", visitor_type_encoder.classes_)

Month Encoder Classes: [0 1 2 3 4 5 6 7 8 9]
Visitor Type Encoder Classes: [0 1 2]


In [78]:
# Define test scenarios
test_scenarios = pd.DataFrame({
    'Administrative': [0, 1, 0, 0, 5],
    'Administrative_Duration': [0.0, 64.0, 0.0, 2.666667, 627.5],
    'Informational': [0, 0, 0, 0, 1],
    'Informational_Duration': [0.0, 0.0, 0.0, 0.05, 0.02],
    'ProductRelated': [1, 2, 1, 2, 10],
    'ProductRelated_Duration': [0.000000, 0.000000, 0.200000, 0.140000, 0.050000],
    'BounceRates': [0.200000, 0.100000, 0.200000, 0.140000, 0.000000],
    'ExitRates': [0.000000, 0.000000, 0.000000, 0.000000, 0.000000],
    'PageValues': [0.0, 0.0, 0.0, 0.0, 0.0],
    'SpecialDay': [2, 2, 9, 2, 1],
    'Month': ['Feb', 'Mar', 'May', 'Oct', 'Dec'],  # Using provided month values
    'OperatingSystems': [1, 2, 3, 4, 1],
    'Browser': [1, 2, 1, 2, 3],
    'Region': [1, 2, 1, 2, 2],
    'TrafficType': [1, 2, 9, 2, 3],
    'VisitorType': ['Returning_Visitor', 'New_Visitor', 'Other', 'Returning_Visitor', 'New_Visitor'],  # Provided visitor types
    'Weekend': [False, False, False, False, True]
})

In [79]:
# Print the unique values in the test data
print("Unique Months in Test Data:", test_scenarios['Month'].unique())
print("Unique VisitorTypes in Test Data:", test_scenarios['VisitorType'].unique())

Unique Months in Test Data: ['Feb' 'Mar' 'May' 'Oct' 'Dec']
Unique VisitorTypes in Test Data: ['Returning_Visitor' 'New_Visitor' 'Other']


In [81]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder

# Load the trained model and encoders
model_v3_grid_search_rbf = joblib.load('svm_model.pkl')
month_encoder = joblib.load('month_encoder.pkl')
visitor_type_encoder = joblib.load('visitor_type_encoder.pkl')

# Define test scenarios
test_scenarios = pd.DataFrame({
    'Administrative': [0, 1, 0, 0, 5],
    'Administrative_Duration': [0.0, 64.0, 0.0, 2.666667, 627.5],
    'Informational': [0, 1, 0, 0, 0],
    'Informational_Duration': [0.0, 0.0, 0.0, 0.050000, 0.020000],
    'ProductRelated': [1, 2, 1, 2, 10],
    'ProductRelated_Duration': [0.000000, 0.200000, 0.200000, 0.140000, 0.050000],
    'BounceRates': [0.200000, 0.100000, 0.200000, 0.000000, 0.000000],
    'ExitRates': [0.000000, 0.000000, 0.000000, 0.000000, 0.000000],
    'PageValues': [0.0, 0.0, 0.0, 0.0, 0.0],
    'SpecialDay': [0.0, 0.0, 0.0, 0.0, 0.0],
    'Month': ['Feb', 'Mar', 'May', 'Oct', 'June'],
    'VisitorType': ['Returning_Visitor', 'New_Visitor', 'Other', 'Returning_Visitor', 'New_Visitor'],
    'Weekday': [2, 2, 4, 3, 2],
    'OperatingSystems': [1, 2, 4, 3, 1],
    'Browser': [1, 2, 1, 2, 3],
    'Region': [1, 1, 9, 4, 1],
    'TrafficType': [2, 2, 3, 4, 1],
    'VisitorType': ['Returning_Visitor', 'New_Visitor', 'Other', 'Returning_Visitor', 'New_Visitor'],
    'Weekend': [False, False, False, False, True]
})

# Encode categorical variables in test data
try:
    test_scenarios['Month'] = month_encoder.transform(test_scenarios['Month'])
except ValueError as e:
    print(f"Error transforming 'Month': {e}")
    print(f"Available month classes: {month_encoder.classes_}")

try:
    test_scenarios['VisitorType'] = visitor_type_encoder.transform(test_scenarios['VisitorType'])
except ValueError as e:
    print(f"Error transforming 'VisitorType': {e}")
    print(f"Available visitor type classes: {visitor_type_encoder.classes_}")

# Convert all columns to the proper types if needed
# For example, ensure all numerical columns are floats
numerical_columns = ['Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
                      'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues',
                      'SpecialDay', 'Weekday', 'OperatingSystems', 'Browser', 'Region', 'TrafficType', 'Weekend']
test_scenarios[numerical_columns] = test_scenarios[numerical_columns].astype(float)

# Check if the test data matches the encoder's classes
if 'Month' in test_scenarios.columns and 'VisitorType' in test_scenarios.columns:
    # Predict using the model
    predictions = model_v3_grid_search_rbf.predict(test_scenarios)
    
    # Print test scenarios with predictions
    for i, scenario in enumerate(test_scenarios.itertuples(index=False), start=1):
        print(f"Scenario {i}: {scenario}")
        print(f"Predicted Revenue: {'Yes' if predictions[i-1] == 1 else 'No'}")
        print("")
else:
    print("Error: Missing columns after encoding.")


Error transforming 'Month': invalid literal for int() with base 10: 'Feb'
Available month classes: [0 1 2 3 4 5 6 7 8 9]
Error transforming 'VisitorType': invalid literal for int() with base 10: 'Returning_Visitor'
Available visitor type classes: [0 1 2]




ValueError: could not convert string to float: 'Feb'

In [76]:
# Check if the test data matches the encoder's classes
assert all(test_scenarios['Month'].isin(month_encoder.classes_)), "Month values are not in the encoder classes"
assert all(test_scenarios['VisitorType'].isin(visitor_type_encoder.classes_)), "VisitorType values are not in the encoder classes"

# Encode categorical variables in test data
test_scenarios['Month'] = month_encoder.transform(test_scenarios['Month'])
test_scenarios['VisitorType'] = visitor_type_encoder.transform(test_scenarios['VisitorType'])

AssertionError: Month values are not in the encoder classes

In [None]:
month_encoder = joblib.load('month_encoder.pkl')
visitor_type_encoder = joblib.load('visitor_type_encoder.pkl')

# Print encoder classes to debug
print("Month Encoder Classes:", month_encoder.classes_)
print("Visitor Type Encoder Classes:", visitor_type_encoder.classes_)

In [68]:
import pandas as pd
import joblib

# Load the trained model and encoders
model_v3_grid_search_rbf = joblib.load('model_v3_grid_search_rbf.pkl')
month_encoder = joblib.load('month_encoder.pkl')
visitor_type_encoder = joblib.load('visitor_type_encoder.pkl')

# Create DataFrame for test scenarios
test_scenarios = pd.DataFrame({
    'Administrative': [0, 1, 0, 0, 5],
    'Administrative_Duration': [0.0, 64.0, 0.0, 2.666667, 627.5],
    'Informational': [0, 0, 0, 0, 1],
    'Informational_Duration': [0.0, 0.0, 0.0, 0.05, 0.02],
    'ProductRelated': [1, 2, 1, 2, 10],
    'ProductRelated_Duration': [0.000000, 0.000000, 0.200000, 0.140000, 0.050000],
    'BounceRates': [0.200000, 0.100000, 0.200000, 0.140000, 0.000000],
    'ExitRates': [0.000000, 0.000000, 0.000000, 0.000000, 0.000000],
    'PageValues': [0.0, 0.0, 0.0, 0.0, 0.0],
    'SpecialDay': [2, 2, 9, 2, 1],
    'Month': ['May', 'Jan', 'Mar', 'Apr', 'Dec'],
    'OperatingSystems': [1, 2, 3, 4, 1],
    'Browser': [1, 2, 1, 2, 3],
    'Region': [1, 2, 1, 2, 2],
    'TrafficType': [1, 2, 9, 2, 3],
    'VisitorType': ['Returning', 'New', 'Returning', 'New', 'Returning'],
    'Weekend': [False, False, False, False, True]
})

# Encode categorical variables in test data
test_scenarios['Month'] = month_encoder.transform(test_scenarios['Month'])
test_scenarios['VisitorType'] = visitor_type_encoder.transform(test_scenarios['VisitorType'])

# Predict using the model
predictions = model_v3_grid_search_rbf.predict(test_scenarios)

# Print test scenarios with predictions
for i, scenario in enumerate(test_scenarios.itertuples(index=False), start=1):
    print(f"Scenario {i}: {scenario}")
    print(f"Predicted Revenue: {'Yes' if predictions[i-1] == 1 else 'No'}")
    print("")


Scenario 2 - Predictions for unknown visitor types:
Encoded VisitorType Values: [-1]
Predictions: [ True]





In [69]:
import numpy as np
import pandas as pd
import joblib

# Load the trained model, encoders, and scaler
model_v3_loaded = joblib.load('svm_model.pkl')
month_encoder_loaded = joblib.load('month_encoder.pkl')
visitor_type_encoder_loaded = joblib.load('visitor_type_encoder.pkl')
scaler = joblib.load('scaler.pkl')  # Load scaler if used

# New data with missing feature values
new_data = pd.DataFrame({
    'Administrative': [1],
    'Administrative_Duration': [np.nan],  # Missing value
    'Informational': [2],
    'Informational_Duration': [np.nan],  # Missing value
    'ProductRelated': [1],
    'ProductRelated_Duration': [100.0],
    'BounceRates': [0.2],
    'ExitRates': [0.1],
    'PageValues': [2.0],
    'SpecialDay': [0.1],
    'Month': ['Feb'],
    'OperatingSystems': [1],
    'Browser': [1],
    'Region': [1],
    'TrafficType': [1],
    'VisitorType': ['Returning'],
    'Weekend': [0],
    'Revenue': [0]
})

# Fill missing values with default or mean
new_data.fillna({
    'Administrative_Duration': 0.0,
    'Informational_Duration': 0.0
}, inplace=True)

# Apply encoding
new_data['Month'] = new_data['Month'].apply(lambda x: encode_with_default(month_encoder_loaded, x))
new_data['VisitorType'] = new_data['VisitorType'].apply(lambda x: encode_with_default(visitor_type_encoder_loaded, x))

# Apply scaling
X_new = scaler.transform(new_data.drop(columns=['Revenue']))

# Make predictions
predictions = model_v3_loaded.predict(X_new)
print("Scenario 3 - Predictions for missing feature values:")
print(f"Data with Filled Missing Values:\n{new_data}")
print(f"Predictions: {predictions}\n")


FileNotFoundError: [Errno 2] No such file or directory: 'scaler.pkl'

In [58]:


# Example new data (replace this with your actual new data)
new_data = pd.DataFrame({
    'Administrative': [1],
    'Administrative_Duration': [100],
    'Informational': [2],
    'Informational_Duration': [200],
    'ProductRelated': [3],
    'ProductRelated_Duration': [300],
    'BounceRates': [0.2],
    'ExitRates': [0.1],
    'PageValues': [10],
    'SpecialDay': [0.8],
    'Month': ['Feb'],  # Replace with the actual month
    'OperatingSystems': ['Windows'],  # Example, encode as necessary
    'Browser': ['Chrome'],  # Example, encode as necessary
    'Region': ['US'],  # Example, encode as necessary
    'TrafficType': ['Direct'],  # Example, encode as necessary
    'VisitorType': ['Returning'],  # Replace with the actual visitor type
    'Weekend': [1]  # Binary variable
})




In [60]:
def encode_with_default(encoder, value, default=-1):
    try:
        return encoder.transform([value])[0]
    except ValueError:
        return default

# Apply encoding with default
new_data['Month'] = new_data['Month'].apply(lambda x: encode_with_default(month_encoder_loaded, x))
new_data['VisitorType'] = new_data['VisitorType'].apply(lambda x: encode_with_default(visitor_type_encoder_loaded, x))
new_data

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend
0,1,100,2,200,3,300,0.2,0.1,10,0.8,-1,Windows,Chrome,US,Direct,-1,1


In [None]:
from sklearn.preprocessing import OneHotEncoder

# Fit OneHotEncoder on the training data
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
one_hot_encoder.fit(df_original[['Month', 'VisitorType']])

# Transform new data
new_data_encoded = one_hot_encoder.transform(new_data[['Month', 'VisitorType']])


In [41]:
# Example scenarios for a website user session
data_scenarios = [
    # Scenario 1: Returning Visitor with Minimal Activity
    {
        'Administrative': [0],
        'Administrative_Duration': [0.0],
        'Informational': [1],
        'Informational_Duration': [5.0],
        'ProductRelated': [2],
        'ProductRelated_Duration': [10.0],
        'BounceRates': [0.15],
        'ExitRates': [0.10],
        'PageValues': [5.0],
        'SpecialDay': [0.0],
        'Month': [1],  # January
        'OperatingSystems': [1],
        'Browser': [1],
        'Region': [1],
        'TrafficType': [1],
        'VisitorType': [1],  # Returning Visitor
        'Weekend': [0]
    },

    # Scenario 2: New Visitor with High Engagement
    {
        'Administrative': [1],
        'Administrative_Duration': [15.0],
        'Informational': [2],
        'Informational_Duration': [10.0],
        'ProductRelated': [10],
        'ProductRelated_Duration': [120.0],
        'BounceRates': [0.05],
        'ExitRates': [0.02],
        'PageValues': [25.0],
        'SpecialDay': [0.1],
        'Month': [5],  # May
        'OperatingSystems': [2],
        'Browser': [2],
        'Region': [2],
        'TrafficType': [3],
        'VisitorType': [0],  # New Visitor
        'Weekend': [1]
    },

    # Scenario 3: Returning Visitor with High Transaction Potential
    {
        'Administrative': [2],
        'Administrative_Duration': [25.0],
        'Informational': [3],
        'Informational_Duration': [20.0],
        'ProductRelated': [8],
        'ProductRelated_Duration': [80.0],
        'BounceRates': [0.03],
        'ExitRates': [0.01],
        'PageValues': [40.0],
        'SpecialDay': [0.2],
        'Month': [8],  # August
        'OperatingSystems': [3],
        'Browser': [3],
        'Region': [3],
        'TrafficType': [2],
        'VisitorType': [1],  # Returning Visitor
        'Weekend': [1]
    },

    # Scenario 4: New Visitor with Low Engagement
    {
        'Administrative': [0],
        'Administrative_Duration': [0.0],
        'Informational': [1],
        'Informational_Duration': [2.0],
        'ProductRelated': [1],
        'ProductRelated_Duration': [5.0],
        'BounceRates': [0.20],
        'ExitRates': [0.15],
        'PageValues': [2.0],
        'SpecialDay': [0.0],
        'Month': [11],  # November
        'OperatingSystems': [4],
        'Browser': [4],
        'Region': [4],
        'TrafficType': [4],
        'VisitorType': [0],  # New Visitor
        'Weekend': [0]
    },

    # Scenario 5: High Engagement During Special Day
    {
        'Administrative': [4],
        'Administrative_Duration': [40.0],
        'Informational': [5],
        'Informational_Duration': [30.0],
        'ProductRelated': [12],
        'ProductRelated_Duration': [150.0],
        'BounceRates': [0.02],
        'ExitRates': [0.01],
        'PageValues': [60.0],
        'SpecialDay': [0.5],
        'Month': [12],  # December
        'OperatingSystems': [2],
        'Browser': [1],
        'Region': [1],
        'TrafficType': [2],
        'VisitorType': [1],  # Returning Visitor
        'Weekend': [1]
    }
]


In [40]:
# Load the model and encoders
model = joblib.load('model_v3.pkl')
month_encoder = joblib.load('month_encoder.pkl')
visitor_type_encoder = joblib.load('visitor_type_encoder.pkl')

# Example new data
new_data = pd.DataFrame({
    'Administrative': [0],
    'Administrative_Duration': [0.0],
    'Informational': [3],
    'Informational_Duration': [12.0],
    'ProductRelated': [5],
    'ProductRelated_Duration': [25.0],
    'BounceRates': [0.08],
    'ExitRates': [0.04],
    'PageValues': [20.0],
    'SpecialDay': [0.1],
    'Month': [3],  # March
    'OperatingSystems': [2],
    'Browser': [2],
    'Region': [1],
    'TrafficType': [1],
    'VisitorType': [2],  # Returning Visitor
    'Weekend': [0]
})

# Encode the new data
new_data['Month'] = month_encoder.transform(new_data['Month'])
new_data['VisitorType'] = visitor_type_encoder.transform(new_data['VisitorType'])

# Make predictions
predictions = model.predict(new_data)
print(f'Predictions: {predictions}')


Predictions: [False]


