Now that we have a working model, let's see what it will predict with random data.

In [13]:
import pandas as pd
import numpy as np
import joblib
import random

feature_cols = [
    'Year', 'Month', 'Customer Age', 'Quantity', 'Unit Cost', 'Unit Price', 'Cost', 'DayOfWeek', 'Profit Margin',
    'Customer Gender_m', 'Country_Germany', 'Country_United Kingdom', 'Country_United States',
    'State_Arizona', 'State_Bayern', 'State_Brandenburg', 'State_California', 'State_Charente-Maritime',
    'State_England', 'State_Essonne', 'State_Florida', 'State_Garonne (Haute)', 'State_Georgia',
    'State_Hamburg', 'State_Hauts de Seine', 'State_Hessen', 'State_Illinois', 'State_Kentucky',
    'State_Loir et Cher', 'State_Loiret', 'State_Massachusetts', 'State_Minnesota', 'State_Mississippi',
    'State_Missouri', 'State_Montana', 'State_Moselle', 'State_New York', 'State_Nord',
    'State_Nordrhein-Westfalen', 'State_North Carolina', 'State_Ohio', 'State_Oregon',
    'State_Pas de Calais', 'State_Saarland', 'State_Seine (Paris)', 'State_Seine Saint Denis',
    'State_Seine et Marne', 'State_Somme', 'State_South Carolina', 'State_Texas', 'State_Utah',
    "State_Val d'Oise", 'State_Val de Marne', 'State_Virginia', 'State_Washington', 'State_Wyoming',
    'State_Yveline', 'Product Category_Bikes', 'Product Category_Clothing', 'Sub Category_Bike Stands',
    'Sub Category_Bottles and Cages', 'Sub Category_Caps', 'Sub Category_Cleaners', 'Sub Category_Fenders',
    'Sub Category_Gloves', 'Sub Category_Helmets', 'Sub Category_Hydration Packs', 'Sub Category_Jerseys',
    'Sub Category_Mountain Bikes', 'Sub Category_Road Bikes', 'Sub Category_Shorts', 'Sub Category_Socks',
    'Sub Category_Tires and Tubes', 'Sub Category_Touring Bikes', 'Sub Category_Vests'
]

n_samples = 10

# Initialize with zeros
sample_data = pd.DataFrame(0, index=np.arange(n_samples), columns=feature_cols)


# Group columns by category prefix for random selection
gender_cols = ['Customer Gender_m']  # binary example; assuming 'm' means male (1) or female (0)
country_cols = ['Country_Germany', 'Country_United Kingdom', 'Country_United States']
state_cols = [col for col in feature_cols if col.startswith('State_')]
product_category_cols = ['Product Category_Bikes', 'Product Category_Clothing']
sub_category_cols = [col for col in feature_cols if col.startswith('Sub Category_')]

# Helper function to create zero vectors and set one randomly to 1
def one_hot_random(cols, n_samples):
    arr = np.zeros((n_samples, len(cols)), dtype=int)
    for i in range(n_samples):
        idx = random.randint(0, len(cols) - 1)
        arr[i, idx] = 1
    return pd.DataFrame(arr, columns=cols)


Run the block below to generate sample data.

In [14]:
n_samples = 10

# Create numeric features with fixed or random values
sample_data = pd.DataFrame({
    'Year': [2023]*n_samples,
    'Month': [random.randint(1, 12) for _ in range(n_samples)],
    'Customer Age': [random.randint(18, 65) for _ in range(n_samples)],
    'Quantity': [random.randint(1, 5) for _ in range(n_samples)],
    'Unit Cost': [round(random.uniform(10, 100), 2) for _ in range(n_samples)],
    'Unit Price': [round(random.uniform(20, 150), 2) for _ in range(n_samples)],
    'Cost': 0,  # will calculate next
    'DayOfWeek': [random.randint(0, 6) for _ in range(n_samples)],
    'Profit Margin': 0,  # will calculate next
})

# Calculate Cost and Profit Margin
sample_data['Cost'] = sample_data['Quantity'] * sample_data['Unit Cost']
sample_data['Profit Margin'] = sample_data['Unit Price'] - sample_data['Unit Cost']

# Generate random one-hot for categorical columns
sample_gender = one_hot_random(gender_cols, n_samples)
sample_country = one_hot_random(country_cols, n_samples)
sample_state = one_hot_random(state_cols, n_samples)
sample_product_category = one_hot_random(product_category_cols, n_samples)
sample_sub_category = one_hot_random(sub_category_cols, n_samples)

# Combine all into one DataFrame
sample_data = pd.concat([sample_data,
                         sample_gender,
                         sample_country,
                         sample_state,
                         sample_product_category,
                         sample_sub_category], axis=1)

# Make sure columns order matches model expectation
sample_data = sample_data[feature_cols]


Run the block below to see the result.

In [17]:
model = joblib.load('../models/model.pkl')

predictions = model.predict(sample_data)

for i in range(len(sample_data)):
    row = sample_data.iloc[i]
    # Extract key details
    year = row['Year']
    month = row['Month']
    age = row['Customer Age']
    quantity = row['Quantity']
    unit_price = row['Unit Price']
    day_of_week = row['DayOfWeek']

    # Find which gender, country, state, product category, subcategory is set to 1
    gender = 'Male' if row['Customer Gender_m'] == 1 else 'Female'

    # For countries (only one should be 1)
    countries = ['Germany', 'United Kingdom', 'United States']
    country_cols = ['Country_Germany', 'Country_United Kingdom', 'Country_United States']
    country = [c for c, col in zip(countries, country_cols) if row[col] == 1]
    country = country[0] if country else 'Unknown'

    # States
    state_cols = [col for col in sample_data.columns if col.startswith('State_')]
    state = [col.replace('State_', '') for col in state_cols if row[col] == 1]
    state = state[0] if state else 'Unknown'

    # Product Category
    product_categories = ['Bikes', 'Clothing']
    product_cols = ['Product Category_Bikes', 'Product Category_Clothing']
    product_category = [p for p, col in zip(product_categories, product_cols) if row[col] == 1]
    product_category = product_category[0] if product_category else 'Unknown'

    # Sub Category (multiple, but presumably only one is 1)
    sub_cat_cols = [col for col in sample_data.columns if col.startswith('Sub Category_')]
    sub_category = [col.replace('Sub Category_', '') for col in sub_cat_cols if row[col] == 1]
    sub_category = sub_category[0] if sub_category else 'Unknown'

    print(f"Sample {i+1}: Year={year}, Month={month}, Age={age}, Quantity={quantity}, Unit Price=${unit_price:.2f}, "
          f"DayOfWeek={day_of_week}, Gender={gender}, Country={country}, State={state}, "
          f"Product Category={product_category}, Sub Category={sub_category}")
    print(f"  Predicted Revenue: ${predictions[i]:.2f}\n")



Sample 1: Year=2023.0, Month=4.0, Age=60.0, Quantity=2.0, Unit Price=$47.93, DayOfWeek=4.0, Gender=Male, Country=United States, State=Massachusetts, Product Category=Clothing, Sub Category=Caps
  Predicted Revenue: $215.61

Sample 2: Year=2023.0, Month=9.0, Age=27.0, Quantity=4.0, Unit Price=$41.75, DayOfWeek=4.0, Gender=Male, Country=United States, State=Seine (Paris), Product Category=Bikes, Sub Category=Road Bikes
  Predicted Revenue: $436.63

Sample 3: Year=2023.0, Month=10.0, Age=25.0, Quantity=2.0, Unit Price=$34.61, DayOfWeek=1.0, Gender=Male, Country=Germany, State=Arizona, Product Category=Bikes, Sub Category=Jerseys
  Predicted Revenue: $299.27

Sample 4: Year=2023.0, Month=10.0, Age=56.0, Quantity=3.0, Unit Price=$53.70, DayOfWeek=1.0, Gender=Male, Country=United Kingdom, State=Moselle, Product Category=Bikes, Sub Category=Cleaners
  Predicted Revenue: $276.53

Sample 5: Year=2023.0, Month=12.0, Age=18.0, Quantity=1.0, Unit Price=$69.63, DayOfWeek=2.0, Gender=Male, Country=U