In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate
from zipfile import ZipFile
from xgboost import XGBRegressor
import os

In [None]:
# Read the five raw CSV tables.
orders = pd.read_csv('/content/orders.csv')
order_products_prior = pd.read_csv('/content/order_products__prior.csv')
products = pd.read_csv('/content/products.csv')
aisles = pd.read_csv('/content/aisles.csv')
departments = pd.read_csv('/content/departments.csv')

# Enrich the product table with its aisle and department rows (left joins keep
# every product even when no matching aisle/department row exists).
products = (
    products
    .merge(aisles, on='aisle_id', how='left')
    .merge(departments, on='department_id', how='left')
)

# One row per (order, product) line item, carrying the order-level and
# product-level columns; every missing value in the result is replaced by 0.
order_products = (
    order_products_prior
    .merge(orders, on='order_id', how='left')
    .merge(products, on='product_id', how='left')
    .fillna(0)
)



In [None]:
# Feature engineering: per-user and per-product aggregates broadcast back onto
# every line item of `order_products`.
#
# NOTE(review): all aggregates here are computed over the full frame, before
# the train/test split performed in the modelling cells, and
# `product_reorder_rate` is the mean of the `reordered` column that is later
# used as the target. Held-out rows therefore contribute to their own
# features -- confirm whether this leakage is acceptable.

# Assign the filled column back instead of calling fillna(inplace=True) on the
# `order_products[...]` selection: the in-place form operates on an
# intermediate object, triggers a pandas chained-assignment warning, and does
# not update the frame when copy-on-write is enabled.
order_products['days_since_prior_order'] = order_products['days_since_prior_order'].fillna(0)

# Per-user aggregates (transform keeps the original row alignment).
order_products['user_total_orders'] = order_products.groupby('user_id')['order_number'].transform('max')
order_products['user_avg_days_since_prior'] = order_products.groupby('user_id')['days_since_prior_order'].transform('mean')
# NOTE(review): this is the mean of `add_to_cart_order` over the user's rows,
# which is not the same quantity as the mean number of items per order --
# confirm that this is the intended definition of "order size".
order_products['user_avg_order_size'] = order_products.groupby('user_id')['add_to_cart_order'].transform('mean')

# Reorder rate per product, kept as its own two-column table.
product_reorder_rate = (
    order_products.groupby('product_id')['reordered']
    .mean()
    .rename('product_reorder_rate')
    .reset_index()
)

# Attach the rate by lookup rather than by merge so the cell can be re-run:
# a second merge would produce `product_reorder_rate_x` / `_y` columns and
# break the later `order_products[features]` selection.
order_products['product_reorder_rate'] = order_products['product_id'].map(
    product_reorder_rate.set_index('product_id')['product_reorder_rate']
)


In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Columns used as model inputs.
features = [
    'order_dow',
    'order_hour_of_day',
    'days_since_prior_order',
    'user_total_orders',
    'user_avg_days_since_prior',
    'user_avg_order_size',
    'product_reorder_rate',
]

# Target vector and design matrix.
y = order_products['reordered']
X = order_products[features]

# Depth-limited tree and an 80/20 hold-out split, both seeded for repeatability.
dt_model = DecisionTreeRegressor(random_state=42, max_depth=6)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit on the training portion, then score the held-out portion.
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_test)

# Hold-out error metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'MSE: {mse}')
print(f'MAE: {mae}')


MSE: 0.18893694275234443
MAE: 0.37791376293099094


In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

# Train a depth-limited decision tree on the engineered features, report
# hold-out error, and preview a few predictions next to the true values.
# The target is the `reordered` column, so the model output is a score for
# that flag -- it is not a price.
features = ['order_dow', 'order_hour_of_day', 'days_since_prior_order',
            'user_total_orders', 'user_avg_days_since_prior',
            'user_avg_order_size', 'product_reorder_rate']

X = order_products[features]
y = order_products['reordered']

# Split the data (80/20, seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
dt_model = DecisionTreeRegressor(max_depth=6, random_state=42)

# Train the model
dt_model.fit(X_train, y_train)

# Make predictions
y_pred = dt_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f'MSE: {mse}')
print(f'MAE: {mae}')

# The variable names below are kept for compatibility with any other cell
# that reads them; the values are reorder scores and `reordered` flags.
predicted_prices = y_pred
actual_prices = y_test.values

# Preview at most the first 10 rows; the bound guards against an IndexError
# when the test split holds fewer than 10 samples.
n_preview = min(10, len(predicted_prices))
for i in range(n_preview):
    print(f'Predicted reorder score: {predicted_prices[i]}, Actual reordered: {actual_prices[i]}')


MSE: 0.18893694275234443
MAE: 0.37791376293099094
Predicted Price: 0.10408032383303566, Actual Price: 0
Predicted Price: 0.7246383005264654, Actual Price: 0
Predicted Price: 0.8483305617597058, Actual Price: 0
Predicted Price: 0.8539016082875526, Actual Price: 1
Predicted Price: 0.13596634388713597, Actual Price: 0
Predicted Price: 0.7655917088704033, Actual Price: 1
Predicted Price: 0.9160089254720145, Actual Price: 1
Predicted Price: 0.7313162573676394, Actual Price: 1
Predicted Price: 0.5710439148838874, Actual Price: 1
Predicted Price: 0.7655917088704033, Actual Price: 1


In [None]:
import joblib

# Persist the fitted tree to 'price_model.pkl' in the working directory.
# The call is left as the cell's last expression so its return value (the list
# of written file names) is displayed.
joblib.dump(value=dt_model, filename='price_model.pkl')


['price_model.pkl']

In [None]:
import joblib

# Persist the fitted tree to 'price_recommendation_model.pkl' in the working
# directory; this writes the same `dt_model` object that the cell saving
# 'price_model.pkl' writes, under a second file name.
joblib.dump(value=dt_model, filename='price_recommendation_model.pkl')
