# Price Optimization

This example is adapted from the example in Gurobi's modeling examples [How Much
Is Too Much? Avocado Pricing and Supply Using Mathematical
Optimization](https://github.com/Gurobi/modeling-examples/tree/master/price_optimization).

We develop the same example as in the documentation, but we try and compare different
regression models to estimate demand.

In [None]:
import pandas as pd
import warnings

import matplotlib.pyplot as plt
from sklearn import tree
import seaborn as sns

import numpy as np

import gurobipy as gp
from gurobipy import GRB


from gurobi_ml import add_predictor_constr
import gurobipy_pandas as gppd

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## Load the Packages and the Datasets

In [None]:
# Get the data: both CSV files are read from the Gurobi modeling-examples repository

data_url = "https://raw.githubusercontent.com/Gurobi/modeling-examples/master/price_optimization/"
# dataset downloaded directly from HAB
avocado_new = pd.read_csv(data_url + "HABdata_2019_2022.csv")
# dataset downloaded from Kaggle
avocado_old = pd.read_csv(data_url + "kaggledata_till2018.csv")
avocado = pd.concat([avocado_new, avocado_old])

# Parse the dates, then derive the year and its index (years counted from 2015)
avocado["date"] = pd.to_datetime(avocado["date"])
avocado["year"] = avocado["date"].dt.year
avocado["year_index"] = avocado["year"] - 2015
avocado = avocado.sort_values(by="date")

# Month of each observation (used to define the peak season)
avocado["month"] = avocado["date"].dt.month
peak_months = range(2, 8)  # <--------- Set the months for the "peak season"


def peak_season(row):
    """Return 1 when the row's ``month`` entry is in ``peak_months``, else 0."""
    month = int(row["month"])
    return int(month in peak_months)


# Flag peak-season observations: 1 when the month is in `peak_months`, else 0.
# A vectorized membership test on the month column replaces the row-by-row
# `apply`, which called a Python function once per row.
avocado["peak"] = avocado["month"].isin(list(peak_months)).astype("int64")

# Scale the number of avocados to millions
avocado["units_sold"] = avocado["units_sold"] / 1_000_000

# Select only conventional avocados and keep only the columns listed below
keep_columns = [
    "date",
    "units_sold",
    "price",
    "region",
    "year",
    "month",
    "year_index",
    "peak",
]
avocado = avocado.loc[avocado["type"] == "Conventional", keep_columns].reset_index(
    drop=True
)

avocado

## Train regressions

We prepare the data using `OneHotEncoder` and `make_column_transformer`. We want
to transform the region feature using the encoder, apply scaling to the `price` and
`year` features, and pass the `peak` flag through unchanged.

In [None]:
# Regions included in the demand model
regions = [
    "Great_Lakes",
    "Midsouth",
    "Northeast",
    "Northern_New_England",
    "SouthCentral",
    "Southeast",
    "West",
    "Plains",
]

# Keep only the observations that belong to one of those regions
in_selected_regions = avocado["region"].isin(regions)
df = avocado.loc[in_selected_regions]

# Features and target of the regression
X = df.loc[:, ["region", "price", "year", "peak"]]
y = df.loc[:, "units_sold"]

# Split the data for training and testing (80% of the rows go to training)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=1,
)

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# Column-wise preprocessing: one-hot encode the region (first level dropped),
# standardize price and year, and pass the peak flag through unchanged.
region_encoding = (OneHotEncoder(drop="first"), ["region"])
numeric_scaling = (StandardScaler(), ["price", "year"])
peak_passthrough = ("passthrough", ["peak"])
feat_transform = make_column_transformer(
    region_encoding,
    numeric_scaling,
    peak_passthrough,
    remainder="drop",
)

# Chain the preprocessing with a linear regression and fit on the training split
pipeline = make_pipeline(feat_transform, LinearRegression())
pipeline.fit(X_train, y_train)

# R^2 score on both splits
r2_train = r2_score(y_train, pipeline.predict(X_train))
r2_test = r2_score(y_test, pipeline.predict(X_test))

print(f"R^2 value in the test set is {r2_test:.3f} training {r2_train:.3f}")

In [None]:
# Display the fitted pipeline as the cell output
pipeline

To validate the regression model, we randomly split the dataset into $80\%$
training and $20\%$ testing data and learned the weights on the training data using `scikit-learn`.

Create a dictionary with the various regression models that we want to use.

## Prepare the data for the optimization model

In [None]:
# Sets and parameters
B = 30  # total amount of avocado supply

peak_or_not = 1  # 1 if it is the peak season; 0 if it isn't
year = 2020

c_waste = 0.5  # the cost ($) of wasting an avocado

# the cost of transporting an avocado to each region
c_transport = pd.Series(
    dict(
        Great_Lakes=0.3,
        Midsouth=0.1,
        Northeast=0.4,
        Northern_New_England=0.5,
        SouthCentral=0.3,
        Southeast=0.2,
        West=0.2,
        Plains=0.2,
    ),
    name="transport_cost",
)

# the same transport costs, ordered like `regions`
c = c_transport.loc[regions]

# Price bounds are fixed constants, identical for every region
a_min = 0  # minimum avocado price in each region
a_max = 2  # maximum avocado price in each region

# Per-region table: transport cost plus the smallest and largest `units_sold`
# values observed for that region in `df`
units_by_region = df.groupby("region")["units_sold"]
data = pd.concat(
    [
        c_transport,
        units_by_region.min().rename("min_delivery"),
        units_by_region.max().rename("max_delivery"),
    ],
    axis=1,
)

In [None]:
# Display the per-region parameter table as the cell output
data

In [None]:
m = gp.Model("Avocado_Price_Allocation")

# One price variable per row of `data`, bounded by [a_min, a_max]
p = gppd.add_vars(m, data, lb=a_min, ub=a_max, name="price")
# One demand variable per row of `data`, with no lower bound
d = gppd.add_vars(m, data, name="demand", lb=-GRB.INFINITY)
# Single scalar variable: the unsold avocados
u = m.addVar()
m.update()

# Show the two variable series side by side
pd.concat([p, d], axis=1)

In [None]:
# Process pending model modifications.
# NOTE(review): looks redundant — `m.update()` is already called in the cell
# that creates the variables, and nothing is added to the model in between.
m.update()

In [None]:
# Objective: (price - transport cost) * demand summed over the regions,
# minus the waste cost of the unsold avocados
unit_margin = p - data["transport_cost"]
net_revenue = (unit_margin * d).sum() - c_waste * u
m.setObjective(net_revenue, GRB.MAXIMIZE)

# Supply balance: total demand plus unsold avocados equals the supply B
m.addConstr(d.sum() + u == B)

In [None]:
# Fixed features (year, peak flag, region label), one row per region
fixed_feats = pd.DataFrame(
    {"year": year, "peak": peak_or_not, "region": regions},
    index=regions,
)

# Append the price variables and put the columns in the order
# region, price, year, peak
feats = pd.concat([fixed_feats, p], axis=1)
feats = feats[["region", "price", "year", "peak"]]

In [None]:
# Tie the demand variables `d` to the fitted `pipeline` evaluated on `feats`.
# NOTE(review): `no_debug=1` is passed through as a keyword option — confirm the
# installed gurobi_ml version still accepts it.
pred_constr = add_predictor_constr(m, pipeline, feats, d, no_debug=1)

# Print the statistics reported by the predictor constraint
pred_constr.print_stats()

In [None]:
# The objective multiplies price and demand variables, so set the NonConvex
# parameter to 2 before solving
m.setParam("NonConvex", 2)
m.optimize()

In [None]:
# Solution values of the price and demand variables, one row per region
solution = pd.DataFrame(
    {"Price": p.gppd.X, "Demand": d.gppd.X},
    index=regions,
)

opt_revenue = m.ObjVal
print(f" Optimal net revenue: {opt_revenue:.3}")
print(f"\n Unsold avocados: {u.X:.2}")
solution.round(4)

We can also check the error in the estimate of the Gurobi solution for the regression model.

In [None]:
# Largest entry of the error reported by the predictor constraint
max_error = np.max(pred_constr.get_error())
print(f"Maximum error in approximating the regression {max_error:.6}")

We can also display the computed features of the regression model as a pandas DataFrame.

In [None]:
# Input values of the predictor constraint without the region column, rounded to 2 decimals
pred_constr.input_values.drop(columns="region").round(2)

Let us now visualize a scatter plot between the price and the number of avocados
sold (in millions) for the eight regions.

In [None]:
fig, ax = plt.subplots(1, 1)

# One marker per row of `solution`, colored by its index label
plot_sol = sns.scatterplot(
    data=solution,
    x="Price",
    y="Demand",
    hue=solution.index,
    s=100,
    ax=ax,
)

# Axis labels and fixed axis ranges
ax.set_xlabel("Price per avocado ($)")
ax.set_ylabel("Number of avocados sold (millions)")
plt.xlim(1, 2.2)
plt.ylim(0, 5.2)

# Legend placed to the right of the axes
plot_sol.legend(loc="center left", bbox_to_anchor=(1.25, 0.5), ncol=1)

plt.show()
print(f" Optimal net revenue: {opt_revenue:.3} million, unsold avocados: {u.X:.2} millions")

We have shown how to model the price and supply optimization problem with Gurobi
Machine Learning. In the [Gurobi modeling examples
notebook](https://github.com/Gurobi/modeling-examples/tree/master/price_optimization)
more analysis of the solutions this model can give is done interactively. Be
sure to take a look at it.

Copyright © 2022 Gurobi Optimization, LLC