This is a **private notebook** for solving the optimization part of the **Shell.ai Hackathon 2023**. **Important: do not publish this notebook or its results anywhere; it is for private use only.**

# 1. Data loading

In [1]:
!pip3 install mip

Collecting mip
  Downloading mip-1.15.0-py3-none-any.whl (15.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.3/15.3 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mip
Successfully installed mip-1.15.0


In [2]:
# # Imports
# import pandas as pd
# import geopandas as gpd
# from datetime import date, timedelta
# import folium
# import folium.plugins
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np
# import plotly.express as px
# from branca.colormap import linear

# # GUJARAT center coordinates 22.6708° N, 71.5724° E
# GUJARAT_CENTER_LAT = 22.6708
# GUJARAT_CENTER_LON = 71.5724

In [3]:
# # Load csv data
# sample_submission_df = pd.read_csv('/kaggle/input/shellai/sample_submission.csv')
# sample_submission_df

In [4]:
# biomass_2018_forecast = sample_submission_df[(sample_submission_df['data_type'] == 'biomass_forecast')
#                                               & (sample_submission_df['year'] == 2018)]
# biomass_2018_forecast

In [5]:
# distance_matrix_df = pd.read_csv('/kaggle/input/shellai/Distance_Matrix.csv')
# distance_matrix = distance_matrix_df.to_numpy()[:, 1:] # Delete the first index row
# # distance_matrix[i, j] is the distance to move from source i to destination j
# distance_matrix_df

In [6]:
# print('Shape of distance matrix: ', distance_matrix.shape)
# print('Distance from source 0 to destination 2417: ', distance_matrix[0,2417])
# print('Distance from source 2417 to destination 0: ', distance_matrix[2417,0])

In [7]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from mip import BINARY, MINIMIZE, Model, OptimizationStatus, minimize, xsum

# --- Input / output locations ------------------------------------------------
SYNTH_DATA_PATH = '/kaggle/input/optimizationdata'
OUT_SYNTH_DATA_PATH = '/kaggle/working'
FORECAST_FILE = 'Biomass_History_Synthetic.csv'
DISTANCE_FILE = 'Distance_Matrix_Synthetic.csv'

# --- Problem size and cost parameters ----------------------------------------
# Number of locations kept from the input files. 30 gives a small test problem;
# the original note lists 2418 as the alternative value.
N = 30
TRANSPORT_FACTOR_A = 0.001

# Distance matrix: the first CSV column is used as the index. Only the top-left
# N x N corner is kept and scaled by the transport factor to obtain a cost matrix.
d_matrix = pd.read_csv(os.path.join(SYNTH_DATA_PATH, DISTANCE_FILE), index_col=0).values
d_matrix_cost = TRANSPORT_FACTOR_A * d_matrix[:N, :N]

# Forecast table restricted to its first N rows.
df_fc = pd.read_csv(os.path.join(SYNTH_DATA_PATH, FORECAST_FILE)).iloc[:N, :]

ls_j = range(len(d_matrix_cost))  # indices of the candidate locations
year = 2018
cap_b_j = 20000    # Maximum depot capacity
cap_p_j = 100000   # Maximum production capacity
n_refineries = 5   # Number of refineries
n_depots = 25      # Number of depots

# Forecasted biomass per location for 2018 (dict keyed by row index) and its total.
total_fc_18 = df_fc['2018'].sum()
d_bio_18 = df_fc['2018'].to_dict()
print("Forecasted biomass for year 2018: ", total_fc_18)

# Same quantities for 2019.
total_fc_19 = df_fc['2019'].sum()
d_bio_19 = df_fc['2019'].to_dict()
print("Forecasted biomass for year 2019: ", total_fc_19)

# Build the optimization model. The sense must be the MINIMIZE constant; the
# `minimize` helper is a function that wraps an objective expression, not a sense.
m = Model(sense=MINIMIZE)
m.threads = -1  # NOTE(review): per python-mip docs -1 requests all available cores - confirm


def _add_flow_vars(prefix):
    """Create one continuous, non-negative flow variable per (i, j) pair.

    Variables are named ``{prefix}_{i}_{j}`` and returned as a flat list in
    row-major order, so the variable for (i, j) sits at index ``i * len(ls_j) + j``.
    The lower bound of 0 implements requirement 1: all biomass / pellet
    quantities must be greater than or equal to zero.
    """
    return [m.add_var(name=f'{prefix}_{i}_{j}', lb=0)
            for i in range(len(d_matrix_cost)) for j in ls_j]


# Biomass shipped from harvesting site i to depot j, one set per year.
b_18 = _add_flow_vars('b_2018')
print(f"Variables b_2018 go from {b_18[0].name} to {b_18[-1].name}")

b_19 = _add_flow_vars('b_2019')
print(f"Variables b_2019 go from {b_19[0].name} to {b_19[-1].name}")

# Pellets shipped from depot i to refinery j, one set per year.
p_18 = _add_flow_vars('p_2018')
print(f"Variables p_2018 go from {p_18[0].name} to {p_18[-1].name}")

p_19 = _add_flow_vars('p_2019')
print(f"Variables p_2019 go from {p_19[0].name} to {p_19[-1].name}")

# x[j] = 1 if a depot is opened at location j.
x = [m.add_var(name=f'x_{j}', var_type=BINARY) for j in ls_j]
print(f"Variables x go from {x[0].name} to {x[-1].name}")

# r[j] = 1 if a refinery is opened at location j.
r = [m.add_var(name=f'r_{j}', var_type=BINARY) for j in ls_j]
print(f"Variables r go from {r[0].name} to {r[-1].name}")

# Constraints.
# The flow variables are stored in flat, row-major lists (i outer, j inner), so the
# variable named '<kind>_<year>_<i>_<j>' is element i * n_sites + j of its list.
# Indexing the lists directly avoids formatting a name and asking the solver to look
# it up for every single coefficient.
n_sites = len(d_matrix_cost)
sites = range(n_sites)
BALANCE_TOL = .001  # tolerance allowed on the depot mass balance (constraint 8)


def _flow(flat_vars, i, j):
    """Return the flow variable going from location i to location j."""
    return flat_vars[i * n_sites + j]


# 2. The amount of biomass procured for processing from each harvesting site 'i' must be
# less than or equal to that site's forecasted biomass.
for i in sites:
    m += xsum(_flow(b_18, i, j) for j in ls_j) <= d_bio_18[i]
    m += xsum(_flow(b_19, i, j) for j in ls_j) <= d_bio_19[i]

for j in ls_j:
    # 3. Biomass arriving at depot j cannot exceed the depot capacity, and must be
    # zero when no depot is opened there (x[j] = 0).
    for biomass in (b_18, b_19):
        m += xsum(_flow(biomass, i, j) for i in sites) <= cap_b_j * x[j]

    # 4. Pellets arriving at refinery j cannot exceed the refinery capacity, and must
    # be zero when no refinery is opened there (r[j] = 0).
    for pellets in (p_18, p_19):
        m += xsum(_flow(pellets, i, j) for i in sites) <= cap_p_j * r[j]

    # 8. Total amount of biomass entering each preprocessing depot is equal to the total
    # amount of pellets exiting that depot (within a tolerance limit of 1e-03).
    # Written as two inequalities: |biomass in - pellets out| <= tolerance.
    # NOTE(review): the objective rewards shipped pellets, so the solver can use this
    # slack to ship slightly more pellets than biomass received - confirm this is intended.
    for biomass, pellets in ((b_18, p_18), (b_19, p_19)):
        m += xsum(_flow(biomass, i, j) - _flow(pellets, j, i) for i in sites) <= \
            BALANCE_TOL * x[j]
        m += xsum(_flow(pellets, j, i) - _flow(biomass, i, j) for i in sites) <= \
            BALANCE_TOL * x[j]

# 5. Number of depots should be less than or equal to 25.
m += xsum(x[j] for j in ls_j) <= n_depots

# 6. Number of refineries should be less than or equal to 5.
m += xsum(r[j] for j in ls_j) <= n_refineries

# 7. At least 80% of the total forecasted biomass must be processed by refineries each year
m += xsum(_flow(p_18, i, j) for i in sites for j in ls_j) >= 0.8 * total_fc_18
m += xsum(_flow(p_19, i, j) for i in sites for j in ls_j) >= 0.8 * total_fc_19

# Objective: transport cost plus under-utilisation cost, summed over 2018 and 2019.
#   transport cost  = distance cost * shipped quantity, for biomass and for pellets
#   under-use cost  = (capacity of every opened depot / refinery, counted once per year)
#                     minus the quantities actually shipped to them
# An earlier formulation placed the capacity terms inside the (i, j) double sum, which
# counted each of them once per source location; here they are summed over j only.
#
# The flow variables are stored in flat, row-major lists, so the variable for (i, j) is
# element i * n_sites + j; direct indexing replaces per-term lookups by variable name.
n_sites = len(d_matrix_cost)

m.objective = minimize(
    # Biomass transport: site i -> depot j
    xsum(d_matrix_cost[i, j] * (b_18[i * n_sites + j] + b_19[i * n_sites + j])
         for i in range(n_sites) for j in ls_j)
    # Pellet transport: depot i -> refinery j
    + xsum(d_matrix_cost[i, j] * (p_18[i * n_sites + j] + p_19[i * n_sites + j])
           for i in ls_j for j in ls_j)
    # Installed capacity of opened depots and refineries (factor 2 = two years)
    + xsum(2*cap_b_j*x[j] + 2*cap_p_j*r[j] for j in ls_j)
    # Minus the biomass actually delivered to depots
    + xsum(- b_18[i * n_sites + j] - b_19[i * n_sites + j]
           for i in range(n_sites) for j in ls_j)
    # Minus the pellets actually delivered to refineries
    + xsum(- p_18[i * n_sites + j] - p_19[i * n_sites + j]
           for i in ls_j for j in ls_j)
)

Forecasted biomass for year 2018:  1488.016473495
Forecasted biomass for year 2019:  1766.976034022
Variables b_2018 go from b_2018_0_0 to b_2018_29_29
Variables b_2019 go from b_2019_0_0 to b_2019_29_29
Variables p_2018 go from p_2018_0_0 to p_2018_29_29
Variables p_2019 go from p_2019_0_0 to p_2019_29_29
Variables x go from x_0 to x_29
Variables r go from r_0 to r_29


In [8]:
# Report the size of the model that was just built.
model_stats = (
    ('Number of constraints: ', m.num_rows),                  # rows of the model
    ('Number of variables: ', m.num_cols),                    # columns of the model
    ('Number of integer variables: ', m.num_int),             # integer columns
    ('Number of non-zeros in constraint matrix: ', m.num_nz), # non-zero coefficients
)
for label, value in model_stats:
    print(label, value)

Number of constraints:  304
Number of variables:  3660
Number of integer variables:  60
Number of non-zeros in constraint matrix:  14700


In [9]:
print("Solve")
# Run the solver with its default limits. Optional tuning that was tried before:
# m.max_gap = 0.1, m.threads = -1, or m.optimize(max_seconds=100).
status = m.optimize()
print(status)

solved_statuses = (OptimizationStatus.OPTIMAL, OptimizationStatus.FEASIBLE)
failed_statuses = (OptimizationStatus.NO_SOLUTION_FOUND, OptimizationStatus.INFEASIBLE)

# Report the outcome of the run.
if status == OptimizationStatus.OPTIMAL:
    print('optimal solution cost {} found'.format(m.objective_value))
elif status == OptimizationStatus.FEASIBLE:
    print('sol.cost {} found, best possible: {}'.format(m.objective_value, m.objective_bound))
elif status in failed_statuses:
    print('no feasible solution found, lower bound is: {}'.format(m.objective_bound))

# When a solution exists, collect every variable value by name and save it as CSV.
# The single value column is called 'biomass' although it holds all model variables.
if status in solved_statuses:
    d_sol = {var.name: var.x for var in m.vars}
    df_sol = pd.DataFrame.from_dict(d_sol, orient='index', columns=['biomass'])
    solution_path = os.path.join(OUT_SYNTH_DATA_PATH, 'solution.csv')
    df_sol.to_csv(solution_path)

Solve
Welcome to the CBC MILP Solver 
Version: Trunk
Build Date: Oct 24 2021 

Starting solution of the Linear programming relaxation problem using Dual Simplex

Coin0506I Presolve 304 (0) rows, 3660 (0) columns and 14700 (0) elements
Clp0014I Perturbing problem by 0.001% of 20916.457 - largest nonzero change 0.018922632 ( 0.17406027%) - largest zero change 0
Clp0000I Optimal - objective value -0.00019404254
Clp0032I Optimal objective -0.0001940425357 - 379 iterations time 0.022
Clp1000I Unscaled problem has primal infeasibilities

Starting MIP optimization
Cgl0004I processed model has 304 rows, 3660 columns (60 integer (60 of which binary)) and 14700 elements
Coin3009W Conflict graph built in 0.001 seconds, density: 0.000%
Cgl0015I Clique Strengthening extended 0 cliques, 0 were dominated
Cbc0045I Nauty did not find any useful orbits in time 0.105638
Cbc0038I Initial state - 48 integers unsatisfied sum - 0.826666
Cbc0038I Pass   1: suminf.    0.22667 (2) obj. 32237.4 iterations 200
Cb

In [10]:
# Sanity check for all quantities
sent_biomass_to_depots_18 = df_sol.loc[df_sol.index.str.startswith('b_2018'), 'biomass'].sum()
sent_pellets_to_refinery_18 = df_sol.loc[df_sol.index.str.startswith('p_2018'), 'biomass'].sum()
print('Forecasted biomass for year 2018: ', total_fc_18)
print('Biomass transported to depots for year 2018: ', sent_biomass_to_depots_18)
print('Biomass transported to depots for year 2018: ', sent_pellets_to_refinery_18)
print()
sent_biomass_to_depots_19 = df_sol.loc[df_sol.index.str.startswith('b_2019'), 'biomass'].sum()
sent_pellets_to_refinery_19 = df_sol.loc[df_sol.index.str.startswith('p_2019'), 'biomass'].sum()
print("Forecasted biomass for year 2019: ", total_fc_19)
print('Biomass transported to depots for year 2019: ', sent_biomass_to_depots_19)
print('Biomass transported to depots for year 2019: ', sent_pellets_to_refinery_19)

Forecasted biomass for year 2018:  1488.016473495
Biomass transported to depots for year 2018:  1488.016473495
Biomass transported to depots for year 2018:  1488.0174734949999

Forecasted biomass for year 2019:  1766.976034022
Biomass transported to depots for year 2019:  1766.9760340219998
Biomass transported to depots for year 2019:  1766.9770340219998


In [11]:
# underuse_cost = float(xsum(2*cap_b_j*x[j] + 2*cap_p_j*r[j] for j in ls_j)) + \
#                        float(xsum(- df_sol.loc[f'b_2018_{i}_{j}'][0] - df_sol.loc[f'b_2019_{i}_{j}'][0] \
#                             for i in range(len(d_matrix)) for j in ls_j)) + \
#                        float(xsum(- df_sol.loc[f'p_2018_{i}_{j}'][0] - df_sol.loc[f'p_2019_{i}_{j}'][0] \
#                             for i in ls_j for j in ls_j))
# print(underuse_cost)