<a href="https://colab.research.google.com/github/ARidwanW/lab-datascience-ibm/blob/main/Lab_Exploring_Data_using_IBM_Cloud_Gallery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Finding optimal locations of new stores using Decision Optimization
--------------------------

# Step 1: Import the docplex package

In [4]:
!pip install docplex



In [5]:
import sys
import docplex.mp

# Step 2: Model the data

- The data for this problem is quite simple: it is composed of the list of public libraries and their geographical locations.
- The data is acquired from [Chicago open data](https://data.cityofchicago.org/) as a JSON file, which is in the following format:

"data" : [ [ 1, "13BFA4C7-78CE-4D83-B53D-B57C60B701CF", 1, 1441918880, "885709", 1441918880, "885709", null, "Albany Park", "M, W: 10AM-6PM;  TU, TH: 12PM-8PM; F, SA: 9AM-5PM; SU: Closed", "Yes", "Yes ", "3401 W. Foster Avenue", "CHICAGO", "IL", "60625", "(773) 539-5450", [ "http://www.chipublib.org/locations/1/", null ], [ null, "41.975456", "-87.71409", null, false ] ]
This code snippet represents library "**3401 W. Foster Avenue**" located at **41.975456, -87.71409**


# Step 3: Prepare the data

In [7]:
# from google.colab import files
# uploaded = files.upload()

Saving Libraries_-_Locations___Contact_Information__and_Usual_Hours_of_Operation.csv to Libraries_-_Locations___Contact_Information__and_Usual_Hours_of_Operation.csv


In [8]:
# Store the longitude, latitude and street-crossing name of each public library location
class XPoint:
  """A plain 2-D point; in this notebook x carries a longitude and y a latitude."""

  def __init__(self, x, y):
    self.x, self.y = x, y

  def __str__(self):
    # %g keeps the label short by printing each coordinate in compact form.
    coordinates = (self.x, self.y)
    return "P(%g_%g)" % coordinates

class NamedPoint(XPoint):
  """An XPoint that also carries a name; the name doubles as its string form."""

  def __init__(self, name, x, y):
    super().__init__(x, y)
    self.name = name

  def __str__(self):
    # Unlike XPoint, the label is the name itself rather than the coordinates.
    return self.name

## Define How to compute the earth distance between 2 points

In [9]:
!pip install geopy



In [10]:
try:
  import geopy.distance
except:
  if hasattr(sys, 'real_prefix'):
    #we are in a virtual env.
    !pip install geopy
  else:
    !pip install --user geopy


In [11]:
# Simple distance computation between two locations
from geopy.distance import great_circle

def get_distance(p1, p2):
  """Return the great-circle distance, in miles, between two points.

  Both arguments must expose `.x` and `.y`; each is handed to geopy as a
  (y, x) pair, i.e. (latitude, longitude) for the points built in this notebook.
  """
  origin = (p1.y, p1.x)
  destination = (p2.y, p2.x)
  return great_circle(origin, destination).miles

## Declare the list of libraries

In [12]:
from IPython.terminal.ipapp import TerminalIPythonApp
def build_libraries_from_url(url, name_pos, lat_long_pos):
  """Download the library dataset and turn every usable row into a NamedPoint.

  Args:
    url: Address of a JSON document whose top-level 'data' key holds a list
      of rows (each row being a list of fields).
    name_pos: Index, within a row, of the field used as the library name.
    lat_long_pos: Index, within a row, of the location record; latitude is
      read from its element 1 and longitude from its element 2.

  Returns:
    A list of NamedPoint objects (x = longitude, y = latitude) named
    "P_<name>_<k>", where k numbers the kept rows starting at 1. Rows whose
    coordinates are missing, unparsable or zero are skipped.

  Raises:
    requests.HTTPError: if the server answers with an error status.
  """
  import requests
  import json

  # A timeout keeps the notebook from hanging forever on a stalled connection,
  # and raise_for_status() surfaces HTTP errors instead of a confusing JSON error.
  r = requests.get(url, timeout=30)
  r.raise_for_status()
  # No parse_constant here: it must be a callable, so the former 'utf-8'
  # string would have failed on any NaN / Infinity token in the payload.
  myjson = json.loads(r.text)['data']

  libraries = []
  k = 1
  for location in myjson:
    uname = location[name_pos]
    try:
      latitude = float(location[lat_long_pos][1])
      longitude = float(location[lat_long_pos][2])
    except (TypeError, ValueError, IndexError):
      # Missing record (None), non-numeric text or a truncated record:
      # skip this row rather than aborting the whole load.
      continue

    # Zero coordinates are treated as "no location", as before.
    if not (latitude and longitude):
      continue

    try:
      name = str(uname)
    except Exception:
      name = "???"

    libraries.append(NamedPoint("P_%s_%d" % (name, k), longitude, latitude))
    k += 1
  return libraries

In [13]:
# name_pos / lat_long_pos index into each row of the feed's 'data' array.
# NOTE(review): judging by the printed names, field 10 is the street address;
# confirm both indices against the feed's column metadata if the schema changes.
libraries = build_libraries_from_url(
    url='https://data.cityofchicago.org/api/views/x8fc-8rcq/rows.json?accessType=DOWNLOAD',
    name_pos=10,
    lat_long_pos=16,
)

In [14]:
# Sanity check: how many libraries ended up with usable coordinates.
library_count = len(libraries)
print("There are %d public libraries in Chicago" % library_count)

There are 81 public libraries in Chicago


In [18]:
# One three-line record per library: name, then x (longitude), then y (latitude).
for library in libraries:
  record = (
      f"Library Name: {library.name}",
      f"Long: {library.x}",
      f"Lat: {library.y}",
  )
  print(*record, sep="\n")

Library Name: P_3710 E. 106th St._1
Long: -87.61428978448026
Lat: 41.70283443594318
Library Name: P_3401 W. Foster Ave._2
Long: -87.71361314512697
Lat: 41.97557881655979
Library Name: P_8148 S. Stony Island Ave._3
Long: -87.5860053710736
Lat: 41.746393038286826
Library Name: P_1350 W. 89th St._4
Long: -87.65772892721816
Lat: 41.73244482025524
Library Name: P_163 E. Pearson St._5
Long: -87.62337776811282
Lat: 41.897484072390675
Library Name: P_1336 W. Taylor St._6
Long: -87.66072612668584
Lat: 41.86946341218242
Library Name: P_5331 W. Devon Ave._7
Long: -87.76211751799003
Lat: 41.997213372156295
Library Name: P_6151 S. Normal Blvd._8
Long: -87.63738794358196
Lat: 41.78202569697298
Library Name: P_6000 N. Broadway St._9
Long: -87.660442745415
Lat: 41.99095923823291
Library Name: P_5724 W. North Ave._10
Long: -87.76898326931509
Lat: 41.909444834238485
Library Name: P_642 W. 43rd St._11
Long: -87.64256827550598
Lat: 41.81634394084193
Library Name: P_3400 S. Halsted St._12
Long: -87.6462998

## Define number of shops to open

In [19]:
# Number of coffee shops the model must open (used by the shop-count constraint).
nb_shops = 5
print("We would like to open %d coffee shops" % (nb_shops,))

We would like to open 5 coffee shops


## Validate the data by displaying them

In [20]:
try:
  import folium
except:
  if hasattr(sys, 'real_prefix'):
    #we are in a virtual env.
    !pip install folium
  else:
    !pip install folium


In [22]:
import folium

# Base map centred on Chicago; every library gets a default marker.
map_osm = folium.Map(location=[41.878, -87.629], zoom_start=11)
for library in libraries:
  # Markers take [latitude, longitude]; the points store y = latitude, x = longitude.
  lt, lg = library.y, library.x
  folium.Marker([lt, lg]).add_to(map_osm)

# Last expression of the cell: renders the map inline.
map_osm

# Step 4: Set up the prescriptive model

In [24]:
!pip install cplex

Collecting cplex
  Downloading cplex-22.1.1.0-cp310-cp310-manylinux1_x86_64.whl (44.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.2/44.2 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cplex
Successfully installed cplex-22.1.1.0


In [25]:
# Report what docplex detects in this kernel (platform, Python, docplex,
# CPLEX and pandas versions) before any model is built.
from docplex.mp.environment import Environment
env = Environment()
env.print_information()

* system is: Linux 64bit
* Python version 3.10.6, located at: /usr/bin/python3
* docplex is present, version is 2.25.236
* CPLEX library is present, version is 22.1.1.0, located at: /usr/local/lib/python3.10/dist-packages
* pandas is present, version is 1.5.3


## Create the DOcplex model

In [42]:
from docplex.mp.model import Model

# Empty model named "coffee shops"; the following cells add its decision
# variables, constraints and objective.
mdl = Model("coffee shops")

## Define the decision variables

In [43]:
# Sentinel distance: a pair at least this far apart is treated as suspect
# and excluded by the first business constraint.
BIGNUM = 999999999  # nine 9s

# Ensure unique points.
# The point classes define neither __eq__ nor __hash__, so set(libraries)
# compared object identities: it removed nothing and made the iteration order
# vary between runs. De-duplicate on the coordinates instead, keeping the first
# library seen at each location and preserving the original order. Rebuilding
# a list also keeps this cell idempotent when it is re-run.
unique_libraries = {}
for library in libraries:
  unique_libraries.setdefault((library.x, library.y), library)
libraries = list(unique_libraries.values())

# For simplicity, let's consider that coffee shop candidate locations are the
# same as library locations.
# That is: any library location can also be selected as a coffee shop.
coffeeshops_locations = libraries

# Decision vars
# Binary vars indicating which coffee shop locations will actually be selected
coffeeshops_vars = mdl.binary_var_dict(coffeeshops_locations, name='is_coffeeshop')

# Binary vars representing the "assigned" libraries for each coffee shop
link_vars = mdl.binary_var_matrix(coffeeshops_locations, libraries, "link")

## Express the business constraints

- First: if the distance is suspect, it needs to be excluded from the problem

In [44]:
# Force to zero every link whose endpoints are at least BIGNUM miles apart;
# such a distance can only come from bad data.
for c_loc in coffeeshops_locations:
  for b in libraries:
    distance = get_distance(c_loc, b)
    if distance >= BIGNUM:
      ct_name = "ct_forbid_{0!s}_{1!s}".format(c_loc, b)
      mdl.add_constraint(link_vars[c_loc, b] == 0, ct_name)

mdl.print_information()

Model: coffee shops
 - number of variables: 6642
   - binary=6642, integer=0, continuous=0
 - number of constraints: 0
   - linear=0
 - parameters: defaults
 - objective: none
 - problem type is: MILP


- Second: each library must be linked to a coffee shop that is open

In [45]:
# A library may only be linked to a location where a coffee shop is open:
# link[c, b] <= is_coffeeshop[c] for every (library, candidate) pair.
mdl.add_constraints(
    link_vars[c_loc, b] <= coffeeshops_vars[c_loc]
    for b in libraries
    for c_loc in coffeeshops_locations
)
mdl.print_information()

Model: coffee shops
 - number of variables: 6642
   - binary=6642, integer=0, continuous=0
 - number of constraints: 6561
   - linear=6561
 - parameters: defaults
 - objective: none
 - problem type is: MILP


- Third: each library is linked to exactly one coffee shop

In [46]:
# For each library, its links over all candidate locations must sum to one.
mdl.add_constraints(
    mdl.sum(link_vars[c_loc, b] for c_loc in coffeeshops_locations) == 1
    for b in libraries
)
mdl.print_information()

Model: coffee shops
 - number of variables: 6642
   - binary=6642, integer=0, continuous=0
 - number of constraints: 6642
   - linear=6642
 - parameters: defaults
 - objective: none
 - problem type is: MILP


- Fourth: there is a fixed number of coffee shops to open

In [47]:
# The number of opened coffee shops must equal nb_shops
open_shop_count = mdl.sum(coffeeshops_vars[c_loc] for c_loc in coffeeshops_locations)
mdl.add_constraint(open_shop_count == nb_shops)

# Show the model size after adding the constraint
mdl.print_information()

Model: coffee shops
 - number of variables: 6642
   - binary=6642, integer=0, continuous=0
 - number of constraints: 6643
   - linear=6643
 - parameters: defaults
 - objective: none
 - problem type is: MILP


## Express the objective

The objective is to minimize the total distance from libraries to coffee shops so that a book reader always gets to our coffee shop easily

In [50]:
# Objective: the summed distance over every active (coffee shop, library) link
total_distance = mdl.sum(
    link_vars[c_loc, b] * get_distance(c_loc, b)
    for c_loc in coffeeshops_locations
    for b in libraries
)

mdl.minimize(total_distance)
mdl.print_information()

Model: coffee shops
 - number of variables: 6642
   - binary=6642, integer=0, continuous=0
 - number of constraints: 6643
   - linear=6643
 - parameters: defaults
 - objective: minimize
 - problem type is: MILP


## Solve with the Decision Optimization solve service

In [49]:
print("# coffee shops locations   = %d" % len(coffeeshops_locations))
print("# coffee shops             = %d" % nb_shops)

# Run the solver outside of `assert`: assert statements are removed under
# `python -O`, which would silently skip the solve itself.
solution = mdl.solve()
if not solution:
  # Same exception type and message as the former assert.
  raise AssertionError("!!! Solve of the model fails")

# NOTE: the recorded run stopped here with DOcplexLimitsExceeded. The model
# printed above has 6642 variables and 6643 constraints, above the limit of
# 1000 that the original author noted for the Community Edition.

# coffee shops locations   = 81
# coffee shops             = 5


DOcplexLimitsExceeded: ignored

# Step 5: Investigate the solution and then run an example analysis

In [52]:
# Read the solved model back into plain Python lists.
# NOTE: `total_distance` is rebound here from the objective expression to its
# solved numeric value.
total_distance = mdl.objective_value

# Solver values are floats, so a binary set to one is not guaranteed to be
# exactly 1 (nor to survive int() truncation). Threshold at 0.5 instead of
# testing for equality.
open_coffeeshops = [c_loc for c_loc in coffeeshops_locations
                    if coffeeshops_vars[c_loc].solution_value > 0.5]
not_coffeeshops = [c_loc for c_loc in coffeeshops_locations if c_loc not in open_coffeeshops]
# One (coffee shop, library) pair per active link.
edges = [(c_loc, b) for b in libraries for c_loc in coffeeshops_locations
         if link_vars[c_loc, b].solution_value > 0.5]

print("Total distance = %g" % total_distance)
print("# coffee shops  = {0}".format(len(open_coffeeshops)))
for c in open_coffeeshops:
    print("new coffee shop: {0!s}".format(c))

DOcplexException: ignored

## Displaying the solution

In [53]:
import folium

# Fresh map for the solution, centred on Chicago.
map_osm = folium.Map(location=[41.878, -87.629], zoom_start=11)

# Selected coffee shops: red "info-sign" markers.
for coffeeshop in open_coffeeshops:
    lt, lg = coffeeshop.y, coffeeshop.x
    shop_icon = folium.Icon(color='red', icon='info-sign')
    folium.Marker([lt, lg], icon=shop_icon).add_to(map_osm)

# Every other library keeps the default marker.
for b in libraries:
    if b in open_coffeeshops:
        continue
    lt, lg = b.y, b.x
    folium.Marker([lt, lg]).add_to(map_osm)

# One red segment per (coffee shop, library) assignment.
for (c, b) in edges:
    segment = folium.PolyLine([[c.y, c.x], [b.y, b.x]], color='#FF0000', weight=5)
    map_osm.add_child(segment)

# Last expression of the cell: renders the map inline.
map_osm

NameError: ignored