In [1]:
# test project for Proj2
import numpy as np
import cvxpy as cp
import pandas as pd

## Knapsack Problem

### Problem Statement

We have a set of cars of different sizes and values and a parking lot at the dealership with a limited car capacity. How do we choose which cars to pack into the parking lot to maximize the total sale value?

### Define Variables and Parameters

* $N$ is the number of cars
* $s_i$ is the size of car $i$
* $v_i$ is the value of car $i$
* $x_i$ is the decision variables: $x_i = 1$ if car $i$ is chosen and $x_i = 0$ if not
* $c_i$ is the carbon tax of the car $i$
* $C$ is the total carbon tax amount the dealership paid. 
* $S$ is the capacity of the parking lot

### Identify Constraints and Make Assumptions

* Total area of the parked items is less than the parking lot capacity

In [None]:
# we have ten cars waiting to be chosen
N = 10
# value of cars varies
v = np.random.randint(40,300,N)
# size of the cars varies
s = np.random.randint(3,9,N)
# total size of all cars
size = np.sum(s)
# parking lot capacity
S = np.random.randint(size//2,2*size//3)
# carbon tax for each car
c = np.random.randint(10,20,N)
# total amount of carbon tax
tax = np.sum(c)
# total carbon tax the dealership would pay
C = np.random.randint(tax//2,2*tax//3)
print("Values =",v)
print("Size =",s)
print("Capacity =",S)
print("Carbon tax amount = ", C)
x = cp.Variable(N,integer=True)
obj = cp.Maximize(cp.sum(cp.multiply(v,x)))
constraints = [cp.sum(cp.multiply(s,x)) <= S,
               cp.sum(cp.multiply(c,x)) <= C,
                 x <= 1, x >= 0]
prob = cp.Problem(obj,constraints)
prob.solve()
x.value

v@x.value
s@x.value
value_per_size = v/s
print(value_per_size.round(2))

items = []
for _ in range(N):
    i = np.argmax(value_per_size)
    items.append(i)
    value_per_size[i] = 0.
    weight = sum([s[k] for k in items])
    if weight > C:
        items = items[:-1]
        break

Values = [229  94  65  40 262 223 120 141 211 214]
Size = [5 6 3 7 7 3 5 4 6 6]
Capacity = 32
Carbon tax amount =  82
[45.8  15.67 21.67  5.71 37.43 74.33 24.   35.25 35.17 35.67]


In [None]:
sorted(items)

[np.int64(0),
 np.int64(1),
 np.int64(2),
 np.int64(3),
 np.int64(4),
 np.int64(5),
 np.int64(6),
 np.int64(7),
 np.int64(8),
 np.int64(9)]

In [2]:
df = pd.read_csv('data/used_cars.csv')
print(list(df.columns))
df = df.dropna()

['brand', 'model', 'model_year', 'milage', 'fuel_type', 'engine', 'transmission', 'ext_col', 'int_col', 'accident', 'clean_title', 'price']


In [3]:
df['fuel_type'].unique()

array(['E85 Flex Fuel', 'Gasoline', 'Hybrid', 'Diesel', 'Plug-In Hybrid',
       '–', 'not supported'], dtype=object)

In [4]:
df_new = df[df['fuel_type'] != "not supported"]
df_new['fuel_type'].unique()
df_new = df_new[df_new['fuel_type'] != '']
df_new['fuel_type'].unique()
df_new.drop(columns=["model_year", "milage","engine","transmission","ext_col","int_col","accident","clean_title"], inplace=True)


In [5]:
df_new.dtypes

brand        object
model        object
fuel_type    object
price        object
dtype: object

In [None]:
N = 10
v = np.array(df_new["price"])
# assign carbon weight to fuel type
w = np.random.randint(3,20,N)
W = np.sum(w)
C = np.random.randint(W//2,2*W//3)
print("Values =",v)
print("Weight =",w)
print("Capacity =",C)
x = cp.Variable(N,integer=True)
obj = cp.Maximize(cp.sum(cp.multiply(v,x)))
constraints = [cp.sum(cp.multiply(w,x)) <= C, x <= 1, x >= 0]
prob = cp.Problem(obj,constraints)
prob.solve()
x.value

v@x.value
w@x.value
value_per_weight = v/w
print(value_per_weight.round(2))

items = []
for _ in range(N):
    i = np.argmax(value_per_weight)
    items.append(i)
    value_per_weight[i] = 0.
    weight = sum([w[k] for k in items])
    if weight > C:
        items = items[:-1]
        break

Values = ['$10,300' '$38,005' '$15,500' ... '$53,900' '$62,999' '$40,000']
Weight = [12 14  7 18  6  8 18 15 15 19]
Capacity = 72


ValueError: could not convert string to float: '$10,300'