In [1]:
import cvxopt
from cvxopt import glpk
import numpy as np
import pandas as pd
import time
import random as rnd

# ДИНАМИКА ПО СТОИМОСТИ

In [2]:

def knapsack_dinamic_cost(N, M, weights, costs):
    '''Динамика по ценности'''
    
    
    COST = sum(costs)
    
    y = [[0 for i in range(COST+1)] for j in range(N+1)]
    """В y храним массы
        y[i][j] - минимальная масса, которую можно набрать используя 
                    первые i предметов, чтобы их суммарная стоимость
                    была равна j
    """
    for i in range(1, COST+1):
        y[0][i] = M + 1
    

    for i in range(1,N+1):
        for V in range(COST+1):

            if costs[i-1] > V:#проверка чтобы не зайти в else
                y[i][V] = y[i-1][V]
            else:
                y[i][V] = min(y[i-1][V], y[i-1][V-costs[i-1]] + weights[i-1])
        
    #Построили матрицу y[i,V]
    #где i -  используем до i-го предмета
    #V - стоимость которуюю набирвем


    IND = COST
    while y[N][IND] == M+1:
        IND-=1
    #IND - максимальная цена, которую можно набрать используя все предметы


    res = IND



    lis =[0 for i in range(N)]
    for i in range(N, -1,-1):
        if y[i-1][IND] != y[i][IND]:
            lis[i-1]=1
            IND-=costs[i-1]



    return res, lis

In [3]:
def check_correctness(func, test):
    cost, res = func(len(test["weights"]), test["W"],
                      test["weights"], test["costs"])
    
    real_cost = 0
    
    for i in range(len(test["weights"])):
        if test["ans"][i] == 1:
            real_cost += test["costs"][i]
            
    if real_cost == cost and (res == test["ans"]).all():
        return ("EXACT", res, test["ans"])
    if real_cost == cost:
        return "CORRECT", res, test["ans"]
    return "FALSE", res, test["ans"]

# ДИНАМИКА ПО ВЕСАМ

In [4]:

def knapsack_dinamic_weight(N, M, weights, costs):
    '''Динамика по весам'''


    c = [0 for i in range(M+1)]
    """
        c[i] - Максимальная стоимость, которую можно набрать,
                используя рюкзак размера i
    """
    lists = [[] for i in range(M+1)] 
    c[0] = 0

    for W in range(1, M+1):
        c[W] = c[W-1]
        lists[W] = lists[W-1]

        for i in range(N):
            w = weights[i]
            if W - w >= 0 and  i not in lists[W-w]:
                    if c[W] < costs[i] + c[W-w]:
                        lists[W] = lists[W-w] + [i]
                        c[W] = costs[i] + c[W-w]
    res = [0 for i in range(N)]
    for i in lists[M]:
        res[i] = 1
        
    return c[M], res

# ГЕНЕТИЧЕСКИЙ АЛГОРИТМ

In [51]:
import random as rnd
import numpy as np
rnd.seed(42)

class Knapsack_DNA():
    def __init__(self, P=10_000, epoch=30, prob_mutation=0.005, num_best=1000):
        
        if num_best > P:
            raise ValueError("num_best > P")
            
        self.P = P
        self.epoch = epoch
        self.prob_mutation = prob_mutation
        self.num_best = num_best
        
    
    
    def __mutate(self, DNA):
        for i in range(len(DNA)):
            if rnd.random() < self.prob_mutation:
                DNA[i] ^= 1
        return DNA
    
    
    
    def __crossover(self, DNA1, DNA2):
        child_DNA = DNA1[:]
        child_DNA[::2] = DNA1[::2]
        child_DNA[1::2] = DNA2[1::2]
        return DNA1
    
    
    
    def __loss_func(self, DNA):
        
        mask = np.where(DNA==1)
        weight = self.weights[mask].sum()
        
        cost = 0
        if weight <= self.Weight:
            cost = self.costs[mask].sum()
        
        return cost
    
    
    def __begin_epoch(self):
        errors = np.array(list(map(lambda x: self.__loss_func(x), self.Population)))
        indexes_of_bests = np.argpartition(-errors, self.num_best)
        Best = self.Population[indexes_of_bests[:self.num_best],:]
        
        
        self.Population[:self.num_best,:] = Best[:,:]
        
        for i in range(self.num_best,
                       self.Population.shape[0]):
            
            fir = rnd.randint(0, self.num_best-1)
            sec = rnd.randint(0, self.num_best-1)
            self.Population[i] = self.__crossover(Best[fir,:], Best[sec,:])
            self.__mutate(self.Population[i])
 
            
        
        
    
    
    def fit(self, test):
        self.N = len(test["weights"])
        self.Weight = test["W"]
        self.weights = np.array(test["weights"])
        self.costs = np.array(test["costs"])
        self.ans = np.array(test["ans"])
        
        self.Population = np.array([[0 for i in range(self.N)] for j in range(self.P)])
        for i in range(self.epoch):
            self.__begin_epoch()
            errors = np.array(list(map(lambda x: self.__loss_func(x), self.Population)))

            
    def predict(self):
        errors = np.array(list(map(lambda x: self.__loss_func(x), self.Population)))
        pos = errors.argmax()
        
        return self.__loss_func(self.Population[pos,:]), self.Population[pos,:]
    
    def score(self):
        cost, res = self.predict()
        
    
        real_cost = self.costs[np.where(self.ans == 1)].sum()
        
        if real_cost == cost and (res == self.ans).all():
            return ("EXACT", res, self.ans)
        if real_cost == cost:
            return "CORRECT", res, self.ans
        
        return "DIFF="+str((real_cost - cost) / real_cost)[:6], res, self.ans
        
    

# ЛИНЕЙНОЕ ПРОГРАММИРОВАНИЕ

$x_i \le 1$

$x_i \ge 0 \Leftrightarrow  -x_i \le 0 $

$\sum_{i=0}^{N}$ weights$_i \cdot x_i \le W$

$\sum_{i=0}^{N}$ costst$_i \cdot x_i \rightarrow max$

In [6]:
def linear_programming(N, W, weights, costs):
    b = []
    A = []
    
    c = [-i for i in costs]
    for i in range(N):
        b.append(1)
        
    for i in range(N):
        b.append(0)
        
    b.append(W)
    
    for i in range(N):
        string = [0 for i in range(N)]
        string[i] = 1
        A.append(string)
        
    for i in range(N):
        string = [0 for i in range(N)]
        string[i] = -1
        A.append(string)
        
    string = [i for i in weights]
    A.append(string)
    
    
    
    A = cvxopt.matrix(np.array(A).astype(np.float))
    b = cvxopt.matrix(np.array(b).astype(np.float))
    c = cvxopt.matrix(np.array(c).astype(np.float))
    
    
    sol = glpk.ilp(c, A, b, I={i for i in range(N)})
    sol = list(sol[1])

    return np.array(sol)
    

In [7]:
def check_correctness_lp(test):
    res = linear_programming(len(test["weights"]), test["W"],
                  test["weights"], test["costs"])
    cost = np.dot(res.reshape(-1),np.array(test["costs"]))
    real_cost = np.dot(np.array(test["ans"]),np.array(test["costs"]))
    if cost == real_cost and (res.astype(np.int) == np.array(test["ans"])).all():
        return ("EXACT", res, test["ans"])
        
    if cost == real_cost:
        return ("CORRECT", res, test["ans"])
        
    
    return ("FALSE", res, test["ans"])
    
    

# СРАВНЕНИЕ РЕЗУЛЬТАТОВ

In [8]:
def parse_test_file(filename):
    f = open(filename, "r")
    text = f.readlines()
    index = 1
    N = len(text)
    tests = []
    while index < N:
        n = int(text[index].split()[1])
        index+=1
        
        WEIGHT = int(text[index].split()[1])
        index+=1
        
        COST = int(text[index].split()[1])
        index+=2

        weights = []
        costs = []
        ans = []
        
        for i in range(n):
            k, cost, weight, ans_i = text[index].split(",")
            costs.append(int(cost))
            weights.append(int(weight))
            ans.append(int(ans_i))
            index+=1
        index+=3
        weights = np.array(weights)
        costs = np.array(costs)
        ans = np.array(ans)

        tests.append({"W": WEIGHT,
                    "weights": weights,
                    "costs": costs,
                    "ans": ans
                     })
        
    return tests
    

In [9]:
def make_tables(filename:str):
    
    tests = parse_test_file(filename)
    
    tab_correct = pd.DataFrame(
                   columns=["test"+str(i) for i in range(len(tests))]
                   , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    tab_time = pd.DataFrame(
                       columns=["test"+str(i) for i in range(len(tests))]
                       , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    
    TIME = time.time()
    print("BEGIN")
    # ДИНАМИКА ПО ЦЕНЕ
    for i in range(len(tests)):
        start_time = time.time()

        tab_correct.iloc[0,i] = check_correctness(knapsack_dinamic_cost, tests[i])[0]
        tab_time.iloc[0,i] = time.time() - start_time
#         print(i)
   
    print("Закончил динамику по цене", time.time()-TIME, "секунд")    
    
    
    
    # ДИНАМИКА ПО ВЕСАМ
    for i in range(len(tests)):
        start_time = time.time()

        tab_correct.iloc[1,i] = check_correctness(knapsack_dinamic_weight, tests[i])[0]
        tab_time.iloc[1,i] = time.time() - start_time
#         print(i)
        
    print("Закончил динамику по весам", time.time()-TIME, "секунд")
    
    
    # Генетический алгоритм
    mdl = Knapsack_DNA(P=2000, epoch=50, num_best=1000)
    
    for i in range(len(tests)):
        start_time = time.time()
        mdl.fit(tests[i])
        tab_correct.iloc[2,i] = mdl.score()[0]
        tab_time.iloc[2,i] = time.time() - start_time      
#         print(i)
        
    print("Закончил генетическйи", time.time()-TIME, "секунд")
    
    # ЛИНЕЙНОЕ ПРОГРАММИРОВАНИЕ
    for i in range(len(tests)):
        start_time = time.time()

        tab_correct.iloc[3,i] = check_correctness_lp(tests[i])[0]
        tab_time.iloc[3,i] = time.time() - start_time
#         print(i)
        
    print("Закончил линейное программирование", time.time()-TIME, "секунд")
    
    return tab_correct.T, tab_time.T

In [10]:
tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_11_20_1000.csv")

BEGIN
Закончил динамику по цене 31.42858362197876 секунд
Закончил динамику по весам 44.85989212989807 секунд
Закончил генетическйи 262.1053102016449 секунд
Закончил линейное программирование 262.339448928833 секунд


In [96]:
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", None)

In [12]:
tab_corr

Unnamed: 0,Дин. Стоимость,Дин. Вес,Генетический,Цел.Лин.Прог
test0,CORRECT,CORRECT,CORRECT,CORRECT
test1,CORRECT,CORRECT,DIFF=0.0476,EXACT
test2,CORRECT,CORRECT,CORRECT,CORRECT
test3,EXACT,EXACT,CORRECT,EXACT
test4,CORRECT,CORRECT,EXACT,CORRECT
test5,CORRECT,CORRECT,CORRECT,EXACT
test6,CORRECT,CORRECT,CORRECT,CORRECT
test7,EXACT,EXACT,DIFF=0.2933,CORRECT
test8,CORRECT,CORRECT,DIFF=0.5399,CORRECT
test9,CORRECT,CORRECT,CORRECT,CORRECT


In [13]:
tab_time

Unnamed: 0,Дин. Стоимость,Дин. Вес,Генетический,Цел.Лин.Прог
test0,0.233577,0.0363545,1.70486,0.0011518
test1,0.583711,0.0168872,2.02242,0.000798941
test2,0.367755,0.0225561,2.03139,0.000863552
test3,0.262745,0.0272725,2.17455,0.000537157
test4,0.181855,0.0156827,1.85348,0.000731468
test5,0.509412,0.0212855,1.93577,0.00100517
test6,0.302433,0.0235229,2.10416,0.00117326
test7,0.284727,0.0301585,1.91204,0.000626802
test8,0.222415,0.0262959,1.73204,0.000864744
test9,0.486794,0.0272348,2.07754,0.000702858


In [67]:
def make_tables(filename:str):
    
    tests = parse_test_file(filename)[:10]
    
    tab_correct = pd.DataFrame(
                   columns=["test"+str(i) for i in range(len(tests))]
                   , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    tab_time = pd.DataFrame(
                       columns=["test"+str(i) for i in range(len(tests))]
                       , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    
    TIME = time.time()
    print("BEGIN")

    
    
    # Генетический алгоритм
    mdl = Knapsack_DNA(P=2000, epoch=50, num_best=1000)
    
    for i in range(len(tests)):
        start_time = time.time()
        mdl.fit(tests[i])
        tab_correct.iloc[2,i] = mdl.score()[0]
        tab_time.iloc[2,i] = time.time() - start_time      
        print(i)
        
    print("Закончил генетический", time.time()-TIME, "секунд")

    
    return tab_correct.T, tab_time.T

In [68]:
tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_11_2000_1000.csv")

BEGIN
0
1
2
3
4
5
6
7
8
9
Закончил генетический 143.8755156993866 секунд


In [66]:
tab_corr["Генетический"]

test0    DIFF=0.0820
test1    DIFF=0.1524
test2    DIFF=0.1496
test3    DIFF=0.0346
test4    DIFF=0.1346
test5    DIFF=0.0070
test6    DIFF=0.1145
test7    DIFF=0.0728
test8    DIFF=0.1812
test9    DIFF=0.1743
Name: Генетический, dtype: object

In [70]:
tab_time = tab_time["Генетический"]
tab_time

test0    13.7596
test1    13.9179
test2     14.077
test3    14.2487
test4    14.3399
test5    14.4254
test6    14.6041
test7    14.5765
test8    14.7239
test9    15.2005
Name: Генетический, dtype: object

In [71]:
def make_tables(filename:str):
    
    tests = parse_test_file(filename)[:10]
    
    tab_correct = pd.DataFrame(
                   columns=["test"+str(i) for i in range(len(tests))]
                   , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    tab_time = pd.DataFrame(
                       columns=["test"+str(i) for i in range(len(tests))]
                       , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    
    TIME = time.time()
    print("BEGIN")
  
    
    
    
    # ДИНАМИКА ПО ВЕСАМ
    for i in range(len(tests)):
        start_time = time.time()

        tab_correct.iloc[1,i] = check_correctness(knapsack_dinamic_weight, tests[i])[0]
        tab_time.iloc[1,i] = time.time() - start_time
        print(i)
        
    print("Закончил динамику по весам", time.time()-TIME, "секунд")
    
    
    
    
    return tab_correct.T, tab_time.T

In [72]:
tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_11_2000_1000.csv")

BEGIN
0
1
2
3
4
5
6
7
8
9
Закончил динамику по весам 2927.8003854751587 секунд


In [73]:
tab_corr["Дин. Вес"]

test0    CORRECT
test1    CORRECT
test2    CORRECT
test3    CORRECT
test4    CORRECT
test5    CORRECT
test6    CORRECT
test7    CORRECT
test8    CORRECT
test9    CORRECT
Name: Дин. Вес, dtype: object

In [74]:
tab_time["Дин. Вес"]

test0    22.7665
test1    44.4403
test2    91.5539
test3    143.825
test4    163.708
test5    275.618
test6    414.845
test7    502.867
test8    596.828
test9    671.347
Name: Дин. Вес, dtype: object

In [112]:
def make_tables(filename:str):
    
    tests = parse_test_file(filename)
    
    tab_correct = pd.DataFrame(
                   columns=["test"+str(i) for i in range(len(tests))]
                   , index=["Генетический", "Цел.Лин.Прог"])
    tab_time = pd.DataFrame(
                       columns=["test"+str(i) for i in range(len(tests))]
                       , index=["Генетический", "Цел.Лин.Прог"])
    
    TIME = time.time()
    print("BEGIN")
    
    
    # ЛИНЕЙНОЕ ПРОГРАММИРОВАНИЕ
    for i in range(len(tests)):
        start_time = time.time()

        tab_correct.iloc[1,i] = check_correctness_lp(tests[i])[0]
        tab_time.iloc[1,i] = time.time() - start_time
#         print(i)
        
    print("Закончил линейное программирование", time.time()-TIME, "секунд")
    
    
    # Генетический алгоритм
    mdl = Knapsack_DNA(P=2000, epoch=50, num_best=1000)
    
    for i in range(len(tests)):
        start_time = time.time()
        mdl.fit(tests[i])
        tab_correct.iloc[0,i] = mdl.score()[0]
        tab_time.iloc[0,i] = time.time() - start_time      
#         print(i)
        
    print("Закончил генетический", time.time()-TIME, "секунд")
    

    
    return tab_correct.T, tab_time.T

In [113]:
tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_1_50_100000.csv")

BEGIN
Закончил линейное программирование 0.1452620029449463 секунд
Закончил генетический 207.88710522651672 секунд


In [114]:
tab_corr

Unnamed: 0,Генетический,Цел.Лин.Прог
test0,EXACT,EXACT
test1,DIFF=0.0136,EXACT
test2,DIFF=0.0181,EXACT
test3,EXACT,EXACT
test4,DIFF=0.0290,EXACT
test5,DIFF=0.0432,EXACT
test6,DIFF=0.0196,EXACT
test7,EXACT,EXACT
test8,DIFF=0.0157,EXACT
test9,DIFF=0.0113,EXACT


In [115]:
tab_time

Unnamed: 0,Генетический,Цел.Лин.Прог
test0,1.93284,0.00184202
test1,1.92855,0.00141287
test2,1.93035,0.00140071
test3,1.94424,0.00094986
test4,1.95923,0.00127172
test5,1.9474,0.00130773
test6,1.96411,0.000952244
test7,1.96898,0.000949144
test8,1.98253,0.00141144
test9,1.98123,0.00110555


In [116]:
    tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_1_50_10000000.csv")

BEGIN
Закончил линейное программирование 0.146148681640625 секунд
Закончил генетический 209.10362219810486 секунд


In [117]:
tab_corr

Unnamed: 0,Генетический,Цел.Лин.Прог
test0,EXACT,EXACT
test1,DIFF=0.0214,EXACT
test2,DIFF=0.0049,EXACT
test3,EXACT,EXACT
test4,EXACT,EXACT
test5,DIFF=0.0618,EXACT
test6,DIFF=0.0054,EXACT
test7,DIFF=0.0178,EXACT
test8,EXACT,EXACT
test9,EXACT,EXACT


In [118]:
tab_time

Unnamed: 0,Генетический,Цел.Лин.Прог
test0,1.93881,0.00140309
test1,2.00101,0.00116348
test2,2.00862,0.00132275
test3,2.00815,0.00111556
test4,2.00013,0.000932217
test5,2.01219,0.00106001
test6,2.02769,0.00110865
test7,2.01244,0.00117135
test8,2.00225,0.00152493
test9,2.0003,0.000984192


In [120]:
tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_1_2000_10000000.csv")

BEGIN
Закончил линейное программирование 116.24753212928772 секунд
Закончил генетический 1717.4354932308197 секунд


In [121]:
tab_corr

Unnamed: 0,Генетический,Цел.Лин.Прог
test0,DIFF=0.8160,EXACT
test1,DIFF=0.7724,EXACT
test2,DIFF=0.7466,EXACT
test3,DIFF=0.7057,EXACT
test4,DIFF=0.7089,EXACT
test5,DIFF=0.6590,EXACT
test6,DIFF=0.6326,EXACT
test7,DIFF=0.6560,EXACT
test8,DIFF=0.6210,EXACT
test9,DIFF=0.6001,EXACT


In [122]:
tab_time

Unnamed: 0,Генетический,Цел.Лин.Прог
test0,15.7984,0.878935
test1,15.7532,0.895175
test2,15.8695,0.995133
test3,16.0019,0.881687
test4,16.1744,0.778458
test5,16.2797,0.917977
test6,16.3648,1.04714
test7,16.3239,0.897745
test8,16.3737,0.838636
test9,16.3801,0.873024


In [124]:
def make_tables(filename:str):
    
    tests = parse_test_file(filename)[:10]
    
    tab_correct = pd.DataFrame(
                   columns=["test"+str(i) for i in range(len(tests))]
                   , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    tab_time = pd.DataFrame(
                       columns=["test"+str(i) for i in range(len(tests))]
                       , index=["Дин. Стоимость", "Дин. Вес", "Генетический", "Цел.Лин.Прог"])
    
    TIME = time.time()
    print("BEGIN")

    
    
    # Генетический алгоритм
    mdl = Knapsack_DNA(P=6000, epoch=70, num_best=500)
    
    for i in range(len(tests)):
        start_time = time.time()
        mdl.fit(tests[i])
        tab_correct.iloc[2,i] = mdl.score()[0]
        tab_time.iloc[2,i] = time.time() - start_time      
        print(i)
        
    print("Закончил генетический", time.time()-TIME, "секунд")

    
    return tab_correct.T, tab_time.T

In [125]:
tab_corr, tab_time = make_tables("./tests_knapsack/knapPI_1_2000_10000000.csv")

BEGIN
0
1
2
3
4
5
6
7
8
9
Закончил генетический 1063.283910036087 секунд


In [127]:
tab_corr["Генетический"]


test0    DIFF=0.7152
test1    DIFF=0.6351
test2    DIFF=0.6222
test3    DIFF=0.5822
test4    DIFF=0.5620
test5    DIFF=0.5510
test6    DIFF=0.5051
test7    DIFF=0.5089
test8    DIFF=0.4820
test9    DIFF=0.4556
Name: Генетический, dtype: object

In [128]:
tab_time["Генетический"]

test0    102.351
test1    103.559
test2    109.058
test3    109.435
test4    107.553
test5    106.751
test6    107.523
test7    104.554
test8    106.372
test9    106.125
Name: Генетический, dtype: object