In [16]:
#Imports
import random
import pandas as pd
import numpy as np
import fileinput
import matplotlib.pyplot as plt
from pathlib import Path
# Seed the `random` module with a fixed value so that the random.randint draws
# made by the generation rules below are reproducible from run to run.
random.seed(1)

#The 7 Implementations of the generation rules for profits and weights, given maximum value R
#Each of the generation rules together with a visualization is given in Section 4.1.1 of the thesis
def generate_rule_uncorrelated(instance, R):  # t = 1
    """Give every row an independently drawn weight and profit.

    Both values are random integers from the closed interval [1, R].
    ``instance`` is filled in place (columns "weight" and "profit");
    nothing is returned.
    """
    for row in instance.index:
        # Draw the weight first, then the profit, so the order of RNG calls
        # per row stays weight -> profit.
        drawn_weight = random.randint(1, R)
        drawn_profit = random.randint(1, R)
        instance.at[row, "weight"] = drawn_weight
        instance.at[row, "profit"] = drawn_profit
def generate_rule_weakly_correlated(instance, R):  # t = 2
    """Give every row a weight from [1, R] and a profit close to that weight.

    The profit is a random integer from
    [max(weight - R//10, 1), weight + R//10].
    ``instance`` is filled in place (columns "weight" and "profit");
    nothing is returned.
    """
    # Integer division keeps the bounds integral: random.randint rejects float
    # arguments with a TypeError on Python 3.12+ (R/10 would be a float).
    spread = R // 10
    for index in instance.index:
        weight = random.randint(1, R)
        instance.at[index, "weight"] = weight
        # The lower bound is clamped to 1 so profits stay positive.
        instance.at[index, "profit"] = random.randint(max(weight - spread, 1), weight + spread)
def generate_rule_strongly_correlated(instance, R):  # t = 3
    """Give every row a random weight and a profit tied directly to it.

    The weight is a random integer from [1, R]; the profit is
    int(weight + R/10). ``instance`` is filled in place (columns "weight"
    and "profit"); nothing is returned.
    """
    offset = R / 10  # constant gap between profit and weight
    for row in instance.index:
        drawn_weight = random.randint(1, R)
        instance.at[row, "weight"] = drawn_weight
        instance.at[row, "profit"] = int(drawn_weight + offset)
def generate_rule_inverse_strongly_correlated(instance, R):  # t = 4
    """Give every row a random profit and a weight tied directly to it.

    Mirror image of the strongly correlated rule: the profit is a random
    integer from [1, R] and the weight is int(profit + R/10).
    ``instance`` is filled in place (columns "profit" and "weight");
    nothing is returned.
    """
    offset = R / 10  # constant gap between weight and profit
    for row in instance.index:
        drawn_profit = random.randint(1, R)
        instance.at[row, "profit"] = drawn_profit
        instance.at[row, "weight"] = int(drawn_profit + offset)
def generate_rule_almost_strongly_correlated(instance, R):  # t = 5
    """Give every row a random weight and a profit in a narrow band above it.

    The weight is a random integer from [1, R]; the profit is a random
    integer from [weight + R//10 - R//500, weight + R//10 + R//500].
    ``instance`` is filled in place (columns "weight" and "profit");
    nothing is returned.
    """
    # Integer division keeps the bounds integral: random.randint rejects float
    # arguments with a TypeError on Python 3.12+ (R/10 and R/500 are floats).
    offset = R // 10
    jitter = R // 500
    for index in instance.index:
        weight = random.randint(1, R)
        instance.at[index, "weight"] = weight
        instance.at[index, "profit"] = random.randint(weight + offset - jitter, weight + offset + jitter)
def generate_rule_subset_sum(instance, R):  # t = 6
    """Give every row a random weight and an identical profit.

    The weight is a random integer from [1, R] and the profit is set to the
    same value. ``instance`` is filled in place (columns "weight" and
    "profit"); nothing is returned.
    """
    for row in instance.index:
        drawn_value = random.randint(1, R)
        instance.at[row, "weight"] = drawn_value
        instance.at[row, "profit"] = drawn_value
def generate_rule_uncorrelated_similar_weights(instance, R):  # t = 9
    """Give every row a weight just above R and an independent profit.

    The weight is a random integer from [R, R + R//1000]; the profit is a
    random integer from [1, R]. ``instance`` is filled in place (columns
    "weight" and "profit"); nothing is returned.
    """
    # Integer division keeps the bound integral: random.randint rejects float
    # arguments with a TypeError on Python 3.12+ (R/1000 would be a float).
    max_weight = R + R // 1000
    for index in instance.index:
        instance.at[index, "weight"] = random.randint(R, max_weight)
        instance.at[index, "profit"] = random.randint(1, R)

#Function to generate Instance: First as DataFrame, later as lines to add the first 5 rows
#Inputs: t = generation class, R = max_range, i = number of instance, max_i = maximum number of instances (here: 580)
def generateInstance(t, R, i, max_i): #Inputs: t
    name = "knapPI_"+str(t)+"_1000_"+str(R)+"_"+str(i)
    instance = pd.DataFrame(columns=["id", "profit", "weight", "x"], index=range(1000)) #Lines 6-1,005
    instance["id"] = range(1, 1001)
    instance["x"] = 1 #produces an invalid solution if the instances were solved with the configuration 
    match t:
        case 1: generate_rule_uncorrelated(instance, R)
        case 2: generate_rule_weakly_correlated(instance, R)
        case 3: generate_rule_strongly_correlated(instance, R)
        case 4: generate_rule_inverse_strongly_correlated(instance, R)
        case 5: generate_rule_almost_strongly_correlated(instance, R)
        case 6: generate_rule_subset_sum(instance, R)
        case 9: generate_rule_uncorrelated_similar_weights(instance, R)
        case _: pass
    
    instance["profit"] = pd.to_numeric(instance["profit"])
    instance["weight"] = pd.to_numeric(instance["weight"])
    capacity = round((i/(max_i+1))*instance["weight"].sum()) #Capacity, defined in Equation 4.1
    max_value = instance["profit"].sum() #Initialize the maximum value with the sum of all profits, will be overwritten

    filepath = "knapsacksolver/dataset/"+str(name)+".csv"

    #Save instance in as filepath
    instance.to_csv(filepath, index=False, sep=",", header=False, index_label=None, na_rep="")
    
    #Add the first five rows to each instance to receive the format "Pisinger" as shown in Table 1
    f = fileinput.input(filepath, inplace=1)
    for xline in f:
        if f.isfirstline(): #Add rows as the first lines
            print(  name  
                    + "\n" + "n 1000"
                    + "\n" + "c "+ str(capacity) 
                    + "\n" + "z " + str(max_value)
                    + "\n" + "time 0.00"
                    + '\n' + xline.rstrip("\r\n"))
        else:
            print(xline.rstrip("\r\n")) #Leave the other lines unchanged
    print("New Instance "+name+" created.")

In [None]:
# Driver cell: create every benchmark instance by calling generateInstance once
# per combination (the instances are saved in the folder "dataset").
# Total instances = 7 rules * 5 value intervals * 580 = 20,300.
number_of_generations = 580

for t in (1, 2, 3, 4, 5, 6, 9):  # all seven generation rules
    for R in (10 ** exponent for exponent in range(3, 8)):  # 1,000 up to 10,000,000
        for i in range(1, number_of_generations + 1):  # 580 instances per rule and interval
            generateInstance(t, R, i, max_i=number_of_generations)