# MENuS
## Using Machine Learning to select the best algorithm
* Define the features.
* Include the ingredients generator.
    * Generate an Instance of Ingredients list.
* Finish the model.
* Use minizinc from python.
    * Run it with 3 Algorithms.
    * Add the Instance to the Instances List.
    * Add the time it takes for each Algorithm to the Time Execution Matrix.
    * Transform the Instances List into the Features Array.
    * Traverse the Time Execution Matrix and pick the fastest Algorithm for each Instance.
    * The Algorithm with the fastest result will be added to the Labels list.

|  | Alg1 | Alg2 | Alg3 |
| --- | --- | --- | --- |
| Ins1 | X | Y | Z |
| Ins2 | X | Y | Z |
| Ins3 | X | Y | Z |
   
* Transform the ingredients list into the dataset for machine learning.
    * Create the Ingredients list.
    * Test with the minizinc model, all the Alg = 3 -> minizinc(300).
    * Take and transform the one that takes less time.
* Train the model: split the dataset into 70% for training and 30% for prediction; at least 80% accuracy is expected.

----------------------------------------------------------------------------------------------------------

### Auxiliary Functions

In [48]:
def print_red(text):
    """Print the string ``text`` to stdout in red (ANSI SGR code 31), then reset the colour."""
    red, reset = '\x1b[31m', '\x1b[0m'
    print(red + text + reset)
    
def print_yellow(text):
    """Print the string ``text`` to stdout in yellow (ANSI SGR code 33), then reset the colour."""
    yellow, reset = '\x1b[33m', '\x1b[0m'
    print(yellow + text + reset)

def print_green(text):
    """Print the string ``text`` to stdout in green (ANSI SGR code 32), then reset the colour."""
    green, reset = '\x1b[32m', '\x1b[0m'
    print(green + text + reset)

def print_pink(text):
    """Print the string ``text`` to stdout in magenta/pink (ANSI SGR code 35), then reset the colour."""
    pink, reset = '\x1b[35m', '\x1b[0m'
    print(pink + text + reset)

def print_cyan(text):
    """Print the string ``text`` to stdout in cyan (ANSI SGR code 36), then reset the colour."""
    cyan, reset = '\x1b[36m', '\x1b[0m'
    print(cyan + text + reset)

In [49]:
import os   
import re

# Collect the path of every MiniZinc data file (*.dzn) in the instances folder.
instances_folder = '../instances'
instances = [
    os.path.join(instances_folder, name)
    for name in os.listdir(instances_folder)
    if name.endswith('.dzn')
]

# Replace each path, in place, with the text content of the file it points to.
for position, path in enumerate(instances):
    with open(path, encoding="utf8") as handle:
        instances[position] = handle.read()

# Show the first instance as a sanity check.
print(instances[0])

n = 10;
budget = 200000;
groceries = [|2947,84,196,203,186,1900|1413,252,81,9,196,1752|2728,40,120,232,122,3507|2934,126,324,126,114,800|1393,7,231,49,60,2762|1710,75,60,130,89,3185|3008,120,200,192,184,3337|4293,81,324,297,87,3213|2510,50,15,250,160,3172|2328,48,156,168,53,1036|];
requirements = [|5860,6776|152,202|800,880|228,272|];


## Get the data
### Ingredients List
The instances have been generated using the notebook: Generate Instance.ipynb

In [50]:
import os
import subprocess 
import re

# Build the folder path with os.path.join instead of the literal '..\instances':
# the backslash made the path Windows-only (and '\i' is an invalid escape
# sequence), while the previous cell already used the portable '../instances'.
instances_folder = os.path.join(os.pardir, 'instances')

# `instances` keeps the *paths* of the .dzn files (they are handed to minizinc
# later); sorted() makes the instance order reproducible, since os.listdir
# returns entries in arbitrary order.
instances = sorted(
    os.path.join(instances_folder, f)
    for f in os.listdir(instances_folder)
    if f.endswith('.dzn')
)

# `il` holds the text *content* of each instance, index-aligned with `instances`.
il = []
for instance_path in instances:
    with open(instance_path, encoding="utf8") as file_object:
        il.append(file_object.read())
#print(instances[0])
#print(il[0])

### Models with annotations
The models have been generated using the notebook: GenerateAnnotations.ipynb

In [51]:
import os

# Folder holding the annotated MiniZinc models. Paths are built with
# os.path.join so they work on every OS; the previous hard-coded backslashes
# only resolved on Windows (and '\m' is an invalid escape sequence).
models_folder = os.path.join(os.pardir, 'models')
# Backward compatibility: this cell used to store the models folder in
# `instances_folder`, so that name is still rebound here.
instances_folder = models_folder

# One model file per search annotation (variable selection - value selection).
model_1 = os.path.join(models_folder, 'first_fail-indomain.mzn')
model_2 = os.path.join(models_folder, 'anti_first_fail-indomain.mzn')
model_3 = os.path.join(models_folder, 'smallest-indomain.mzn')

# Create Features and Labels

In [52]:
import numpy as np

#-MINIZINC---------------------------------
# Time Execution Matrix: one row per loaded instance, one column per model.
n_inst = len(il)
#ingredient_size=5
tem = np.zeros(shape=(n_inst, 3))
#-MACHINE LEARNING-------------------------
# Feature matrix (q_features values per instance) and one integer label per instance.
q_features = 40
features = np.zeros(shape=(n_inst, q_features))
labels = np.zeros(shape=n_inst, dtype=int)
#------------------------------------------

# Run Model-Annotation with Instance: Returns the Time Execution
def solve_with_minizinc(model, instance, timeout=3):
    """Run one MiniZinc model on one instance with Gecode and return its run time.

    Args:
        model: Path of the ``.mzn`` model file.
        instance: Path of the ``.dzn`` data file.
        timeout: Seconds to wait for MiniZinc before killing it. Defaults to
            3, the value that used to be hard-coded.

    Returns:
        The second-to-last space-separated token of MiniZinc's stdout as a
        float (presumably the seconds reported by ``--output-time`` -- TODO
        confirm against real solver output), or ``timeout`` itself when the
        run did not finish in time.

    Raises:
        IndexError, ValueError: If MiniZinc finished but its stdout does not
            end with a parseable time (for example because the run failed).
    """
    # Local import: `signal` is needed on the POSIX timeout path but is not
    # imported at file level.
    import signal

    on_windows = os.name == 'nt'
    # Argument list (no shell) so paths containing spaces reach minizinc intact.
    cmd = ["minizinc", "--solver", "Gecode", "--output-time", str(model), str(instance)]
    # On POSIX the child gets its own session/process group, so killpg below
    # terminates minizinc and its solver children without also signalling the
    # process group this notebook kernel belongs to.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, start_new_session=not on_windows)
    try:
        stdout, _ = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        if on_windows:
            # Force kill the process together with its child processes.
            subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
        else:
            try:
                os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # Kill the process group
            except ProcessLookupError:
                pass  # The process exited on its own just after the timeout.
        # Reap the child and close its pipe so no zombie/handle is leaked.
        proc.communicate()
        return timeout
    return float(stdout.decode().split(' ')[-2])
    
# Create Time Execution Matrix:
def create_TEM(n):
    """Fill the first ``n`` rows of the global ``tem`` matrix with solver times.

    Row ``r`` corresponds to ``instances[r]``; columns 0, 1 and 2 receive the
    value returned by ``solve_with_minizinc`` for ``model_1``, ``model_2`` and
    ``model_3`` respectively. Progress is printed after each instance and the
    whole matrix is printed at the end.
    """
    for row in range(n):
        current_instance = instances[row]
        for column, model in enumerate((model_1, model_2, model_3)):
            tem[row, column] = solve_with_minizinc(model, current_instance)
        print("it",row)
    print("\nUpdated [tem].\n",tem)

In [53]:
# NOTE(review): create_IL is not defined in the visible cells; this call is left disabled.
#create_IL(n_inst,ingredient_size)
# Time every model on every instance; this fills the global `tem` and prints progress.
create_TEM(n_inst)
# create_TEM already prints `tem` when it finishes, so this shows the matrix a second time.
print(tem)

it 0
it 1
it 2
it 3
it 4
it 5
it 6
it 7
it 8
it 9

Updated [tem].
 [[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]]
[[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]]


In [54]:
def get_groceries(g_array):
    """Extract the ``groceries`` matrix from each MiniZinc instance text.

    Args:
        g_array: Sequence of strings, each the content of a ``.dzn`` file that
            contains a 2-D array literal ``groceries = [|a,b,...|c,d,...|];``.

    Returns:
        A list with one integer NumPy array per input string; each row of an
        array is one ``|``-separated row of the ``groceries`` literal.

    Raises:
        ValueError: If a string has no ``groceries = [| ... |];`` declaration
            or one of its cells is not an integer.
    """
    import re  # local import keeps the function self-contained

    # Match the literal itself instead of relying on the exact text that
    # happens to follow it ("|];\nrequirements"), so extra whitespace, CRLF
    # line endings or a different declaration order do not break parsing.
    pattern = re.compile(r"groceries\s*=\s*\[\|(.*?)\|\s*\]\s*;", re.DOTALL)

    data_groceries = []
    for position, text in enumerate(g_array):
        match = pattern.search(text)
        if match is None:
            raise ValueError(f"no 'groceries = [| ... |];' declaration in instance {position}")
        # int() tolerates surrounding whitespace/newlines inside a cell.
        rows = [
            [int(cell) for cell in row.split(",")]
            for row in match.group(1).split("|")
            if row.strip()
        ]
        data_groceries.append(np.array(rows))
    return data_groceries

# Parse the groceries matrix out of every instance text in `il` (one NumPy array per instance).
np_groceries = get_groceries(il)

def create_features(inst_array):
    """Fill the global ``features`` matrix with summary statistics per instance.

    For instance ``i`` (``inst_array[i]``, a 2-D array with one row per
    ingredient) the first four columns are summarised. Per the original
    column legend: 0 Calories, 1 Protein, 2 Carbo, 3 Fat (4 Quantity and
    5 Price are not used). Column ``c`` occupies ``features[i, 10*c : 10*c+10]``
    in this order: mean, median, std, var, min, max, argmin, argmax,
    25th percentile, 75th percentile.

    Args:
        inst_array: Sequence of 2-D NumPy arrays, one per instance. The
            function now actually reads this argument; it used to ignore it
            and read the globals ``np_groceries`` / ``n_inst`` instead.

    Returns:
        None. The result is written in place into the global ``features``.
    """
    statistics = (
        np.mean,
        np.median,
        np.std,
        np.var,
        np.min,
        np.max,
        np.argmin,
        np.argmax,
        lambda column: np.percentile(column, 25),
        lambda column: np.percentile(column, 75),
    )
    n_columns = 4  # Calories, Protein, Carbo, Fat

    for i, groceries in enumerate(inst_array):
        for col in range(n_columns):
            column = groceries[:, col]
            offset = col * len(statistics)
            for k, statistic in enumerate(statistics):
                features[i, offset + k] = statistic(column)

# Fill the global `features` matrix from the parsed grocery arrays.
create_features(np_groceries)

[[2947, 84, 196, 203, 186, 1900], [1413, 252, 81, 9, 196, 1752], [2728, 40, 120, 232, 122, 3507], [2934, 126, 324, 126, 114, 800], [1393, 7, 231, 49, 60, 2762], [1710, 75, 60, 130, 89, 3185], [3008, 120, 200, 192, 184, 3337], [4293, 81, 324, 297, 87, 3213], [2510, 50, 15, 250, 160, 3172], [2328, 48, 156, 168, 53, 1036]]
[[3015, 153, 135, 207, 52, 2976], [3030, 100, 230, 190, 87, 3302], [1470, 130, 35, 90, 153, 3142], [2445, 150, 135, 145, 154, 2829], [4923, 45, 639, 243, 186, 3973], [2408, 70, 91, 196, 87, 1865], [2912, 14, 336, 168, 75, 2880], [1820, 35, 357, 28, 89, 1583], [1165, 75, 25, 85, 99, 1642], [2034, 24, 120, 162, 93, 1449], [6390, 80, 190, 590, 132, 2556], [4842, 66, 186, 426, 199, 2036], [1370, 75, 20, 110, 156, 1799]]
[[2828, 168, 224, 140, 116, 1423], [2604, 14, 196, 196, 179, 2398], [384, 18, 24, 24, 167, 3347], [3380, 90, 260, 220, 53, 2242], [4360, 30, 610, 200, 132, 3126], [1855, 35, 260, 75, 153, 3665], [3440, 65, 120, 300, 58, 1868], [1744, 80, 248, 48, 197, 3670],

In [55]:
import random

def create_labels(n,m):
    """Write the label of the fastest model for the first ``n`` rows of ``m``.

    ``m`` is indexed as ``m[row]`` and each row holds one time per model. The
    label stored in the global ``labels[row]`` is the 1-based position of the
    smallest time in that row; when several positions share the minimum, one
    of them is picked at random with ``random.choice``.
    """
    for row in range(n):
        times = m[row]
        best_time = np.min(times)
        # Positions of every entry equal to the row minimum (all of them on a tie).
        candidates = np.nonzero(times == best_time)[0]
        winner = random.choice(candidates)
        labels[row] = winner + 1

    
# Label each instance with the 1-based index of its fastest model according to `tem`.
create_labels(n_inst, tem)

In [56]:
X = features # Features
y = labels # Labels

# Normalize the data to have zero mean and unit variance.
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
# Constant columns (std == 0) must end up as 0. Dividing by a substituted 1
# gives exactly that (X - mean is 0 there) without first evaluating 0/0, which
# np.where(std == 0, 0, (X - mean) / std) did and which raised RuntimeWarnings.
safe_std = np.where(std == 0, 1, std)
X = (X - mean) / safe_std
# NOTE(review): the statistics above are computed on the whole dataset before
# the split, so the test rows influence the scaling -- consider fitting the
# scaling on the training split only.

# Split the data into training and testing sets.
# random_state makes the split (and therefore the reported accuracy) reproducible.
# NOTE(review): the plan at the top of the notebook asks for a 70/30 split,
# but 20% is held out here -- confirm which one is intended.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train a machine learning model on the training set
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model on the testing set
accuracy = model.score(X_test, y_test)
print('Accuracy:', accuracy)

Accuracy: 0.0


In [57]:
# Fit a random forest (depth capped at 10, fixed seed) on the training split
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=0, max_depth=10).fit(X_train, y_train)

# Score the fitted model on the held-out split and report it
accuracy = model.score(X_test, y_test)
print('Accuracy:', accuracy)

Accuracy: 0.0


In [58]:
# Distinct labels present in the training split and how many times each occurs.
unique, counts = np.unique(y_train, return_counts=True)
print(unique, counts, sep="\n")


[1 2 3]
[4 2 2]
