# MENuS
## Using Machine Learning to select the best algorithm
* Define the features.
* Include the ingredients generator.
    * Generate an Instance of Ingredients list.
* Finish the model.
* Use minizinc from python.
    * Run it with 3 Algorithms.
    * Add the Instance to the Instances List.
    * Add the time it takes for each Algorithm to the Time Execution Matrix.
    * Transform the Instances List into the Features Array.
    * Traverse the Time Execution Matrix and pick the Algorithm with the fastest result for each Instance.
    * The Algorithm with the fastest result will be added to the Labels list.

|  | Alg1 | Alg2 | Alg3 |
| --- | --- | --- | --- |
| Ins1 | X | Y | Z |
| Ins2 | X | Y | Z |
| Ins3 | X | Y | Z |
   
* Transform the ingredients list into the dataset for machine learning.
    * Create the Ingredients list.
    * Test with the minizinc model, all the Alg = 3 -> minizinc(300).
    * Take and transform the one that takes less time.
* Training the AI. Split dataset, 70% for training and 30% for prediction, at least 80% accuracy expected. 
----------------------------------------------------------------------------------------------------------

### Auxiliary Functions

In [152]:
def print_red(text):
    """Print ``text`` wrapped in the ANSI escape codes for red, then reset."""
    red = '\x1b[31m'
    reset = '\x1b[0m'
    print(red + text + reset)
    
def print_yellow(text):
    """Print ``text`` wrapped in the ANSI escape codes for yellow, then reset."""
    yellow = '\x1b[33m'
    reset = '\x1b[0m'
    print(yellow + text + reset)

def print_green(text):
    """Print ``text`` wrapped in the ANSI escape codes for green, then reset."""
    green = '\x1b[32m'
    reset = '\x1b[0m'
    print(green + text + reset)

def print_pink(text):
    """Print ``text`` wrapped in the ANSI escape codes for magenta, then reset."""
    magenta = '\x1b[35m'
    reset = '\x1b[0m'
    print(magenta + text + reset)

def print_cyan(text):
    """Print ``text`` wrapped in the ANSI escape codes for cyan, then reset."""
    cyan = '\x1b[36m'
    reset = '\x1b[0m'
    print(cyan + text + reset)

## Get the Instances / Ingredient list
The instances have been generated using the notebook: Generate Instance.ipynb

In [153]:
import os

# Build the folder path with os.path.join so it contains no backslash escape
# sequence and resolves with the native separator on every platform.
instances_folder = os.path.join('..', 'instances')
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of everything derived from these lists stable between runs and machines.
instances_path = sorted(
    os.path.join(instances_folder, f)
    for f in os.listdir(instances_folder)
    if f.endswith('.dzn')
)
# instances[k] holds the full text of the file at instances_path[k].
instances = []
for path in instances_path:
    with open(path, encoding="utf8") as file_object:
        instances.append(file_object.read())
#print(instances_path[1])
#print(instances[1])

# Minizinc

### Chosen model with annotations
The models have been generated using the notebook: GenerateAnnotations.ipynb

In [154]:
# Path to the MiniZinc model file; solve_with_minizinc reads this module-level
# name when it builds the minizinc command line.
model = '../models/winners/smallest-indomain.mzn'

## Solvers

In [155]:
# Solver names passed to `minizinc --solver`. The position of a solver in this
# list is the label value solve_with_minizinc stores for it
# (0 for HiGHS, 1 for COIN-BC).
solvers = ["HiGHS", "COIN-BC"]

In [156]:
import random
import subprocess
def _parse_minizinc_output(stdout):
    """Extract ``(mnt, elapsed_seconds)`` from decoded minizinc output.

    Returns ``(inf, inf)`` when the text has no parsable ``mnt = <int>;``
    assignment or no parsable elapsed time, so a failed run always compares
    worse than any solved one.
    """
    failed = (float('inf'), float('inf'))
    marker = 'mnt = '
    start = stdout.find(marker)
    if start == -1:
        return failed
    start += len(marker)
    end = stdout.find(';', start)
    try:
        mnt_value = int(stdout[start:end])
        # The elapsed time is read as the second-to-last space-separated
        # token; presumably the line emitted because of --output-time.
        elapsed = float(stdout.split(' ')[-2])
    except (ValueError, IndexError):
        return failed
    return mnt_value, elapsed


def _kill_process_tree(proc):
    """Terminate ``proc`` together with its children, then reap it."""
    if os.name == 'nt':  # If the os is Windows
        subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
    else:
        import signal
        try:
            # The child was started in its own session, so this group only
            # contains minizinc and the processes it spawned.
            os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
        except ProcessLookupError:
            pass  # The process is already gone.
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


def solve_with_minizinc(instances, timeout_mzn, timeout):
    """Return, for every instance, the index of the solver that did best.

    Each data file is solved once per entry of the module-level ``solvers``
    list using the module-level ``model`` path. Solvers are ranked by the
    reported ``mnt`` value first (smaller is better) and by elapsed time
    second; an exact tie is broken at random between the tied solvers.

    Args:
        instances: paths of the data files handed to minizinc.
        timeout_mzn: solver time limit in seconds; it is multiplied by 1000
            before being passed to ``--solver-time-limit``.
        timeout: seconds to wait for the minizinc process before killing it.

    Returns:
        A list with one solver index per instance (0 for HiGHS, 1 for
        COIN-BC). An instance keeps index 0 when ``solvers`` is empty.
    """
    winner = [0] * len(instances)  # 0 for HiGHS, 1 for COIN-BC
    timeout_mzn = timeout_mzn * 1000
    unsolved = (float('inf'), float('inf'))

    for i, instance in enumerate(instances):
        best = None  # (mnt, seconds) of the current winner, None before the first run
        for j, solver in enumerate(solvers):
            # An argument list (no shell) keeps paths with spaces intact.
            cmd = [
                'minizinc', '--solver', solver, '--output-time',
                model, instance,
                '--solver-time-limit', str(timeout_mzn),
            ]
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                # Own session on POSIX so a timeout can kill the whole
                # process group without touching the calling process.
                start_new_session=(os.name != 'nt'),
            )
            try:
                stdout, _ = proc.communicate(timeout=timeout)
            except subprocess.TimeoutExpired:
                print_red('Error from minizinc: Stopping.')
                _kill_process_tree(proc)
                candidate = unsolved
            else:
                candidate = _parse_minizinc_output(stdout.decode())
                if candidate == unsolved:
                    print_red(f'{solver} with {instance} returned no usable solution.')
                else:
                    print_green(f'{solver} with {instance} SOLVED with a value of {candidate[0]} in {candidate[1]:.2f} seconds.')

            # Tuples compare lexicographically: value first, then time.
            if best is None or candidate < best:
                best = candidate
                winner[i] = j
            elif candidate == best:
                # Exact tie: choose between the current winner and this solver.
                winner[i] = random.choice((winner[i], j))
        print_yellow(str(winner[i]) + " WON!")
    return winner

# Machine Learning
## Get data: Parse ingredients list

In [157]:
import numpy as np

# Ingredients list
def get_groceries(g_array):
    """Parse the ``groceries`` table out of each instance text.

    Each element of ``g_array`` is expected to contain a block of the form
    ``groceries = [|a,b,...|c,d,...|];`` followed by a newline and
    ``requirements``. Rows are separated by ``|`` and cells by ``,``.

    Returns a list with one integer ``numpy`` array per input text, one row
    per table row.
    """
    parsed = []
    for text in g_array:
        after_header = text.split("groceries = [|")[1]
        table = after_header.split("|];\nrequirements")[0]
        # Conversion from String to Int Array.
        rows = [[int(cell) for cell in row.split(",")] for row in table.split("|")]
        parsed.append(np.array(rows))
    return parsed

def create_features(instances):
    """Build the feature matrix: one row of 40 statistics per instance.

    The groceries table of every instance is parsed with ``get_groceries``.
    For each of its first four columns (0: Calories, 1: Protein, 2: Carbo,
    3: Fat) ten statistics are stored side by side, so column ``c`` fills
    features ``10*c`` to ``10*c + 9`` in this order: mean, median, standard
    deviation, variance, min, max, argmin, argmax, 25th percentile and
    75th percentile.

    Returns a float array of shape ``(len(instances), 40)``.
    """
    stats_per_column = 10
    macro_columns = 4
    q_features = 40
    res = np.zeros((len(instances), q_features))
    for row, table in enumerate(get_groceries(instances)):
        for col in range(macro_columns):
            values = table[:, col]  # all rows of one macronutrient column
            offset = col * stats_per_column
            res[row, offset:offset + stats_per_column] = (
                np.mean(values),
                np.median(values),
                np.std(values),
                np.var(values),
                np.min(values),
                np.max(values),
                np.argmin(values),
                np.argmax(values),
                np.percentile(values, 25),
                np.percentile(values, 75),
            )
    return res

## Create Features
## Create Labels
Each label identifies the solver that solved the model best, comparing first the result and then the time: the best solver is the one that reaches the smallest value and, when the values are equal, does so in the least time.

In [158]:
#q_features = 40
#features = np.zeros((len(instances), q_features))
# One row of 40 summary statistics per instance text.
features = create_features(instances)
# One winning-solver index per instance file. The 10 is multiplied by 1000
# for --solver-time-limit; the 15 is the communicate() timeout in seconds.
labels = solve_with_minizinc(instances_path, 10, 15)

[32mHiGHS with ..\instances\0.dzn SOLVED with a value of 40 in 5.45 seconds.[0m
[32mCOIN-BC with ..\instances\0.dzn SOLVED with a value of 40 in 4.28 seconds.[0m
[33m1 WON![0m
[32mHiGHS with ..\instances\1.dzn SOLVED with a value of 40 in 3.76 seconds.[0m
[32mCOIN-BC with ..\instances\1.dzn SOLVED with a value of 40 in 2.91 seconds.[0m
[33m1 WON![0m
[32mHiGHS with ..\instances\10.dzn SOLVED with a value of 30 in 3.82 seconds.[0m
[32mCOIN-BC with ..\instances\10.dzn SOLVED with a value of 30 in 2.83 seconds.[0m
[33m1 WON![0m
[32mHiGHS with ..\instances\11.dzn SOLVED with a value of 50 in 0.40 seconds.[0m
[32mCOIN-BC with ..\instances\11.dzn SOLVED with a value of 50 in 1.22 seconds.[0m
[33m0 WON![0m
[32mHiGHS with ..\instances\12.dzn SOLVED with a value of 50 in 2.35 seconds.[0m
[32mCOIN-BC with ..\instances\12.dzn SOLVED with a value of 50 in 2.48 seconds.[0m
[33m0 WON![0m
[32mHiGHS with ..\instances\13.dzn SOLVED with a value of 40 in 4.25 seconds.[0m


IndexError: list index out of range

## Algorithms

In [159]:
X = features # Features
y = labels # Labels

# Normalize the data to have zero mean and unit variance.
# Constant columns have std == 0; dividing them by 1 instead leaves them at 0
# (X - mean is already 0 there) without triggering a division-by-zero warning.
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
safe_std = np.where(std == 0, 1, std)
X = (X - mean) / safe_std

# Split the data into training and testing sets.
# random_state is fixed so the reported accuracy is reproducible.
# NOTE(review): the plan at the top of the notebook states a 70/30 split,
# while test_size=0.2 gives 80/20 - confirm which one is intended.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train a machine learning model on the training set.
# The estimator gets its own name so the module-level `model` keeps pointing
# at the MiniZinc model path that solve_with_minizinc reads.
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

# Evaluate the model on the testing set
accuracy = logreg.score(X_test, y_test)
print('Accuracy:', accuracy)

ValueError: Found input variables with inconsistent numbers of samples: [100, 11]

In [160]:
# Train a machine learning model on the training set.
# The estimator gets its own name so the module-level `model` keeps pointing
# at the MiniZinc model path that solve_with_minizinc reads.
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(max_depth=10, random_state=0)
forest.fit(X_train, y_train)

# Evaluate the model on the testing set
accuracy = forest.score(X_test, y_test)
print('Accuracy:', accuracy)

Accuracy: 1.0


In [None]:
# Show the distinct training labels and how often each one occurs.
unique, counts = np.unique(y_train, return_counts=True)
print(unique)  # sorted distinct label values
print(counts)  # number of occurrences, aligned with `unique`


In [None]:
from collections import Counter

# NOTE(review): this rebinds y_train to a small hard-coded example list,
# replacing whatever labels an earlier cell stored under that name.
y_train = [0, 0, 1, 2, 0, 1, 0]

# Count frequency of each element in the array.
# Counter is a dict subclass and keeps first-seen order.
freq = Counter(y_train)

# Print frequency of each element
for num, count in freq.items():
    print(f"{num}: {count}")
