# Genetic Algorithm for the Quadratic Assignment Problem

In [1]:
##This is an initialization cell. Run this first
import math
import random
import time
from itertools import product

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Repository

In [2]:
def CSVtoNumpyArray(rawdata):
    """
    Load the two square matrices of a problem instance from a csv file.

    Input:
    rawdata = path of the csv file (string). The header line holds the
    matrix size n; every following row is read as one line of text made of
    integers separated by single spaces (repeated spaces are tolerated).
    The first n rows form the first matrix, the next n rows the second.

    Output:
    two float numpy matrices in a tuple (first matrix, second matrix)

    Optimised 04/06/2020
    """
    frame = pd.read_csv(rawdata)  # the header line becomes the only column name
    size = int(frame.columns[0])
    upper_rows = frame.iloc[:size, :].values  # text rows of the first matrix (loc)
    lower_rows = frame.iloc[size:, :].values  # text rows of the second matrix (flow)

    def rows_to_matrix(raw_rows):
        # Every entry of raw_rows is a one-element row holding the text of a matrix row.
        parsed = []
        for r in range(size):
            tokens = [tok for tok in raw_rows[r][0].split(" ") if tok != '']
            parsed.append(np.array(tokens).astype(int))
        # Stacking below a float row of zeros gives the result a float dtype and
        # rejects rows whose length differs from size; the seed row is then dropped.
        return np.vstack([np.zeros(size)] + parsed)[1:]

    return rows_to_matrix(upper_rows), rows_to_matrix(lower_rows)

In [None]:
# Small instances (below 10x10): quick with every method.
matrix_size_4 = './data/made4.csv'
matrix_size_5 = './data/made5.csv'
matrix_size_6 = './data/made6.csv'
matrix_size_7 = './data/made7.csv'
matrix_size_8 = './data/made8.csv'
matrix_size_9 = './data/made9.csv'

# All hand-made small instances, ordered by size.
matrixMade = [
    matrix_size_4,
    matrix_size_5,
    matrix_size_6,
    matrix_size_7,
    matrix_size_8,
    matrix_size_9,
]


# Medium instances (10x10 up to 30x30): slow with deterministic methods,
# fast with heuristics.
matrix_size_10 = './data/tai10a.csv'
matrix_size_11 = './data/made11.csv'
matrix_size_12 = './data/tai12a.csv'
matrix_size_15 = './data/chr15a.csv'
matrix_size_20 = './data/chr20a.csv'
matrix_size_26 = './data/bur26a.csv'

# Large instances (30x30 and bigger): reasonably slow even with heuristics
# once a certain accuracy is required.
# NOTE(review): the other tai files carry a letter suffix (tai10a, tai40a,
# tai256c) while the 60 and 80 entries do not - confirm these file names.
matrix_size_40 = './data/tai40a.csv'
matrix_size_60 = './data/tai60.csv'
matrix_size_80 = './data/tai80.csv'
matrix_size_256 = './data/tai256c.csv'

# Pick the instance to load here; the number in the variable name is the
# matrix size.
datamatrix = CSVtoNumpyArray(matrix_size_4)
MatrixLoc, MatrixFlow = datamatrix[0], datamatrix[1]

## Preliminary functions

In [16]:
def make_babies(length,populationsize):
    """
    Build a random starting population of permutations.

    Input:
    length is the size of the matrix (each permutation holds 0..length-1)
    populationsize is the number of permutations you need

    Output:
    2-D integer numpy array with one permutation per row
    """
    genes = np.array(list(range(length)))
    rows = []
    for _ in range(populationsize):
        # The same array is reshuffled every round, so each permutation is a
        # shuffle of the previous one; a copy freezes the current order.
        np.random.shuffle(genes)
        rows.append(genes.copy())
    # The explicit reshape keeps the (rows, length) shape for an empty population.
    return np.array(rows).reshape(len(rows), genes.size).astype(int)

In [20]:
# Find the minimum
def fit_list(listofpermutations,MatrixLoc,MatrixFlow,regenpercent):
    """
    Score every permutation and select the cheapest ones.

    The cost of a permutation p is the sum over all i != k of
    MatrixLoc[i][k] * MatrixFlow[p[i]][p[k]].

    Input:
    listofpermutations: a list of lists (or 2-D array), one permutation per row
    MatrixLoc, MatrixFlow: square matrices indexed as [i][k]
    regenpercent: the population size is integer-divided by this value to get
                  the number of survivors; at least 5 are always requested

    Output:
    finalcost: costs of the survivors in ascending order
    finalindex: positions of the survivors inside listofpermutations
    survivors: the selected permutations, cheapest first
    refillnumber: int, half the population size minus 10% of that half
    """
    side = len(listofpermutations[0])
    population = len(listofpermutations)

    # Evaluate the objective for each permutation.
    costs = []
    for perm in listofpermutations:
        cost = 0
        for row in range(side):
            for col in range(side):
                if row == col:
                    continue
                cost += MatrixLoc[row][col]*MatrixFlow[perm[row]][perm[col]]
        costs.append(cost)
    costs = np.array(costs, dtype=float)

    # Number of survivors, never fewer than 5.
    keep = max(population//regenpercent, 5)

    # A single ranking supplies both the survivor indices and their costs.
    ranking = np.argsort(costs)[:keep]
    cheapest = costs[ranking]

    half = population//2
    refillnumber = int(half - int(0.1*half))
    return cheapest, ranking, np.array(listofpermutations)[ranking], refillnumber

In [21]:
def _ordered_child(head_parent, fill_parent, split):
    """Return head_parent[:split] followed by the genes of fill_parent that
    are missing from that head, kept in fill_parent's order."""
    head = head_parent[:split]
    used = set(head.tolist())
    tail = [gene for gene in fill_parent.tolist() if gene not in used]
    return np.concatenate((head, np.array(tail, dtype=head.dtype)))


def crossover(topvalues,refillnumber):
    """
    Breed children from the selected parents with an ordered crossover.

    Each child copies the first `split` genes of one parent and is completed
    with the remaining genes in the order they appear in the other parent.
    Provided both parents are permutations of the same values, every child
    is a valid permutation too.

    Input:
    topvalues: best parents selected for crossover (2-D array, one
               permutation per row; must hold at least one row)
    refillnumber: int to refill - i.e. number of children

    Output:
    2-D array holding the parents (unchanged, in their original order)
    followed by exactly refillnumber children; a copy of the parents when
    refillnumber is zero or negative
    """
    parents = np.asarray(topvalues)
    length = np.size(parents[0])  # this is the size of the matrix
    children = []

    while len(children) < refillnumber:
        # Indexing with randrange instead of random.choice: some Python
        # versions test the truth value of the sequence inside choice, which
        # a multi-row numpy array does not support.
        parent1 = parents[random.randrange(len(parents))]
        parent2 = parents[random.randrange(len(parents))]
        split = random.randint(0, length)  # 0 and length copy a whole parent

        # One pair of parents yields two mirrored children.
        for head_parent, fill_parent in ((parent1, parent2), (parent2, parent1)):
            if len(children) >= refillnumber:
                break  # an odd refillnumber needs only one child of the last pair
            children.append(_ordered_child(head_parent, fill_parent, split))

    if not children:
        return parents.copy()
    return np.concatenate((parents, np.array(children)))

In [22]:
def mutation(pop, rate=0.1):
    """
    Randomly mutate a population by swapping two neighbouring genes.

    Every permutation is mutated with probability `rate`: a random position
    q is drawn and the genes at q and q-1 are exchanged (q = 0 pairs with the
    last gene). A swap keeps each row a valid permutation.

    Input:
    pop: 2-D array (or list of lists), one permutation per row
    rate: probability that a given permutation is mutated (default 0.1)

    Output:
    new 2-D array of the same shape as pop; pop itself is left untouched
    """
    # Work on a copy so the caller's population is never modified in place.
    mutated = np.array(pop, copy=True)
    if mutated.ndim != 2 or mutated.shape[1] == 0:
        return mutated  # nothing to swap in an empty population

    length = mutated.shape[1]
    for agent in mutated:  # each row is a writable view into `mutated`
        if random.random() < rate:
            q = random.randrange(length)  # always a valid index
            u = q - 1  # -1 wraps around to the last gene
            agent[q], agent[u] = agent[u], agent[q]

    return mutated

In [23]:
#THE FINAL CODE
def ga(popsize,generations,name,regenpercent):
    """
    Run the genetic algorithm on the instance stored in a csv file.

    Every generation scores the population with fit_list, breeds the
    selected parents with crossover and passes the result through mutation;
    that result becomes the next population.

    Input:
    popsize: int initial population size (minimum 20)
    generations: int on number in generation
    name: str input file name
    regenpercent: selection divisor handed to fit_list unchanged

    Output:
    (value, list): the lowest cost found in the final population and the
    permutation that achieves it
    """
    inputdata = CSVtoNumpyArray(name)
    MatrixLoc = np.array(inputdata[0])
    MatrixFlow = np.array(inputdata[1])
    length = len(MatrixFlow[0])

    listofpermutations = make_babies(length,popsize)

    for _ in range(generations):
        # Score the population once and reuse both results; evaluating the
        # fitness is the expensive step of a generation.
        _, _, topvalues, refillnumber = fit_list(listofpermutations,MatrixLoc,MatrixFlow,regenpercent)
        postcross = crossover(topvalues,refillnumber)
        listofpermutations = mutation(postcross)

    # One final scoring gives both the best cost and the matching permutation.
    finalcost, _, best, _ = fit_list(listofpermutations,MatrixLoc,MatrixFlow,regenpercent)
    return finalcost[0], best[0]

In [None]:
# Example run: population of 100, 10 generations.
# NOTE(review): matrix_, r and count are not defined in any cell visible
# here - confirm they are set elsewhere before running this cell.
(ga(100,10,matrix_,r),r,count)