### Instructions
A - above
B - below
X - delete 
Ctrl Enter - run
Enter - Enter into cell
M - make cell into markdown cell

In [9]:
import numpy as np
import pandas as pd
import sklearn
from matplotlib as pyplot 
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

# TASK 1 - MACHINE LEARNING

### Task 1.1 - Data Preparation

In [10]:
#Read data from file into numpy array
fileData = pd.read_excel('ENB2012_data.xlsx')
dataArray = np.array(fileData)

#Create scaler, which normalises the data (MinMaxScaler normalises from 0 to 1)
scaler = pp.MinMaxScaler()
scaler.fit(dataArray)
dataArray = scaler.transform(dataArray)

### Task 1.2 - Regression

In [11]:
#Split data into x and y arrays
xData = dataArray[:,0:8]
yData = dataArray[:,8:10]

#Using train_test_split, we split the data into 80% training data and 20% testing data
xTrainData, xTestData, yTrainData, yTestData = train_test_split(xData, yData, test_size=0.2)

#Creates iteratively trained Multi-layer Perceptron regressor.
#Fit the regressor with training inputs and training outputs.
#Next, obtain actual outputs using test inputs.
mlpRegressor = MLPRegressor()
mlpFit = mlpRegressor.fit(xTrainData, yTrainData)
mlpOutputs = mlpFit.predict(xTestData)

#Creates Random Forest regressor.
#Fit the regressor with training inputs and training outputs.
#Next, obtain actual outputs using test inputs as previously.
rfRegressor = RandomForestRegressor()
rfFit = rfRegressor.fit(xTrainData, yTrainData)
rfOutputs = rfFit.predict(xTestData)

#Creates 1st Support Vector Regressor.
#Split the y-Training data by half into a 1-D array.
#Fit the regressor with training inputs and training outputs.
#Next, obtain actual outputs using test inputs as previously.
yTrainData1 = yTrainData[:,0:1].ravel()
svrRegressor1 = SVR()
svrFit1 = svrRegressor1.fit(xTrainData, yTrainData1)
svrOutputs1 = svrFit1.predict(xTestData)

#Creates 2nd Support Vector Regressor.
#Split the y-Training data by the other half into a 1-D array.
#Fit the regressor with training inputs and training outputs.
#Next, obtain actual outputs using test inputs as previously.
#.ravel() flattens column vector to 1-D array.
yTrainData2 = yTrainData[:,1:2].ravel()
svrRegressor2 = SVR()
svrFit2 = svrRegressor2.fit(xTrainData, yTrainData2)
svrOutputs2 = svrFit2.predict(xTestData)

### Task 1.3 - Assessment of regression

In [12]:
#Calculate mean squared error of predictions against test (real) data for each regressor.
mlpMse = mean_squared_error(yTestData, mlpOutputs)
rfMse = mean_squared_error(yTestData, rfOutputs)
svr1Mse = mean_squared_error(yTestData[:,0:1].ravel(), svrOutputs1)
svr2Mse = mean_squared_error(yTestData[:,1:2].ravel(), svrOutputs2)

#Calculate cross validation for each regressor for training and testing data
mlpCrossValTrain = cross_val_score(mlpFit, xTrainData, yTrainData, cv=10, scoring='neg_mean_squared_error')*(-1)
mlpCrossValTest = cross_val_score(mlpFit, xTestData, yTestData, cv=10, scoring='neg_mean_squared_error')*(-1)

rfCrossValTrain = cross_val_score(rfFit, xTrainData, yTrainData, cv=10, scoring='neg_mean_squared_error')*(-1)
rfCrossValTest = cross_val_score(rfFit, xTestData, yTestData, cv=10, scoring='neg_mean_squared_error')*(-1)

svr1CrossValTrain = cross_val_score(svrFit1, xTrainData, yTrainData1, cv=10, scoring='neg_mean_squared_error')*(-1)
svr1CrossValTest = cross_val_score(svrFit1, xTestData, yTestData[:,0:1].ravel(), cv=10, scoring='neg_mean_squared_error')*(-1)

svr2CrossValTrain = cross_val_score(svrFit2, xTrainData, yTrainData2, cv=10, scoring='neg_mean_squared_error')*(-1)
svr2CrossValTest = cross_val_score(svrFit2, xTestData, yTestData[:,1:2].ravel(), cv=10, scoring='neg_mean_squared_error')*(-1)

#Plotting time!
pyplot.boxplot([mlpCrossValTrain, rfCrossValTrain, svr1CrossValTrain, svr2CrossValTrain])
pyplot.xticks([1,2,3,4],["MLP", "RF", "SVR1", "SVR2"])
pyplot.title("Graph of Training CV Scores of MSE against Regressors ")
pyplot.xlabel("Type of Regressor")
pyplot.ylabel("Mean Squared Error")
pyplot.show()
pyplot.boxplot([mlpCrossValTest, rfCrossValTest, svr1CrossValTest, svr2CrossValTest])
pyplot.xticks([1,2,3,4],["MLP", "RF", "SVR1", "SVR2"])
pyplot.title("Graph of Testing CV Scores of MSE against Regressors ")
pyplot.xlabel("Type of Regressor")
pyplot.ylabel("Mean Squared Error")
pyplot.show()

AttributeError: module 'matplotlib' has no attribute 'boxplot'

# TASK 2 - OPTIMISATION

### Task 2.1 - Setup & Fitness Function

In [None]:
def generate_timetable():
    #Generates a timetable.
    #Read the file to see how many classes + labs there are.
    #Store them in a 20x3 list alongside empty sessions to fill the list

    #Create "empty" timetable
    timetable = [["empty" for x in range(3)] for y in range(20)] 
    
    #Read file into allSplits list
    allSplits = []
    f = open("modules.txt", 'r')
    lineCount = 0
    for line in f:
        allSplits.append(line.split("|"))
        lineCount+=1
    f.close()
    
    #Add module into timetable
    for i in range(0,lineCount):
        timetable[i][0] = allSplits[i][0]
    
    
    #Add seminars into timetable.
    #Iterate through each module, for each number of seminars use
    #counters to iterate to next empty position and place seminar.
    #Should work on different set of modules if format is kept identical.
    emptyChecker = 0
    emptyChecker1 = 0
    for i in range(0, lineCount):
        for j in range(0, int(allSplits[i][2])):
            toggle = False
            while toggle == False:
                if timetable[emptyChecker][emptyChecker1] == "empty":
                    timetable[emptyChecker][emptyChecker1] = allSplits[i][0]
                    toggle = True
                emptyChecker1+=1
                if emptyChecker1 == 3:
                    emptyChecker+=1
                    emptyChecker1=0
    
    #randomises through randomise function below
    #randomSwitch = False
    #while randomSwitch == False:
    randomise_timetable(timetable)
        #randomSwitch = room_validity(timetable)
    return timetable

In [None]:
def randomise_timetable(timetable):
    #Create randomised timetable
    #Add lectures to lec. list and labs to lab list
    #Shuffle lists
    #Put back into timetable
    
    lecturesJoined = []
    labsJoined = []
    for i in range(len(timetable)):
        for j in range(0,len(timetable[i]), len(timetable[i])):
            lecturesJoined.append(timetable[i][j])
            labsJoined.append(timetable[i][j+1])
            labsJoined.append(timetable[i][j+2])
    np.random.shuffle(lecturesJoined)
    np.random.shuffle(labsJoined)
    counterLecs = 0
    counterLabs = 0
    for i in range(0,len(timetable)):
        for j in range(0,len(timetable[i]), len(timetable[i])):
            timetable[i][j] = lecturesJoined[counterLecs]
            timetable[i][j+1] = labsJoined[counterLabs]
            timetable[i][j+2] = labsJoined[counterLabs+1]
            counterLecs+=1
            counterLabs+=2

    return timetable

In [None]:
#This is the fitness function.
def fitness_function(timetable):
    #Calculates fitness of given timetable
    
    allSplits = []
    f = open("modules.txt", 'r')
    lineCount = 0
    for line in f:
        allSplits.append(line.split("|"))
        lineCount+=1
    f.close()
    
    modSplits = []
    modConstraintSplits = []
    for line in allSplits:
        modSplits.append(line[0])
        modConstraintSplits.append(line[3])
    """
    CONSTRAINTS:
    1) ONLY 1 LECTURE AND TWO LABS MAY RUN CONCURRENTLY
    2) LABS FOR A MODULE MAY ONLY OCCUR AFTER ITS LECTURE! PRECEDENCE!
    3) SESSIONS CAN'T INTERFERE! CONCURRENCE!
    """
    #Precedence constraints
    precConstraints = 0
    #module loop
    for i in range(0, len(timetable)):
        module = timetable[i][0]
        if module == "empty":
            continue
        #all of the timetable lab loop
        for j in range(0, len(timetable)):
            if j > i:
                break
            for l in range(1, len(timetable[j])):
                if module == timetable[j][l]:
                    precConstraints+=1
            
    
    #Concurrency constraints
    #Functional and clean, aside from try catch section which only caters to list length 3.
    concConstraints = 0
    #Cycle through each session
    for i in range(0,len(timetable)):
        #Cycle through each room
        for j in range(0,len(timetable[i])):
            if timetable[i][j] == "empty":
                continue
            constraintModules = []
            pos1 = modSplits.index(timetable[i][j])
            clashes1 = modConstraintSplits[pos1]
            #2nd cycle through each room, linear comparison of modules and clashes, starting from one after the one just checked.
            #Such that in a list [a, b, c, d] it compares as follows: ab, ac, ad, bc, bd, cd - 3 of each, all are compared
            #once to get most accurate concurrency violation value.
            for k in range(j,len(timetable[i])):
                if k == j or timetable[i][k] == "empty":
                    continue
                else:
                    pos2 = modSplits.index(timetable[i][k])
                    clashes2 = modConstraintSplits[pos2]
                    if timetable[i][j] == timetable[i][k] or timetable[i][j] in clashes2 or timetable[i][k] in clashes1:
                        concConstraints+=1

    if concConstraints == 0 and precConstraints == 0:
        return 0
    if concConstraints == 0:
        concConstraints = 1
    if precConstraints == 0:
        precConstraints = 1
    return precConstraints * concConstraints

In [None]:
def mutation_swap(timetable):
    #This function swaps two modules at random.
    #Must be modified for another timetable as it's specific to
    #this problem and its room (1 theatre 2 lab) constraints. 
    moduleIndex = []
    tim = len(timetable)-1
    tim2 = len(timetable[0])-1
    moduleIndex.append(np.random.randint(0,tim))
    moduleIndex.append(np.random.randint(0,tim2)) 
    whileToggle = False
    x = np.random.randint(0,tim)
    if moduleIndex[1] == 0:
        y = 0
    else:
        y = np.random.randint(1, tim2) 
    #While loop ensures randomly selected modules aren't the same module (x and y indexes arent both equal)
    #Furthermore, it ensure you may only swap a lecture with a lecture and a lab with a lab.
    while whileToggle == False:
        if x == moduleIndex[0] and y == moduleIndex[1]:
            x = np.random.randint(0,tim)
            if moduleIndex[1] == 0:
                y = 0
            else:
                y = np.random.randint(1, tim2) 
        else:
            whileToggle = True
    moduleIndex.append(x)
    moduleIndex.append(y)
    mod1 = timetable[moduleIndex[0]][moduleIndex[1]]
    mod2 = timetable[moduleIndex[2]][moduleIndex[3]]
    timetable[moduleIndex[0]][moduleIndex[1]] = mod2
    timetable[moduleIndex[2]][moduleIndex[3]] = mod1
    return timetable 

In [None]:
def mutation_ruin_and_run(timetable):
    #randomises given timetable
    return randomise_timetable(timetable)

In [None]:
#Objective Function for current coursework problem.
def comparison_lower(a, b):
    return b <= a

### Task 2.2 - Objective Algorithm Implementation

In [None]:
def hillclimber(itInt, timetable, problem, mutation, comparison):
    #itInt = no. of iterations
    #problem = fitness calculator
    #mutation = function by which to change decision variable
    #decision variable is the timetable, generated within the function!
    #comparison = objective function for current optimisation goal
    
    parent = timetable
    #for i in range(0,len(parent)):
        #print(parent[i])
    parentFitness = problem(parent)
    
    #Loop for itint no. of times
    fitnessList = []
    for iter in range(itInt):
        #if iter % 100 == 0:
            #print(f"Progress: {iter} / {itInt}")
        #Mutate
        child = mutation(parent)
        
        #Evaluate
        childFitness = problem(child)
        
        #Print parent soln
        if comparison(parentFitness, childFitness):
            parent = child
            parentFitness = childFitness
        fitnessList.append(parentFitness)
    
    return parent, fitnessList

In [None]:
#Optimisation Algorithm Implementation

def optimisation():
    t = generate_timetable()

    swapFits = []
    rnrFits = []
    
    for i in range(30):
        if i%5 == 0:
            print(f"Progress: {i}/30")
        #Swap Mutation 
        x, y = hillclimber(500, t, fitness_function, mutation_swap, comparison_lower)
        swapFits.append(y)
        
        #Ruin-and-Run Mutation
        a, b = hillclimber(500, t, fitness_function, mutation_ruin_and_run, comparison_lower)
        rnrFits.append(b)
        
    print("Done.\n")
    
    return swapFits, rnrFits

### Task 2.3 - Visualisation of results

In [None]:
swapFits, rnrFits = optimisation()

#Creates max, mean and min lists.
maxListSwap = []
meanListSwap = []
minListSwap = []
maxListRnr = []
meanListRnr = []
minListRnr = []
#Assigns values to lists.
for i in range(0, len(swapFits[0])):
    valuesSwap = []
    valuesRnr = []
    for j in range(0, len(swapFits)):
            valuesSwap.append(swapFits[j][i])
            valuesRnr.append(rnrFits[j][i])
    #Processes values from values lists to respective lists for plotting.
    maxListSwap.append(max(valuesSwap))
    meanListSwap.append(sum(valuesSwap)/len(valuesSwap))
    minListSwap.append(min(valuesSwap))
    maxListRnr.append(max(valuesRnr))
    meanListRnr.append(sum(valuesRnr)/len(valuesRnr))
    minListRnr.append(min(valuesRnr))   
    
#Plots data to graph.
pyplot.plot(maxListSwap, label="Swap Maximum")
pyplot.plot(meanListSwap, label="Swap Average")
pyplot.plot(minListSwap, label="Swap Minimum")

pyplot.plot(maxListRnr, label="RnR Maximum")
pyplot.plot(meanListRnr, label="RnR Average")
pyplot.plot(minListRnr, label="RnR Minimum")

pyplot.legend()
pyplot.title("Graph of Hill-Climber Fitness against Iterations ")
pyplot.xlabel("Number of Iterations")
pyplot.ylabel("Fitness Score")
pyplot.show()