# Machine Learning Max Runtime flow
## Import relevant libraries and dataset
 - The dataset is created by the PNRdatabase Jupyter notebook

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Read the dataset (a CSV file) into a pandas DataFrame
data = pd.read_csv(filepath_or_buffer='PNRdatabase3.csv')

## Define features (X) and targets (y)
 - Define X as feature columns
 - Define y as target column RunTimeRoute
 - Define z as target column RunTimePlace

In [2]:
# Discard the columns the machine learning model does not use;
# what remains are the feature and target columns
train = data.drop(
    columns=[
        'Type40LP', 'MetalStack', 'Date', 'RowDirection',
        'DoubleBack', 'FlipFirstRow', 'StartfromFirstRow',
    ]
)

# Features: every remaining column except the target columns
X = train.drop(
    columns=['Violations', 'AntennaViolations', 'RunTimeRoute', 'RunTimePlace']
)

# Targets: y is the RunTimeRoute column, z is the RunTimePlace column
y = train['RunTimeRoute']
z = train['RunTimePlace']

## Split dataset into train and test sets

In [3]:
from sklearn.model_selection import train_test_split

# Row labels to discard as outliers, one list per target
# (as known from PNRdatabase Machine Learning (Run Time) jupyter notebook)
y_outliers = [83, 67, 87]
z_outliers = [21, 27, 76, 85, 91]

# RunTimeRoute: drop the same rows from features and target so they stay aligned
X_remove_outliers_Route = X.drop(index=y_outliers)
y_remove_outliers = y.drop(index=y_outliers)

# RunTimePlace: same procedure with its own outlier list
X_remove_outliers_Place = X.drop(index=z_outliers)
z_remove_outliers = z.drop(index=z_outliers)

# Hold out 20% of each dataset as a test set; the remaining 80% is the train set
X_train_Route, X_test_Route, y_train, y_test = train_test_split(
    X_remove_outliers_Route,
    y_remove_outliers,
    test_size=0.2,
    random_state=41,
)
X_train_Place, X_test_Place, z_train, z_test = train_test_split(
    X_remove_outliers_Place,
    z_remove_outliers,
    test_size=0.2,
    random_state=41,
)

## Check each layer count for maximum run time

In [4]:
# Benchmark: for every layer count from 6 to 9, record the largest
# RunTimeRoute value found in the train set
listMaxRunTimeRoute = []

for layers in range(6, 10):
    # positions (not labels) of the train rows that have this layer count
    positions = np.nonzero((X_train_Route['layercount'] == layers).values)[0]
    # all train run times for this layer count
    listRunTimeRoute = [y_train.iloc[pos] for pos in positions]
    # max() raises ValueError if no train row has this layer count
    listMaxRunTimeRoute.append(max(listRunTimeRoute))
listMaxRunTimeRoute

[166, 171, 170, 173]

In [5]:
# Benchmark: for every layer count from 6 to 9, record the largest
# RunTimePlace value found in the train set
listMaxRunTimePlace = []

for layers in range(6, 10):
    # positions (not labels) of the train rows that have this layer count
    positions = np.nonzero((X_train_Place['layercount'] == layers).values)[0]
    # all train run times for this layer count
    listRunTimePlace = [z_train.iloc[pos] for pos in positions]
    # max() raises ValueError if no train row has this layer count
    listMaxRunTimePlace.append(max(listRunTimePlace))
listMaxRunTimePlace

[195, 224, 217, 230]

## Check test set and mark failures
 - Check the actual run time values of the test set and compare them with the benchmarked maximum run times

In [6]:
# ACTUAL outcomes for the RunTimeRoute test set: 0 = failure, 1 = success
listsuccessRoute = []

# k indexes the benchmark list: k = 0 corresponds to layer count 6, k = 3 to layer count 9
for k, layers in enumerate(range(6, 10)):
    # positions of the test rows that have this layer count
    positions = np.nonzero((X_test_Route['layercount'] == layers).values)[0]
    listRunTimeRoute = [y_test.iloc[pos] for pos in positions]

    # failure (0) when the run time exceeds the maximum run time of the same
    # layer count; success (1) otherwise, i.e. when it is smaller or equal
    listsuccessRoute.extend(
        0 if runtime > listMaxRunTimeRoute[k] else 1
        for runtime in listRunTimeRoute
    )
    print(listRunTimeRoute)
listsuccessRoute

[70, 91, 85]
[99, 66, 155, 69, 68, 84]
[76, 68, 92, 81, 60, 68, 59]
[67, 160, 74, 60, 61]


[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [7]:
# ACTUAL outcomes for the RunTimePlace test set: 0 = failure, 1 = success
listsuccessPlace = []

# k indexes the benchmark list: k = 0 corresponds to layer count 6, k = 3 to layer count 9
for k, layers in enumerate(range(6, 10)):
    # positions of the test rows that have this layer count
    positions = np.nonzero((X_test_Place['layercount'] == layers).values)[0]
    listRunTimePlace = [z_test.iloc[pos] for pos in positions]

    # failure (0) when the run time exceeds the maximum run time of the same
    # layer count; success (1) otherwise, i.e. when it is smaller or equal
    listsuccessPlace.extend(
        0 if runtime > listMaxRunTimePlace[k] else 1
        for runtime in listRunTimePlace
    )
    print(listRunTimePlace)
listsuccessPlace

[168, 198, 157, 181]
[186, 170, 207, 156, 201]
[213, 179, 167, 168, 205, 156]
[198, 172, 209, 159, 174, 161]


[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

## Use best model with tuned hyperparameters to predict runtimes
 - The model and hyperparameters are those found in the PNRdatabase Machine Learning (Run Time) Jupyter notebook

In [8]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import metrics

# RunTimeRoute model: gradient boosting regressor with the tuned hyperparameters.
# It is fitted on the train set and then used to predict the test set.
XGB_test = GradientBoostingRegressor(
    max_features=1,
    max_depth=3,
    min_samples_leaf=7,
    random_state=7,
)
# fit() returns the fitted estimator, so predict() can be chained onto it
y_pred = XGB_test.fit(X_train_Route, y_train).predict(X_test_Route)
print(y_pred)

[ 70.57045018  75.79877703 119.383977    78.69720103 100.82259223
  84.37454368  75.13986682  82.75121557  75.519549    71.59728372
  82.75121557 134.46021541  67.67263428  73.80487486  60.50840592
  76.79970885  81.42853207  87.82309196  76.79970885  74.05359907
  87.82309196]


In [9]:
from sklearn.svm import SVR

# RunTimePlace model: support vector regressor with the tuned hyperparameters.
# It is fitted on the train set and then used to predict the test set.
# NOTE(review): the recorded predictions are all close to 181, i.e. nearly
# constant - worth confirming whether the features should be scaled before SVR.
svr_test = SVR(C=1, max_iter=10000)
# fit() returns the fitted estimator, so predict() can be chained onto it
z_pred = svr_test.fit(X_train_Place, z_train).predict(X_test_Place)
print(z_pred)

[181.16823032 181.10339426 180.8151207  181.11708823 180.77546289
 180.74188969 180.75154309 181.09838766 181.02892396 181.11516596
 181.10203349 180.77842215 180.78756218 180.79604259 180.77884552
 180.72982106 180.77273866 181.08995836 180.80602596 180.74577163
 181.10937313]


## Check predicted successes and failures
 - Note: this is different from the section above (Check test set and mark failures)
 - Here the marking of failure and success depends on the predicted run times of the test set instead of the actual values

In [10]:
# PREDICTED outcomes for the RunTimeRoute test set: 0 = failure, 1 = success
listPredictedSuccessRoute = []

# k indexes the benchmark list: k = 0 corresponds to layer count 6, k = 3 to layer count 9
for k, layers in enumerate(range(6, 10)):
    # positions of the test rows that have this layer count; y_pred follows
    # the row order of X_test_Route, so the same positions index into it
    positions = np.nonzero((X_test_Route['layercount'] == layers).values)[0]
    listRunTimeRoute = [y_pred[pos] for pos in positions]

    # failure (0) when the predicted run time exceeds the maximum run time of the
    # same layer count; success (1) otherwise, i.e. when it is smaller or equal
    listPredictedSuccessRoute.extend(
        0 if runtime > listMaxRunTimeRoute[k] else 1
        for runtime in listRunTimeRoute
    )
    print(listRunTimeRoute)
listPredictedSuccessRoute

[78.69720103246648, 100.8225922340301, 71.59728372246197]
[70.57045018476822, 75.79877702656844, 119.38397700098069, 84.37454367673024, 76.79970885207403, 76.79970885207403]
[75.13986681932349, 82.75121556708143, 82.75121556708143, 67.6726342799172, 87.82309196185261, 74.05359907280248, 87.82309196185261]
[75.5195489985331, 134.46021540633927, 73.80487485968364, 60.508405916577665, 81.42853207310013]


[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [11]:
# PREDICTED outcomes for the RunTimePlace test set: 0 = failure, 1 = success
listPredictedSuccessPlace = []

# k indexes the benchmark list: k = 0 corresponds to layer count 6, k = 3 to layer count 9
for k, layers in enumerate(range(6, 10)):
    # positions of the test rows that have this layer count; z_pred follows
    # the row order of X_test_Place, so the same positions index into it
    positions = np.nonzero((X_test_Place['layercount'] == layers).values)[0]
    listRunTimePlace = [z_pred[pos] for pos in positions]

    # Compare the predicted PLACE run time with the PLACE benchmark.
    # (The comparison previously used y_pred and listMaxRunTimeRoute, i.e. the
    # route predictions and route benchmark, so these labels did not describe
    # the placement model at all.)
    # failure (0) when the predicted run time exceeds the maximum run time of the
    # same layer count; success (1) otherwise, i.e. when it is smaller or equal
    listPredictedSuccessPlace.extend(
        0 if runtime > listMaxRunTimePlace[k] else 1
        for runtime in listRunTimePlace
    )
    print(listRunTimePlace)
listPredictedSuccessPlace

[181.1170882252184, 180.77546289391577, 181.0289239632916, 180.77273866131355]
[181.1682303246939, 180.81512070463347, 181.11516595773946, 180.72982105691963, 180.80602595761562]
[181.10339425915402, 180.74188969302028, 181.10203349083616, 180.77884552494504, 181.089958360625, 180.7457716303632]
[180.751543086358, 181.09838765978063, 180.77842215499797, 180.78756217507708, 180.7960425892777, 181.10937312750812]


[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

## Check accuracy of predicted success/failure
 - Compare predicted list of success and failures with actual list

In [12]:
from sklearn.metrics import accuracy_score,confusion_matrix

# Fraction of RunTimePlace test samples whose predicted outcome matches the actual outcome
accuracy_score_Place = accuracy_score(
    y_true=listsuccessPlace,
    y_pred=listPredictedSuccessPlace,
)

# show the actual labels, then the predicted labels, then the accuracy
print(listsuccessPlace)
print(listPredictedSuccessPlace)
print(accuracy_score_Place)

[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0.9523809523809523


In [13]:
# Fraction of RunTimeRoute test samples whose predicted outcome matches the actual outcome
accuracy_score_Route = accuracy_score(
    y_true=listsuccessRoute,
    y_pred=listPredictedSuccessRoute,
)

# show the actual labels, then the predicted labels, then the accuracy
print(listsuccessRoute)
print(listPredictedSuccessRoute)
print(accuracy_score_Route)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
1.0


In [14]:
# Confusion matrix of RunTimePlace: actual outcomes (rows) against predicted outcomes (columns)
confusion_matrix(y_true=listsuccessPlace, y_pred=listPredictedSuccessPlace)

array([[ 0,  1],
       [ 0, 20]], dtype=int64)

In [16]:
# Confusion matrix of RunTimeRoute: actual outcomes (rows) against predicted outcomes (columns)
# NOTE(review): in the recorded run every actual and predicted label is 1, so the
# matrix collapses to 1x1; passing labels=[0, 1] should give the full 2x2 layout - confirm.
confusion_matrix(y_true=listsuccessRoute, y_pred=listPredictedSuccessRoute)

array([[21]], dtype=int64)