In [49]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
import torch.optim as optim
import torch
from torchsummary import torchsummary
from torchsummary import summary
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt 
import copy
from sklearn import linear_model
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim.lr_scheduler import StepLR

from sklearn import ensemble
import xgboost as xg
from sklearn.metrics import mean_squared_error
import time
import glob
from sklearn.metrics import mean_absolute_error

In [50]:
# Load the 7nm dataset (path is relative to the notebook's working directory).
df = pd.read_pickle('./data_7nm.pkl')

# The three power components that make up the regression target. They are
# selected by name rather than by position so the sum stays correct if the
# column order changes or if this statement is re-run after `total_power`
# has already been appended to `df`.
power_columns = ['internal_power', 'leakage_power', 'switching_power']
df['total_power'] = df[power_columns].sum(axis=1)

# Keep only rows whose total power lies inside [lower_limit, upper_limit];
# the lower bound of 0 discards rows with a negative total.
lower_limit = 0.0000
upper_limit = 0.0005
filtered_df = df[(df['total_power'] >= lower_limit) & (df['total_power'] <= upper_limit)]
# `total_power` was only needed for filtering; it must not leak into the features.
filtered_df = filtered_df.drop(columns='total_power')

# Features: every remaining column except `instance_name`, the target
# components and `zd_total_power`.
X_df = filtered_df.drop(columns=['instance_name', *power_columns, 'zd_total_power'])
# Target: the sum of the three power components.
y1 = filtered_df["internal_power"]+filtered_df["leakage_power"]+filtered_df["switching_power"]

In [51]:
# Summary statistics for the numeric columns of the full, unfiltered frame `df`
# (which at this point also carries the derived `total_power` column).
df.describe()

Unnamed: 0,tr_0,tr_1,sp_0,sp_1,op_cap0,delay_0,delay_1,delay_2,delay_3,rail_voltage_0,zd_leakage_power,zd_internal_power,zd_switching_power,zd_total_power,internal_power,leakage_power,switching_power,total_power
count,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0,372420.0
mean,0.157468,0.126412,0.4644,0.377284,24.969722,132.323686,112.211749,20.549298,26.260413,0.70125,2.227274e-08,0.000214,0.00115,0.001365,7.024689e-07,2.174846e-08,2.8608e-06,3.585017e-06
std,0.050755,0.077746,0.226033,0.274037,13.117099,91.011605,99.892379,51.243329,67.171274,0.056939,3.53681e-08,0.0004,0.000922,0.000916,2.468401e-06,3.471931e-08,3.870281e-06,5.589085e-06
min,0.0,0.0,0.0,0.0,1.079989,4.951166,0.0,0.0,0.0,0.63,6.687331e-12,-0.001764,0.0,-1e-06,-2.247306e-06,7.025001e-12,0.0,-2.284751e-08
25%,0.123,0.07,0.315,0.109,13.633696,69.03548,44.881839,0.0,0.0,0.63,2.071897e-09,-3e-06,0.0004,0.000659,1.802036e-09,1.759207e-09,6.318618e-07,8.403893e-07
50%,0.161,0.142,0.486,0.418,24.935329,107.494091,92.744057,0.0,0.0,0.7,8.723944e-09,0.000211,0.000934,0.001199,1.620623e-07,8.565367e-09,1.717256e-06,1.919296e-06
75%,0.198,0.192,0.587,0.526,36.304143,169.475941,158.74297,0.0,0.0,0.77,2.285013e-08,0.000437,0.001694,0.001889,8.519605e-07,2.25997e-08,3.56442e-06,4.105732e-06
max,0.333,0.317,1.0,1.0,52.654617,1002.72937,991.32251,638.175171,670.426758,0.77,3.782222e-07,0.010118,0.006278,0.011754,0.0001311936,4.217212e-07,0.0001025423,0.0001702138


In [52]:
# Report the size of the feature frame before it is split and encoded.
n_rows, n_columns = X_df.shape
print('The dataset has', n_columns, 'columns.')
print('The dataset has', n_rows, 'rows.')

The datset has 14 columns.
The dataset has 372404 rows.


In [53]:
# Hold out a quarter of the samples for testing; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, y1_train, y1_test = train_test_split(
    X_df,
    y1,
    test_size=0.25,
    random_state=42,
)
# Give both feature frames a fresh 0..n-1 index so that the column-wise concat
# with the one-hot encoded columns further down lines up row by row.
# The targets (y1_train / y1_test) keep their original index.
X_train, X_test = (split.reset_index(drop=True) for split in (X_train, X_test))

print('Size of train set:', X_train.shape[0])
print('Size of test set:', X_test.shape[0])

Size of train set: 279303
Size of test set: 93101


In [54]:
# Encoding the 'functionality' column
columns_to_encode = ["functionality"]
data_to_encode = X_train[columns_to_encode]

# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# keyword in 1.4. Prefer the new name and fall back for older installations,
# which reject the unknown keyword with a TypeError.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

# Fit on the training split only, so the categories are learned without
# looking at the test split.
encoder.fit(data_to_encode)

# Persist the fitted encoder so the exact same mapping can be reloaded later.
with open("encoder1.pkl","wb") as pkl:
    pickle.dump(encoder,pkl)



In [55]:
# Reload the persisted encoder from disk; `ohe` is the object used to encode
# both the train and the test split below.
with open("encoder1.pkl", "rb") as encoder_file:
    ohe = pickle.load(encoder_file)

In [56]:
# One-hot encode the categorical column(s) of the training split and put the
# resulting indicator columns in place of the original column(s).
encoded_data = ohe.transform(X_train[columns_to_encode])
encoded_column_names = ohe.get_feature_names_out(columns_to_encode)
# A frame built from a plain array already has a fresh 0..n-1 index, which
# matches the reset index of X_train.
encoded_df = pd.DataFrame(data=encoded_data, columns=encoded_column_names)
remaining_train_columns = X_train.drop(columns=columns_to_encode)
X_train = pd.concat([remaining_train_columns, encoded_df], axis=1)
print('The shape of train data is:', X_train.shape)

The shape of train data is: (279303, 21)


In [57]:
# Apply the encoder fitted on the training split to the test split, then put
# the indicator columns in place of the original categorical column(s).
encoded_data = ohe.transform(X_test[columns_to_encode])
encoded_column_names = ohe.get_feature_names_out(columns_to_encode)
# A frame built from a plain array already has a fresh 0..n-1 index, which
# matches the reset index of X_test.
encoded_df = pd.DataFrame(data=encoded_data, columns=encoded_column_names)
remaining_test_columns = X_test.drop(columns=columns_to_encode)
X_test = pd.concat([remaining_test_columns, encoded_df], axis=1)
print('The shape of test data is:', X_test.shape)

The shape of test data is: (93101, 21)


In [58]:
# Learn the per-feature min/max from the training split only. Only the fitted
# state is needed here (the transformed training data is produced in a later
# cell), so call `fit` instead of computing and discarding a transformed copy.
scaler = MinMaxScaler()
scaler.fit(X_train)

# Persist the fitted scaler so the same scaling can be reloaded later.
with open("scaler1.pkl","wb") as pkl:
    pickle.dump(scaler,pkl)


In [59]:

# Scale the test features with the scaler that was fitted on the training split.
# NOTE(review): `transform` returns a NumPy array by default, so `X_test` stops
# being a DataFrame from here on; re-running this cell would also scale the
# already-scaled values a second time.
X_test=scaler.transform(X_test)

In [60]:
# Standardise the target. The scaler's statistics come from the training
# targets only and are then applied to both splits. The scaler expects a 2-D
# input, hence the reshape into a single column.
target_scaler = StandardScaler().fit(y1_train.to_numpy().reshape(-1, 1))
std_y1_train = target_scaler.transform(y1_train.to_numpy().reshape(-1, 1))
std_y1_test = target_scaler.transform(y1_test.to_numpy().reshape(-1, 1))

In [61]:
# Reload the persisted min-max scaler and apply it to the training features.
with open("scaler1.pkl", "rb") as scaler_file:
    mm_scaler1 = pickle.load(scaler_file)

# `transform` hands back a plain array, so rebuild the DataFrame with the
# original column names. Note that this overwrites X_train with scaled values.
scaled_data = mm_scaler1.transform(X_train)
X_train = pd.DataFrame(data=scaled_data, columns=X_train.columns)
X_train


Unnamed: 0,tr_0,tr_1,sp_0,sp_1,op_cap0,delay_0,delay_1,delay_2,delay_3,rail_voltage_0,...,zd_internal_power,zd_switching_power,functionality_AND,functionality_BUF,functionality_INV,functionality_NAND,functionality_NOR,functionality_OR,functionality_XNOR,functionality_XOR
0,0.606607,0.672026,0.579,0.363,0.773553,0.059620,0.061571,0.000000,0.000000,1.0,...,0.196096,0.359082,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.477477,0.668810,0.744,0.480,0.432610,0.113919,0.136099,0.168111,0.244139,1.0,...,0.188732,0.049005,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.534535,0.292605,0.329,0.122,0.462978,0.233590,0.254515,0.000000,0.000000,0.0,...,0.139639,0.072386,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.597598,0.000000,0.507,0.000,0.481444,0.099028,0.000000,0.000000,0.000000,1.0,...,0.156876,0.281270,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.177177,0.475884,0.057,0.186,0.723563,0.072095,0.121161,0.131700,0.147700,1.0,...,0.207385,0.270616,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279298,0.702703,0.553055,0.537,0.416,0.466883,0.053369,0.103046,0.091430,0.095062,1.0,...,0.223461,0.436043,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
279299,0.702703,0.440514,0.503,0.493,0.865147,0.150246,0.178926,0.261266,0.332637,1.0,...,0.160821,0.859394,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
279300,0.459459,0.472669,0.390,0.180,0.811368,0.099742,0.110608,0.000000,0.000000,0.5,...,0.190027,0.293917,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
279301,0.459459,0.572347,0.765,0.230,0.716896,0.175599,0.202945,0.282633,0.257897,1.0,...,0.168375,0.362347,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [64]:
# One result row is appended here per call to train_xgb (row layout is
# described in the function's docstring).
stats = []
def train_xgb(max_depth, n_estimators, subsample):
    """Train one XGBoost regressor and record its test-set metrics in ``stats``.

    The model is fitted on the notebook-level ``X_train`` / ``std_y1_train``
    and evaluated on ``X_test`` / ``y1_test``. Predictions are mapped back to
    the original target scale with ``target_scaler`` before any metric is
    computed, so all metrics are expressed in the units of ``y1``.

    Parameters
    ----------
    max_depth : int
        Maximum tree depth passed to ``XGBRegressor``.
    n_estimators : int
        Number of boosting rounds passed to ``XGBRegressor``.
    subsample : float
        Row subsampling ratio passed to ``XGBRegressor``.

    Returns
    -------
    None
        The row ``[max_depth, n_estimators, subsample, r_square, cor_coeff,
        mae, mse, train_time, test_time]`` is appended to the module-level
        ``stats`` list. Both times are wall-clock seconds.
    """
    # Instantiation
    params = {'tree_method': 'hist',
              'n_jobs': -1,
              'seed': 123,
              'device': 'cuda',
              'max_depth': max_depth,
              'n_estimators': n_estimators,
              'learning_rate': 0.05,
              'subsample': subsample,
              'booster': 'gbtree',
              'enable_categorical': True
              }
    xgb_r = xg.XGBRegressor(**params)

    # Fitting the model; perf_counter is the clock intended for measuring
    # elapsed intervals.
    train_start_time = time.perf_counter()
    xgb_r.fit(X_train, std_y1_train)
    delta_train_time = time.perf_counter() - train_start_time

    # Inference on the held-out split.
    test_start_time = time.perf_counter()
    pred = xgb_r.predict(X_test)
    delta_test_time = time.perf_counter() - test_start_time

    # The model was trained on the standardised target; undo that scaling so
    # the metrics are comparable with the raw `y1_test` values.
    y_transformed = target_scaler.inverse_transform(pred.reshape(-1, 1)).ravel()

    # Pearson correlation is the off-diagonal entry of the 2x2 matrix.
    cor_coeff = np.corrcoef(y1_test.squeeze(), y_transformed)[0][1]
    r_square = r2_score(y1_test, y_transformed)
    mae = mean_absolute_error(y1_test, y_transformed)
    mse = mean_squared_error(y1_test, y_transformed)

    stats.append([max_depth, n_estimators, subsample, r_square, cor_coeff,
                  mae, mse, delta_train_time, delta_test_time])



In [65]:
# Hyper-parameter grid for the XGBoost sweep.
sweep_max_depths = (5, 7, 9)
sweep_subsamples = (0.5, 0.65, 0.8)
sweep_n_estimators = (500, 750, 1000)

# Start from an empty result list so that re-running this cell does not append
# a second copy of every row (which would then be duplicated in the CSV).
stats.clear()

# Loop nesting reproduces the original run order: depth outermost, then
# subsample, with the number of estimators varying fastest.
for sweep_depth in sweep_max_depths:
    for sweep_subsample in sweep_subsamples:
        for sweep_estimators in sweep_n_estimators:
            train_xgb(max_depth=sweep_depth,
                      n_estimators=sweep_estimators,
                      subsample=sweep_subsample)


Cor Coeff: [[1.         0.81767947]
 [0.81767947 1.        ]]
R2: 0.6685234933417334
MAE: 1.7556354510675686e-06
MSE: 1.0240811794617909e-11
RMSE :  0.000003
5 , 500 , 0.5 , 0.6685234933417334 , 0.817679466760037 , 1.7556354510675686e-06 , 1.0240811794617909e-11
Cor Coeff: [[1.         0.82392753]
 [0.82392753 1.        ]]
R2: 0.6787532044808752
MAE: 1.7341829175934417e-06
MSE: 9.92476964868912e-12
RMSE :  0.000003
5 , 750 , 0.5 , 0.6787532044808752 , 0.8239275253142926 , 1.7341829175934417e-06 , 9.92476964868912e-12
Cor Coeff: [[1.        0.8277706]
 [0.8277706 1.       ]]
R2: 0.6851499515863717
MAE: 1.7185994576737988e-06
MSE: 9.727145135671396e-12
RMSE :  0.000003
5 , 1000 , 0.5 , 0.6851499515863717 , 0.8277706048198721 , 1.7185994576737988e-06 , 9.727145135671396e-12
Cor Coeff: [[1.         0.81741757]
 [0.81741757 1.        ]]
R2: 0.6680976883281802
MAE: 1.7526319985870211e-06
MSE: 1.0253966841559136e-11
RMSE :  0.000003
5 , 500 , 0.65 , 0.6680976883281802 , 0.8174175718267547 , 1

In [66]:
import csv

# Header row; the order matches the rows appended to `stats` by train_xgb.
fields = ['max_depth', 'n_estimators', 'subsample', 'r_square', 'cor_coeff', 'mae', 'mse', 'train_time', 'test_time']

# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' on write (e.g. Windows)
# end up with an empty line after every row.
with open('stats_7nm_cr.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(fields)
    writer.writerows(stats)

In [67]:
# Print the CSV file written by the previous cell as a quick sanity check
# (shell escape; relies on a `cat` command being available).
!cat stats_7nm_cr.csv

max_depth,n_estimators,subsample,r_square,cor_coeff,mae,mse,train_time,test_time
5,500,0.5,0.6685234933417334,0.817679466760037,1.7556354510675686e-06,1.0240811794617909e-11,1.4975054264068604,0.024339675903320312
5,750,0.5,0.6787532044808752,0.8239275253142926,1.7341829175934417e-06,9.92476964868912e-12,2.156076431274414,0.02670574188232422
5,1000,0.5,0.6851499515863717,0.8277706048198721,1.7185994576737988e-06,9.727145135671396e-12,2.848210096359253,0.029152631759643555
5,500,0.65,0.6680976883281802,0.8174175718267547,1.7526319985870211e-06,1.0253966841559136e-11,1.4257454872131348,0.022793292999267578
5,750,0.65,0.6795322263220601,0.8243806866498627,1.7303223498824369e-06,9.900702132894717e-12,2.089324474334717,0.02604079246520996
5,1000,0.65,0.6866220547147446,0.8286587689218917,1.7146100013222245e-06,9.68166519734359e-12,2.7580742835998535,0.030282974243164062
5,500,0.8,0.6688278401217683,0.8178616448791137,1.7507156751292488e-06,1.0231409143051268e-11,1.431382656097412,0.02229523