In [45]:
import sys
import json
import pandas as pd
import numpy as np
import numbers
from collections import OrderedDict
from sklearn.preprocessing import MinMaxScaler
%pwd

'/home/amik/E-Drive/McGill/PhD/timing_estimation/synthesized_verilog_parser/note_books'

## Reading File

In [46]:
# File Path:
file_that_contains_the_node_feature = "/home/amik/E-Drive/McGill/PhD/timing_estimation/synthesized_verilog_parser/data/outputs/tsmc_180_slow_parsed_library_file.json"
preprocessed_node_features_csv_file = "/home/amik/E-Drive/McGill/PhD/timing_estimation/synthesized_verilog_parser/data/outputs/tsmc_180_slow_preprocessed.xlsx"




column_names_that_contain_function_names = ['A_function', 'BRB_function', 'CO0N_function', 'CO0_function',
       'CO1N_function', 'CO1_function', 'CON_function', 'CO_function',
       'ICO_function', 'PP_function', 'QN_function', 'Q_function',
       'S_function', 'X2_function', 'Y_function', 'cell_footprint']



def read_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

node_feature_json_dict = read_file(file_that_contains_the_node_feature)

## Collect Column Names

In [47]:
gate_names = node_feature_json_dict.keys()
column_names = []

for gate in gate_names:
    gate_feature_names = list(node_feature_json_dict[gate].keys())
    gate_feature_names.remove('pins')
    
    # Adding all features except the pins
    for feature_name in gate_feature_names:
        if feature_name not in column_names:
            column_names.append(feature_name)
    
    
    # Adding pin features
    pins = node_feature_json_dict[gate]['pins']
    pin_names = pins.keys()
    
    for pin_name in pin_names:
        pin_feature_names = list(pins[pin_name].keys())
        pin_feature_names = [ pin_name + '_' + feature_name  for feature_name in pin_feature_names]
        
        
        for pin_feature_name in pin_feature_names:
            if pin_feature_name not in column_names:
                column_names.append(pin_feature_name)
       
       
column_names.append('gate_name')
column_names.sort()

## Add Column Features

In [48]:
# Initialize an empty DataFrame with specified column names
df = pd.DataFrame(columns=column_names)


def convert_to_float(val):
    if isinstance(val, str) and val.replace('.', '', 1).isdigit():
        val = float(val)
    if isinstance(val,numbers.Number):
        val = float(val)
    return val

In [49]:

for gate in gate_names:
    
    row_dict = {}
    row_dict['gate_name'] = gate
    gate_features = node_feature_json_dict[gate]
    
    
    # Adding gate features except the pins features
    gate_feature_names = list(gate_features.keys())
    gate_feature_names.remove('pins')
    for gate_feature_name in gate_feature_names:
        row_dict[gate_feature_name] = convert_to_float(gate_features[gate_feature_name])
        
    
    
    # Adding pins features
    pins = gate_features['pins']
    pin_names = pins.keys()
   

    for pin_name in pin_names:
        pin_features = pins[pin_name]
        pin_feature_names = list(pin_features.keys())
        renamed_pin_feature_names = [ pin_name + '_' + feature_name  for feature_name in pin_feature_names]
        
        for pin_feature_name in pin_feature_names:
            
            # Label encoding for pin direction. Output = 0, Input 1
            if 'direction' in pin_feature_name:
                pin_features[pin_feature_name] = 0 if pin_features[pin_feature_name] == 'output' else 1
            row_dict[pin_name + '_' + pin_feature_name] = convert_to_float(pin_features[pin_feature_name])


    # Adding zeros to the columns that donot belog to this gate
    row_actual_feature_names = list(row_dict.keys())
    for column_name in column_names:
        if column_name not in row_actual_feature_names:
            row_dict[column_name] = 0.0
        
    # Sort row_dict by keys
    sorted_row_dict = OrderedDict(sorted(row_dict.items()))
    
    # Append the sorted dictionary to the DataFrame
    df = pd.concat([df, pd.DataFrame([sorted_row_dict])], ignore_index=True)


  df = pd.concat([df, pd.DataFrame([sorted_row_dict])], ignore_index=True)


In [50]:
df.head()

Unnamed: 0,A0N_capacitance,A0N_direction,A0_capacitance,A0_direction,A1N_capacitance,A1N_direction,A1_capacitance,A1_direction,A2_capacitance,A2_direction,...,Y_capacitance,Y_direction,Y_function,Y_max_capacitance,Y_number_of_gates,area,cell_footprint,cell_leakage_power,gate_name,total_number_of_gates_in_block
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,76.5072,addfh,4104.18252,ADDFHX1,6.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,113.0976,addfh,7522.90416,ADDFHX2,6.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,116.424,addfh,8036.7309,ADDFHX4,6.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,73.1808,addfh,3398.98032,ADDFHXL,6.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,69.8544,addf,3632.35914,ADDFX1,6.0


## Feature Engineering - Dealing with categorical values

In [51]:
# Select columns with dtype 'object', which are typically strings
string_columns = df.select_dtypes(include=['object']).columns

# Print the names of these columns
print(string_columns)

Index(['A_function', 'BRB_function', 'CO0N_function', 'CO0_function',
       'CO1N_function', 'CO1_function', 'CON_function', 'CO_function',
       'ICO_function', 'PP_function', 'QN_function', 'Q_function',
       'S_function', 'X2_function', 'Y_function', 'cell_footprint',
       'gate_name'],
      dtype='object')


## Feature Engineering - Scaling Numerical Features

In [52]:
# Identify numerical columns (assuming columns with dtype 'object' are non-numeric)
numerical_columns = df.select_dtypes([np.number]).columns
print("Numerical columns:", numerical_columns)

Numerical columns: Index(['A0N_capacitance', 'A0N_direction', 'A0_capacitance', 'A0_direction',
       'A1N_capacitance', 'A1N_direction', 'A1_capacitance', 'A1_direction',
       'A2_capacitance', 'A2_direction',
       ...
       'X2_direction', 'X2_max_capacitance', 'X2_number_of_gates',
       'Y_capacitance', 'Y_direction', 'Y_max_capacitance',
       'Y_number_of_gates', 'area', 'cell_leakage_power',
       'total_number_of_gates_in_block'],
      dtype='object', length=164)


In [53]:
# Initialize Min-Max Scaler
scaler = MinMaxScaler()

# Scale the numerical columns
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

In [54]:
df.head()

Unnamed: 0,A0N_capacitance,A0N_direction,A0_capacitance,A0_direction,A1N_capacitance,A1N_direction,A1_capacitance,A1_direction,A2_capacitance,A2_direction,...,Y_capacitance,Y_direction,Y_function,Y_max_capacitance,Y_number_of_gates,area,cell_footprint,cell_leakage_power,gate_name,total_number_of_gates_in_block
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.377049,addfh,0.291273,ADDFHX1,0.333333
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.557377,addfh,0.5339,ADDFHX2,0.333333
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.57377,addfh,0.570366,ADDFHX4,0.333333
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.360656,addfh,0.241225,ADDFHXL,0.333333
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.344262,addf,0.257788,ADDFX1,0.333333


## Saving Dataframe as a CSV File

In [55]:
# Removing columns that contain the function
df = df.drop(columns = column_names_that_contain_function_names, axis=1)
df.head()



# Bringing gate name at the beginning 

column_order = ['gate_name'] + [col for col in df.columns if col != 'gate_name']

# Reorder the columns
df = df[column_order]

In [57]:
df.to_excel(preprocessed_node_features_csv_file, index=False) 

In [None]:
# # Select columns with dtype 'object', which are typically strings
# string_columns = df.select_dtypes(include=['object']).columns
# 
# # Print the names of these columns
# print(string_columns)

In [None]:
# from sentence_transformers import SentenceTransformer, util
# model = SentenceTransformer('msmarco-distilbert-dot-v5')
# 
# query_embedding = model.encode('A | B ^ C | D')
# passage_embedding = model.encode(['B | A ^ C'])
# 
# print("Similarity:", util.dot_score(query_embedding, passage_embedding))

In [4]:
import pandas as pd

# Sample DataFrame
df = pd.DataFrame({
    'gate_name': ['gate1', 'gate2', 'gate3', 'gate4'],
    'feature1': [10, 20, 30, 40],
    'feature2': [1.1, 2.2, 3.3, 4.4],
    'feature3' : ['A', 'B', 'C', 'D']
})

# The specific gate name you're looking for
specific_gate_name = 'gate2'

# Get the row where 'gate_name' equals the specific gate name
specific_row = df.loc[df['gate_name'] == specific_gate_name]

# Convert the row to a list, excluding the index and column names
row_as_list = specific_row.values.tolist()[0][1:]  # We use [0] to get the first (and only) item

print(row_as_list)


[20, 2.2, 'B']
