In [None]:
from os import listdir
from os.path import isfile, join
import networkx as nx
import numpy as np
from PySpice.Spice.Parser import SpiceParser
from PySpice.Spice import BasicElement
from PySpice.Spice.Netlist import Node
import helpers as h
import multiprocessing
from tqdm.notebook import tqdm
import tqdm
from joblib import Parallel, delayed
from sklearn.model_selection import GridSearchCV,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
import multiprocessing
from sklearn.utils import shuffle
import dgsd
import pickle
import netlsd
from sklearn.metrics import f1_score, accuracy_score,roc_auc_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
def load_graphs(path, train_ratio=0.8):
    """Read every netlist file in ``path`` and split the parsed circuits into train/test.

    Each regular file in ``path`` is read as bytes and decoded as UTF-8 (undecodable
    bytes are dropped). Sources rejected by ``h.is_valid_netlist`` are skipped; the
    rest are converted with ``h.netlist_as_graph``.

    Parameters
    ----------
    path : str
        Directory containing the netlist files. A trailing slash is optional.
    train_ratio : float, optional
        Fraction of the valid netlists assigned to the training split (default 0.8).

    Returns
    -------
    (dict, dict)
        ``train_data`` and ``test_data``. Both map a running integer index to the
        ``(component_list, graph)`` pair returned by ``h.netlist_as_graph``. The
        index is shared between the two dicts, so test keys start at the number of
        training items.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the train/test
    # split identical from one run (and one machine) to the next.
    # join() instead of string concatenation so `path` works with or without "/".
    files = sorted(join(path, f) for f in listdir(path) if isfile(join(path, f)))

    valid_sources = []
    for file_path in files:
        # Context manager so the handle is closed as soon as the file is read.
        with open(file_path, 'rb') as fh:
            src = fh.read().decode('utf-8', 'ignore')
        if h.is_valid_netlist(src):
            valid_sources.append(src)

    train_count = int(len(valid_sources) * train_ratio)
    print(train_count)

    ## splitting train and test data
    train_data, test_data = {}, {}
    for index, src in enumerate(valid_sources):
        component_list, g = h.netlist_as_graph(src)
        target = train_data if index < train_count else test_data
        target[index] = (component_list, g)
    return train_data, test_data
def _label_graphs_with_component_types(data, allowed_types, label_index, min_limit, max_limit):
    """Annotate the usable graphs in ``data`` with a per-node "type" attribute.

    ``label_index`` is updated in place: every component type seen in a circuit
    that passes the size and allowed-type filters gets the next free integer.
    Returns the list of graphs that were annotated.
    """
    graphs = []
    for _, (components, g) in data.items():
        n_nodes = g.number_of_nodes()
        if n_nodes < min_limit or n_nodes > max_limit:
            continue
        comp_types = [type(comp) for comp in components]
        # Drop the whole circuit if it contains any component type not in the mapping.
        if any(t not in allowed_types for t in comp_types):
            continue
        for t in comp_types:
            label_index.setdefault(t, len(label_index))
        # Fewer components than nodes: the i-th node could not be given a type.
        if len(comp_types) < n_nodes:
            continue
        # The i-th node (in g.nodes() order) gets the type of the i-th component.
        nx.set_node_attributes(g, dict(zip(g.nodes(), comp_types)), "type")
        graphs.append(g)
    return graphs


def create_datasets_original_circuits(train_data, test_data, min_limit, max_limit,
                                      map_file='data/ltspice_examples_label_mapping.pkl'):
    """Build the unmodified-circuit datasets, with every node tagged by component type.

    Parameters
    ----------
    train_data, test_data : dict
        Mappings of index -> ``(component_list, graph)``.
    min_limit, max_limit : int
        Inclusive bounds on the node count; graphs outside the range are skipped.
    map_file : str, optional
        Pickle file whose keys are the allowed component types. Circuits that
        contain any other component type are skipped.

    Returns
    -------
    (list, list, dict)
        Annotated training graphs, annotated test graphs, and a dict mapping each
        component type encountered to an integer in first-seen order.

    Notes
    -----
    The input graphs are modified in place (a "type" node attribute is set).
    Circuits with fewer components than graph nodes are skipped in both splits;
    previously only the test split was guarded and the train split raised
    ``IndexError``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use trusted files.
    with open(map_file, 'rb') as f:
        mapping = pickle.load(f)
    allowed_types = set(mapping.keys())

    mapping_labels = {}
    train_dataset = _label_graphs_with_component_types(
        train_data, allowed_types, mapping_labels, min_limit, max_limit)
    test_dataset = _label_graphs_with_component_types(
        test_data, allowed_types, mapping_labels, min_limit, max_limit)
    return train_dataset, test_dataset, mapping_labels
            
def _leave_one_type_out(data, min_limit, max_limit):
    """Group copies of each graph, one node removed, by the removed component's type."""
    dataset = {}
    for _, (components, g) in data.items():
        n_nodes = g.number_of_nodes()
        if n_nodes < min_limit or n_nodes > max_limit:
            continue
        comp_types = [type(comp) for comp in components]
        # Fewer components than nodes: the i-th node could not be given a type.
        if len(comp_types) < n_nodes:
            continue
        # The i-th node (in g.nodes() order) gets the type of the i-th component.
        nx.set_node_attributes(g, dict(zip(g.nodes(), comp_types)), "type")

        # Index of the first component of every type, in first-seen order.
        # A dict is used instead of set(): set iteration order over classes is
        # hash-based and can differ between runs, which would change the order
        # in which types are first inserted into `dataset`.
        first_index = {}
        for index, t in enumerate(comp_types):
            first_index.setdefault(t, index)

        for t, index in first_index.items():
            g_ = g.copy()
            # assumes graph nodes are labelled by component index — TODO confirm
            # against h.netlist_as_graph
            g_.remove_node(index)
            dataset.setdefault(t, []).append(g_)
    return dataset


def construct_dataset(train_data, test_data,min_limit, max_limit):
    """Build the component-classification datasets.

    For every circuit whose node count lies in ``[min_limit, max_limit]`` and for
    every distinct component type in it, a copy of the graph is made and the node
    whose label equals the index of the first component of that type is removed.
    The copy is stored under that type.

    Parameters
    ----------
    train_data, test_data : dict
        Mappings of index -> ``(component_list, graph)``.
    min_limit, max_limit : int
        Inclusive bounds on the node count; graphs outside the range are skipped.

    Returns
    -------
    (dict, dict)
        ``train_dataset`` and ``test_dataset``, each mapping a component type to
        the list of graphs from which one component of that type was removed.
        Keys appear in the order the types are first encountered.

    Notes
    -----
    The input graphs are modified in place (a "type" node attribute is set before
    copying). Circuits with fewer components than nodes are skipped, matching the
    guard in ``create_datasets_original_circuits``.
    """
    train_dataset = _leave_one_type_out(train_data, min_limit, max_limit)
    test_dataset = _leave_one_type_out(test_data, min_limit, max_limit)
    return train_dataset, test_dataset
def filter_data(train_dataset, test_dataset, instances_limit):
    """Keep only the component types that have enough training graphs.

    Parameters
    ----------
    train_dataset, test_dataset : dict
        Mappings of component type -> list of graphs.
    instances_limit : int
        A type is kept only if it has strictly more than this many training graphs.

    Returns
    -------
    (dict, dict)
        Filtered train and test mappings with identical key sets. A kept type that
        does not occur in ``test_dataset`` maps to an empty list (rather than
        ``None``) so that callers can safely take ``len()`` of it or iterate it.
    """
    train_dataset_filter, test_dataset_filter = {}, {}
    for k, v in train_dataset.items():
        if len(v) > instances_limit:
            train_dataset_filter[k] = v
            test_dataset_filter[k] = test_dataset.get(k, [])
    return train_dataset_filter, test_dataset_filter
def map_labels(train_dataset_filter):
    """Assign consecutive integer labels (0, 1, 2, ...) to the keys of the mapping.

    Labels follow the iteration order of ``train_dataset_filter``; the values of
    the mapping are not used.
    """
    return {key: label for label, key in enumerate(train_dataset_filter.keys())}

def print_train_test_type_count(train_dataset_filter,test_dataset_filter):
    """Print the number of train and test graphs per key, then the two totals.

    For each key of ``train_dataset_filter`` two lines are printed: the key with
    its training count, and the matching test count. A final line prints the
    summed train and test counts. Raises ``TypeError`` if a key has no entry in
    ``test_dataset_filter`` (``len(None)``).
    """
    train_total = 0
    test_total = 0
    for comp_type, graphs in train_dataset_filter.items():
        n_train = len(graphs)
        print("train:", comp_type, n_train)
        n_test = len(test_dataset_filter.get(comp_type))
        print("test:", n_test)
        train_total += n_train
        test_total += n_test
    print(train_total, test_total)
def write_mapping(file, mapping):
    """Pickle ``mapping`` to the path ``file``, overwriting any existing file."""
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(file, "wb") as f:
        pickle.dump(mapping, f)
def create_train_test(train_dataset_filter,test_dataset_filter, label_mapping=None):
    """Flatten the per-type graph dictionaries into parallel graph/label lists.

    Parameters
    ----------
    train_dataset_filter, test_dataset_filter : dict
        Mappings of component type -> list of graphs. A value of ``None`` is
        treated as an empty list.
    label_mapping : dict, optional
        Mapping of component type -> integer label. When omitted, the
        notebook-level global ``label_mapping`` is used, which is how this
        function obtained it before the parameter existed.

    Returns
    -------
    (list, list, list, list)
        ``train_graphs, test_graphs, train_y, test_y``. Labels come from
        ``label_mapping.get``, so a type missing from the mapping yields ``None``.

    Raises
    ------
    NameError
        If ``label_mapping`` is not passed and no such global is defined.
    """
    if label_mapping is None:
        # Backward-compatible fallback to the hidden global the original relied on.
        try:
            label_mapping = globals()["label_mapping"]
        except KeyError:
            raise NameError("name 'label_mapping' is not defined") from None

    train_graphs, test_graphs, train_y, test_y = [], [], [], []
    for k, v in train_dataset_filter.items():
        for g in (v or ()):
            train_graphs.append(g)
            train_y.append(label_mapping.get(k))
    for k, v in test_dataset_filter.items():
        for g in (v or ()):
            test_graphs.append(g)
            test_y.append(label_mapping.get(k))
    return train_graphs, test_graphs, train_y, test_y

def write_dataset_dic(file, train_graphs,test_graphs, train_y, test_y):
    """Pickle the train/test graphs and labels to ``file`` as a single dict.

    The stored dict has the keys ``'train_x'``, ``'train_y'``, ``'test_x'`` and
    ``'test_y'``. Any existing file at ``file`` is overwritten.
    """
    data_dic = {'train_x':train_graphs, 'train_y':train_y, 'test_x':test_graphs, 'test_y':test_y}
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(file, 'wb') as f:
        pickle.dump(data_dic, f)


### construct dataset for component classification

In [None]:
##ltspice-examples
# Directory containing the LTspice example netlists.
path = "data/spice-datasets/ltspice_examples/"
# min_limit / max_limit: graphs with fewer / more nodes than this are skipped.
# instances_limit: a component type is kept only if it has more than this many
# training graphs (see filter_data).
min_limit, max_limit,instances_limit = 5,500,300
# Parse the netlists and split them into train/test circuits.
train_data, test_data = load_graphs(path)
# Per component type: copies of each circuit with one component of that type removed.
train_dataset, test_dataset = construct_dataset(train_data, test_data,min_limit, max_limit)
# Drop the component types that have too few training graphs.
train_dataset_filter, test_dataset_filter = filter_data(train_dataset, test_dataset,instances_limit)

In [None]:
### save dataset
# NOTE: this path ends in "_.pkl"; it is not the file that
# create_datasets_original_circuits reads by default.
map_path = "data/ltspice_examples_label_mapping_.pkl"
# label_mapping must be defined before create_train_test is called below:
# that function looks it up as a notebook-level global.
label_mapping = map_labels(train_dataset_filter)
print_train_test_type_count(train_dataset_filter,test_dataset_filter)
write_mapping(map_path, label_mapping)
train_graphs,test_graphs, train_y, test_y = create_train_test(train_dataset_filter,test_dataset_filter)
# Graph-classification dataset: graphs plus the integer label of the removed component type.
file_write = "data/ltspice_examples_GC_complete.pkl"
write_dataset_dic(file_write, train_graphs,test_graphs, train_y, test_y)

In [None]:
# Sanity check: number of parsed circuits per split, then number of generated graphs per split.
len(train_data),len(test_data),len(train_graphs),len(test_graphs)

### create dataset for link prediction task

In [None]:
# Unmodified circuits with a "type" attribute on every node, plus the
# component-type -> integer mapping collected while building them.
# NOTE: train_dataset / test_dataset are rebound here from dicts to lists of graphs.
train_dataset, test_dataset,components = create_datasets_original_circuits(train_data, test_data,min_limit, max_limit)
# NOTE(review): create_datasets_original_circuits reads its allowed component
# types from this same path by default, and write_mapping below overwrites it.
# The file must therefore already exist before this cell runs - confirm this
# read-then-overwrite cycle is intended.
map_path = "data/ltspice_examples_label_mapping.pkl"
# NOTE(review): label_mapping is recomputed here but not used in this cell.
label_mapping = map_labels(train_dataset_filter)
data_dic = {'train_x':train_dataset,  'test_x':test_dataset}
write_mapping(map_path, components)
file_write = "data/ltspice_examples_LP_complete.pkl"
# The with-block closes the file on exit; no explicit close() is needed.
with open(file_write, 'wb') as f:
    pickle.dump(data_dic, f)