In [213]:
import os
import re
import numpy as np
import networkx as nx
from ast import literal_eval
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [214]:
folder_path = r"C:\Users\sreev\Downloads\GGH_ML"

In [215]:
all_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.csv')]
df_list = [pd.read_csv(file) for file in all_files]
df = pd.concat(df_list, ignore_index=True)

print(f"Total Rows: {df.shape[0]} - Columns: {df.shape[1]}")



Total Rows: 5504 - Columns: 9


In [216]:
df.head()

Unnamed: 0,ID,Equation,"Inputs (A, B, C, D, E)",Circuit,Min Delay (ns),Max Delay (ns),Combinational Depth,Final Output (Y),"Inputs (A, B, C, D)"
0,1,"AND(AND(AND(AND(A, B), C), D), E)","[1, 0, 0, 1, 1]",AND -> AND -> AND -> AND,8.0,12.0,4,0,
1,2,"AND(AND(AND(OR(A, B), C), D), E)","[0, 1, 1, 1, 1]",AND -> AND -> AND -> OR,8.0,12.0,4,1,
2,3,"AND(AND(AND(XOR(A, B), C), D), E)","[0, 0, 1, 0, 1]",AND -> AND -> AND -> XOR,8.5,12.5,4,0,
3,4,"AND(AND(AND(NAND(A, B), C), D), E)","[1, 1, 1, 0, 1]",AND -> AND -> AND -> NAND,7.5,11.5,4,0,
4,5,"AND(AND(AND(NOR(A, B), C), D), E)","[0, 1, 0, 1, 1]",AND -> AND -> AND -> NOR,7.5,11.5,4,0,


In [217]:
logic_gates = ["AND", "OR", "NOT", "BUFFER", "NAND", "NOR", "XOR", "XNOR"]

def count_logic_gates(equation):
    counts = {gate: len(re.findall(gate, equation)) for gate in logic_gates}
    return counts



In [218]:
logic_counts_df = df["Equation"].apply(lambda eq: pd.Series(count_logic_gates(eq)))

df = pd.concat([df, logic_counts_df], axis=1)

df.drop(columns=["Equation"], inplace=True)


In [157]:
df

Unnamed: 0,ID,"Inputs (A, B, C, D, E)",Circuit,Min Delay (ns),Max Delay (ns),Combinational Depth,Final Output (Y),"Inputs (A, B, C, D)",AND,OR,NOT,BUFFER,NAND,NOR,XOR,XNOR
0,1,"[1, 0, 0, 1, 1]",AND -> AND -> AND -> AND,8.0,12.0,4,0,,4,0,0,0,0,0,0,0
1,2,"[0, 1, 1, 1, 1]",AND -> AND -> AND -> OR,8.0,12.0,4,1,,3,1,0,0,0,0,0,0
2,3,"[0, 0, 1, 0, 1]",AND -> AND -> AND -> XOR,8.5,12.5,4,0,,3,1,0,0,0,0,1,0
3,4,"[1, 1, 1, 0, 1]",AND -> AND -> AND -> NAND,7.5,11.5,4,0,,4,0,0,0,1,0,0,0
4,5,"[0, 1, 0, 1, 1]",AND -> AND -> AND -> NOR,7.5,11.5,4,0,,3,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5499,508,,XNOR -> XNOR -> BUFFER,5.2,7.5,3,1,"[1, 1, 1, 0]",0,2,0,1,0,2,0,2
5500,509,,XNOR -> XNOR -> NAND,6.5,9.5,3,0,"[0, 1, 0, 1]",1,2,0,0,1,2,0,2
5501,510,,XNOR -> XNOR -> NOR,6.5,9.5,3,1,"[0, 0, 0, 0]",0,3,0,0,0,3,0,2
5502,511,,XNOR -> XNOR -> XOR,7.5,10.5,3,0,"[1, 0, 1, 0]",0,3,0,0,0,2,1,2


In [219]:
df.drop(columns = ["Circuit"],inplace = True)

In [220]:
df.head()

Unnamed: 0,ID,"Inputs (A, B, C, D, E)",Min Delay (ns),Max Delay (ns),Combinational Depth,Final Output (Y),"Inputs (A, B, C, D)",AND,OR,NOT,BUFFER,NAND,NOR,XOR,XNOR
0,1,"[1, 0, 0, 1, 1]",8.0,12.0,4,0,,4,0,0,0,0,0,0,0
1,2,"[0, 1, 1, 1, 1]",8.0,12.0,4,1,,3,1,0,0,0,0,0,0
2,3,"[0, 0, 1, 0, 1]",8.5,12.5,4,0,,3,1,0,0,0,0,1,0
3,4,"[1, 1, 1, 0, 1]",7.5,11.5,4,0,,4,0,0,0,1,0,0,0
4,5,"[0, 1, 0, 1, 1]",7.5,11.5,4,0,,3,1,0,0,0,1,0,0


In [221]:
df["Inputs (A, B, C, D)"]

0                NaN
1                NaN
2                NaN
3                NaN
4                NaN
            ...     
5499    [1, 1, 1, 0]
5500    [0, 1, 0, 1]
5501    [0, 0, 0, 0]
5502    [1, 0, 1, 0]
5503    [0, 1, 0, 1]
Name: Inputs (A, B, C, D), Length: 5504, dtype: object

In [222]:
df.drop(columns=["Inputs (A, B, C, D, E)"], inplace=True)

In [223]:
df.head()

Unnamed: 0,ID,Min Delay (ns),Max Delay (ns),Combinational Depth,Final Output (Y),"Inputs (A, B, C, D)",AND,OR,NOT,BUFFER,NAND,NOR,XOR,XNOR
0,1,8.0,12.0,4,0,,4,0,0,0,0,0,0,0
1,2,8.0,12.0,4,1,,3,1,0,0,0,0,0,0
2,3,8.5,12.5,4,0,,3,1,0,0,0,0,1,0
3,4,7.5,11.5,4,0,,4,0,0,0,1,0,0,0
4,5,7.5,11.5,4,0,,3,1,0,0,0,1,0,0


In [224]:
df = df.drop(columns = ["ID"])

In [225]:
df

Unnamed: 0,Min Delay (ns),Max Delay (ns),Combinational Depth,Final Output (Y),"Inputs (A, B, C, D)",AND,OR,NOT,BUFFER,NAND,NOR,XOR,XNOR
0,8.0,12.0,4,0,,4,0,0,0,0,0,0,0
1,8.0,12.0,4,1,,3,1,0,0,0,0,0,0
2,8.5,12.5,4,0,,3,1,0,0,0,0,1,0
3,7.5,11.5,4,0,,4,0,0,0,1,0,0,0
4,7.5,11.5,4,0,,3,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5499,5.2,7.5,3,1,"[1, 1, 1, 0]",0,2,0,1,0,2,0,2
5500,6.5,9.5,3,0,"[0, 1, 0, 1]",1,2,0,0,1,2,0,2
5501,6.5,9.5,3,1,"[0, 0, 0, 0]",0,3,0,0,0,3,0,2
5502,7.5,10.5,3,0,"[1, 0, 1, 0]",0,3,0,0,0,2,1,2


In [226]:
from ast import literal_eval

# Ensure missing values are replaced with a default list of five zeros
df["Inputs (A, B, C, D)"] = df["Inputs (A, B, C, D)"].fillna("[0, 0, 0, 0]")

# Convert string representations of lists into actual lists
df["Inputs (A, B, C, D)"] = df["Inputs (A, B, C, D)"].apply(lambda x: literal_eval(str(x)))


In [227]:
df["A"] = df["Inputs (A, B, C, D)"].apply(lambda x: x[0])
df["B"] = df["Inputs (A, B, C, D)"].apply(lambda x: x[1])
df["C"] = df["Inputs (A, B, C, D)"].apply(lambda x: x[2])
df["D"] = df["Inputs (A, B, C, D)"].apply(lambda x: x[3])



In [228]:
df

Unnamed: 0,Min Delay (ns),Max Delay (ns),Combinational Depth,Final Output (Y),"Inputs (A, B, C, D)",AND,OR,NOT,BUFFER,NAND,NOR,XOR,XNOR,A,B,C,D
0,8.0,12.0,4,0,"[0, 0, 0, 0]",4,0,0,0,0,0,0,0,0,0,0,0
1,8.0,12.0,4,1,"[0, 0, 0, 0]",3,1,0,0,0,0,0,0,0,0,0,0
2,8.5,12.5,4,0,"[0, 0, 0, 0]",3,1,0,0,0,0,1,0,0,0,0,0
3,7.5,11.5,4,0,"[0, 0, 0, 0]",4,0,0,0,1,0,0,0,0,0,0,0
4,7.5,11.5,4,0,"[0, 0, 0, 0]",3,1,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5499,5.2,7.5,3,1,"[1, 1, 1, 0]",0,2,0,1,0,2,0,2,1,1,1,0
5500,6.5,9.5,3,0,"[0, 1, 0, 1]",1,2,0,0,1,2,0,2,0,1,0,1
5501,6.5,9.5,3,1,"[0, 0, 0, 0]",0,3,0,0,0,3,0,2,0,0,0,0
5502,7.5,10.5,3,0,"[1, 0, 1, 0]",0,3,0,0,0,2,1,2,1,0,1,0


In [229]:
feature_cols = ["A", "B", "C", "D", "Min Delay (ns)", "Max Delay (ns)", 
                "AND", "OR", "NOT", "BUFFER", "NAND", "NOR", "XOR", "XNOR"]

X = df[feature_cols]  # Features (inputs)
Y = df["Combinational Depth"]  # Target (output)


In [230]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [231]:
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)


In [232]:
models = {
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
    "Linear Regression": LinearRegression(),
    "Support Vector Regressor": SVR()
}

In [233]:
for name, model in models.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)

    mae = mean_absolute_error(Y_test, Y_pred)
    mse = mean_squared_error(Y_test, Y_pred)
    r2 = r2_score(Y_test, Y_pred)

    print(f"{name} - MAE: {mae:.4f}, MSE: {mse:.4f}, R²: {r2:.4f}")


Training Random Forest Regressor...
Random Forest Regressor - MAE: 0.0004, MSE: 0.0000, R²: 0.9999

Training Linear Regression...
Linear Regression - MAE: 0.0000, MSE: 0.0000, R²: 1.0000

Training Support Vector Regressor...
Support Vector Regressor - MAE: 0.0521, MSE: 0.0038, R²: 0.9838


In [234]:
for model in models:
    print(model)
    Y_pred = models[model].predict(X_test)  
    
    mae = mean_absolute_error(Y_test, Y_pred)
    mse = mean_squared_error(Y_test, Y_pred)
    r2 = r2_score(Y_test, Y_pred)
    
    print("\nTest Data Evaluation:")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"R² Score: {r2:.4f}")

Random Forest Regressor

Test Data Evaluation:
Mean Absolute Error (MAE): 0.0004
Mean Squared Error (MSE): 0.0000
R² Score: 0.9999
Linear Regression

Test Data Evaluation:
Mean Absolute Error (MAE): 0.0000
Mean Squared Error (MSE): 0.0000
R² Score: 1.0000
Support Vector Regressor

Test Data Evaluation:
Mean Absolute Error (MAE): 0.0521
Mean Squared Error (MSE): 0.0038
R² Score: 0.9838


In [235]:
def read_rtl_file(file_path):
    """ Reads an RTL (Verilog) file and extracts the lines. """
    with open(file_path, "r") as f:
        lines = f.readlines()
    return lines

# Example usage:
rtl_code = read_rtl_file(r"C:\Users\sreev\Downloads\GGH_ML\example_rtl2.v")
print("First 10 lines of RTL:", rtl_code[:10])


First 10 lines of RTL: ['module logic_circuit (\n', '    input A, B, C, D, E,\n', '    output Y\n', ');\n', '    wire nand_out, or_out, and1_out;\n', '    nand (nand_out, A, B);\n', '    or (or_out, nand_out, C);\n', '    and (and1_out, or_out, D);\n', '    and (Y, and1_out, E);\n', 'endmodule\n']


In [236]:
def parse_verilog(rtl_lines):
    """
    Parses Verilog RTL to extract logic gates and their connections.
    Handles multiple gate instantiations on a single line.
    """
    logic_gates = ["AND", "OR", "NOT", "BUFFER", "NAND", "NOR", "XOR", "XNOR"]
    circuit_data = []

    # Updated regex to handle multiple gate instances per line
    gate_pattern = re.compile(r"\b(\w+)\s*\(([^)]+)\);")

    for line in rtl_lines:
        matches = gate_pattern.findall(line)
        for match in matches:
            gate_type, connections = match
            inputs = [x.strip() for x in connections.split(",")]

            if gate_type.upper() in logic_gates:
                circuit_data.append({
                    "Gate Type": gate_type.upper(),
                    "Output": inputs[0],  # First element is output
                    "Inputs": inputs[1:]  # Remaining elements are inputs
                })

    return circuit_data

# Test with your RTL file
parsed_circuit = parse_verilog(rtl_code)
print(parsed_circuit)  # Check extracted gates

[{'Gate Type': 'NAND', 'Output': 'nand_out', 'Inputs': ['A', 'B']}, {'Gate Type': 'OR', 'Output': 'or_out', 'Inputs': ['nand_out', 'C']}, {'Gate Type': 'AND', 'Output': 'and1_out', 'Inputs': ['or_out', 'D']}, {'Gate Type': 'AND', 'Output': 'Y', 'Inputs': ['and1_out', 'E']}]


In [237]:
def build_circuit_graph(df):
    """
    Constructs a directed graph representation of the circuit.
    Nodes represent signals, and edges represent connections between gates.
    """
    G = nx.DiGraph()  # Create a directed graph

    for index, row in df.iterrows():
        output_signal = row["Output"]
        input_signals = [row["A"], row["B"], row["C"], row["D"]]
        
        for inp in input_signals:
            if pd.notna(inp) and inp != output_signal:  # Avoid self-loops
                G.add_edge(inp, output_signal)

    return G


In [238]:
def calculate_combinational_depth(G):
    """
    Computes the longest path in the graph to determine logical depth.
    """
    if len(G.nodes) == 0:
        return 0  # No nodes, depth is 0
    
    longest_depth = 0
    for node in G.nodes:
        for target in G.nodes:
            if node != target and nx.has_path(G, node, target):
                path_length = nx.shortest_path_length(G, source=node, target=target)
                longest_depth = max(longest_depth, path_length)

    return longest_depth


In [239]:
circuit_graph = build_circuit_graph(df_rtl)

# Debugging: Print circuit connections
print("Circuit Graph Edges:", list(circuit_graph.edges))


Circuit Graph Edges: [('A', 'xor_out'), ('A', 'Y'), ('xor_out', 'nor_out'), ('D', 'xor_out'), ('nor_out', 'Y'), ('B', 'nor_out')]


In [240]:
import pandas as pd

def extract_features_from_rtl(rtl_file_path):
    """
    Reads an RTL file, extracts gate types, counts occurrences, and calculates combinational depth.
    """
    rtl_code = read_rtl_file(rtl_file_path)  # Read RTL file
    parsed_circuit = parse_verilog(rtl_code)  # Extract gate data

    # Convert parsed circuit to DataFrame
    df_rtl = convert_to_dataframe(parsed_circuit)

    # Count occurrences of each gate type
    gate_counts = {gate: 0 for gate in ["AND", "OR", "NOT", "BUFFER", "NAND", "NOR", "XOR", "XNOR"]}

    for _, row in df_rtl.iterrows():
        gate_type = row["Gate Type"]
        if gate_type in gate_counts:
            gate_counts[gate_type] += 1

    # Convert dictionary to DataFrame
    df_features = pd.DataFrame([gate_counts])

    # Compute combinational depth (sum of gate occurrences)
    combinational_depth = sum(df_features.iloc[0])

    return df_features, combinational_depth  # Return both features and depth

# Example usage
df_rtl_features, comb_depth = extract_features_from_rtl(r"C:\Users\sreev\Downloads\GGH_ML\example_rtl2.v")

# Print extracted features
print("Extracted RTL Features:\n", df_rtl_features)

# Print combinational depth separately on a new line
print("\nCombinational Depth:", comb_depth)


Extracted RTL Features:
    AND  OR  NOT  BUFFER  NAND  NOR  XOR  XNOR
0    2   1    0       0     1    0    0     0

Combinational Depth: 4


In [241]:
import pandas as pd

GATE_DELAYS = {
    "AND": (2, 3),
    "OR": (2, 3),
    "NOT": (1, 1.5),
    "BUFFER": (0.2, 0.5),
    "NAND": (1.5, 2.5),
    "NOR": (1.5, 2.5),
    "XOR": (2.5, 3.5),
    "XNOR": (2.5, 3.5)
}

def extract_features_from_rtl(rtl_file_path):
    """
    Reads an RTL file, extracts gate types, counts occurrences, and calculates combinational depth.
    """
    rtl_code = read_rtl_file(rtl_file_path)  # Read RTL file
    parsed_circuit = parse_verilog(rtl_code)  # Extract gate data

    # Convert parsed circuit to DataFrame
    df_rtl = convert_to_dataframe(parsed_circuit)

    # Count occurrences of each gate type
    gate_counts = {gate: 0 for gate in GATE_DELAYS.keys()}

    for _, row in df_rtl.iterrows():
        gate_type = row["Gate Type"]
        if gate_type in gate_counts:
            gate_counts[gate_type] += 1

    # Convert dictionary to DataFrame
    df_features = pd.DataFrame([gate_counts])

    # Compute combinational depth (sum of gate occurrences)
    combinational_depth = sum(df_features.iloc[0])

    return df_features, combinational_depth  # Return both features and depth

def calculate_min_max_delay(df_features):
    """Calculate overall min and max delay based on gate counts."""
    min_delay = sum(df_features[gate].values[0] * GATE_DELAYS[gate][0] for gate in GATE_DELAYS)
    max_delay = sum(df_features[gate].values[0] * GATE_DELAYS[gate][1] for gate in GATE_DELAYS)
    return min_delay, max_delay

def check_timing_violations(min_delay, max_delay, clock_period):
    """
    Calculate setup and hold time violations.
    - Setup violation: If max delay ≥ clock period
    - Hold violation: If min delay ≤ some hold margin (assumed as 10% of clock period)
    """
    hold_margin = 0.1 * clock_period  # Assuming hold time margin as 10% of clock
    setup_violation = max_delay >= clock_period
    hold_violation = min_delay <= hold_margin
    return setup_violation, hold_violation

# Extract features from RTL file
rtl_file_path = r"C:\Users\sreev\Downloads\GGH_ML\example_rtl2.v"
df_rtl_features, combinational_depth = extract_features_from_rtl(rtl_file_path)

# Calculate delays
min_delay, max_delay = calculate_min_max_delay(df_rtl_features)

# Set clock period (example: 10ns)
clock_period = 10  # Modify as needed
setup_violation, hold_violation = check_timing_violations(min_delay, max_delay, clock_period)

# Print results
print("Extracted RTL Features:\n", df_rtl_features)
print("\nCombinational Depth:", combinational_depth)
print(f"Min Delay: {min_delay} ns, Max Delay: {max_delay} ns")
print(f"Setup Violation: {setup_violation}, Hold Violation: {hold_violation}")


Extracted RTL Features:
    AND  OR  NOT  BUFFER  NAND  NOR  XOR  XNOR
0    2   1    0       0     1    0    0     0

Combinational Depth: 4
Min Delay: 7.5 ns, Max Delay: 11.5 ns
Setup Violation: True, Hold Violation: False
