In [1]:
from datasets import load_dataset
import pandas as pd

# Load the "train" split of the MetRex dataset ("scale-lab/MetRex")
dataset = load_dataset(path="scale-lab/MetRex", split="train")

# Function to calculate combinational depth
def calculate_combinational_depth(row):
    """Count the combinational gates on a row's critical path.

    Args:
        row: Mapping that provides a "critical_path" entry. The path is
            iterated as a sequence of gate mappings, each read via
            ``gate["type"]``.
            NOTE(review): this schema is assumed by the code, not verified
            against the dataset — confirm the column actually exists.

    Returns:
        int: Number of gates on the path whose "type" is one of the
        recognised combinational gate names. 0 for an empty or null path.

    Raises:
        KeyError: If ``row`` has no "critical_path" entry or a gate has no
            "type" entry.
    """
    # Gate names that count towards the depth; add other combinational gates if needed.
    combinational_gates = frozenset(
        ("AND", "OR", "NOT", "MUX", "NAND", "NOR", "XOR", "XNOR")
    )

    critical_path = row["critical_path"]

    # A null cell would otherwise fail with "NoneType is not iterable";
    # a row without a path contributes no gates.
    if critical_path is None:
        return 0

    return sum(1 for gate in critical_path if gate["type"] in combinational_gates)

# Create a new dataset with relevant features.
#
# Every column this cell (and calculate_combinational_depth) reads from a row.
# NOTE(review): the recorded run of this cell stopped with KeyError: 'signal',
# so at least that column is absent from the loaded split — check
# dataset.column_names and map these names to the real schema.
required_columns = [
    "signal",
    "fan_in",
    "fan_out",
    "gate_types",
    "load_capacitance",
    "gate_delays",
    "critical_path",
]

# Fail fast, before iterating the rows, with a message that names what is
# missing and what is actually available (instead of a bare KeyError mid-loop).
missing_columns = [name for name in required_columns if name not in dataset.column_names]
if missing_columns:
    raise KeyError(
        f"Dataset is missing expected column(s) {missing_columns}; "
        f"available columns: {dataset.column_names}"
    )

# One output record per dataset row, with the derived combinational depth attached.
new_dataset = [
    {
        "Signal": row["signal"],
        "Fan-In": row["fan_in"],
        "Fan-Out": row["fan_out"],
        "Gate Types": row["gate_types"],
        "Load Capacitance": row["load_capacitance"],
        "Gate Delays": row["gate_delays"],
        "Combinational Depth": calculate_combinational_depth(row),
    }
    for row in dataset
]

# Convert to DataFrame
df = pd.DataFrame(new_dataset)

# Save the dataset to a CSV file
df.to_csv("metrex_with_combinational_depth.csv", index=False)

README.md:   0%|          | 0.00/963 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


metrex.json:   0%|          | 0.00/137M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25868 [00:00<?, ? examples/s]

KeyError: 'signal'

In [None]:
from sklearn.model_selection import train_test_split

# Define features and target
features = ["Fan-In", "Fan-Out", "Gate Types", "Load Capacitance", "Gate Delays"]
target = "Combinational Depth"

# "Gate Types" is categorical, and scikit-learn's random forest needs numeric
# input, so expand it into one-hot indicator columns ("Gate Types_<value>")
# before splitting.
# NOTE(review): assumes each "Gate Types" cell holds a single scalar label — confirm.
X = pd.get_dummies(df[features], columns=["Gate Types"], dtype=float)
y = df[target]

# Split the dataset (80% train / 20% test, seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Fit a seeded random forest regressor on the training split
model = RandomForestRegressor(random_state=42)
model.fit(X=X_train, y=y_train)

# Score the held-out split with mean absolute error
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Error: {mae}")

In [None]:
import pandas as pd

# Read the feature table back from the CSV written by the dataset-building cell
df = pd.read_csv(filepath_or_buffer="metrex_with_combinational_depth.csv")

# Preview the leading rows
preview = df.head()
print(preview)

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 'Gate Types' column.
# The guard makes the cell safe to re-run: once 'Gate Types' has been replaced
# by its indicator columns, a second run is a no-op instead of a KeyError.
if "Gate Types" in df.columns:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    gate_types_encoded = encoder.fit_transform(df[["Gate Types"]])
    gate_types_encoded_df = pd.DataFrame(
        gate_types_encoded,
        columns=encoder.get_feature_names_out(["Gate Types"]),
        # Reuse df's index so the concat below lines rows up even when df does
        # not carry a default 0..n-1 index (e.g. after filtering rows).
        index=df.index,
    )

    # Replace the original 'Gate Types' column with the encoded features
    df = pd.concat([df.drop(columns="Gate Types"), gate_types_encoded_df], axis=1)

# Display the updated dataset
print(df.head())

In [None]:
from sklearn.model_selection import train_test_split

# Define features and target
target = df["Combinational Depth"]  # Target column

# Everything except the target and the "Signal" column is a feature.
# "Signal" is excluded because the prediction example later in this notebook
# builds its input without a "Signal" key and then selects X_train.columns,
# which would raise KeyError if "Signal" were a training column.
# NOTE(review): "Signal" looks like a per-row identifier rather than a
# predictor — confirm. errors="ignore" keeps the cell working if it is absent.
features = df.drop(columns=["Combinational Depth"]).drop(columns=["Signal"], errors="ignore")

# Split the dataset (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Display the shapes of the training and testing sets
print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Testing set: {X_test.shape}, {y_test.shape}")

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Create a seeded random forest regressor and fit it on the training split
# (fit() returns the estimator itself, so the calls can be chained)
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Show the hyper-parameters the estimator was configured with
model_params = model.get_params()
print(model_params)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Run the fitted model on the held-out features
y_pred = model.predict(X_test)

# Error metrics on the test split: MAE, RMSE (square root of the MSE) and R²
mae, rmse, r2 = (
    mean_absolute_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
    r2_score(y_test, y_pred),
)

# Report each metric on its own line
for metric_label, metric_value in (
    ("Mean Absolute Error (MAE)", mae),
    ("Root Mean Squared Error (RMSE)", rmse),
    ("R² Score", r2),
):
    print(f"{metric_label}: {metric_value}")

In [None]:
# Per-feature importance scores from the fitted model
feature_importances = model.feature_importances_

# Pair every training column with its score, most important first
importance_df = (
    pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances})
    .sort_values(by="Importance", ascending=False)
)

# Show the ranked table
print(importance_df)

In [None]:
import joblib

# Serialize the fitted model to disk with joblib
joblib.dump(value=model, filename="combinational_depth_predictor.pkl")

# Confirm where the model was written
print("Model saved as 'combinational_depth_predictor.pkl'")

In [None]:
# Load the saved model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files you created yourself or otherwise trust.
model = joblib.load("combinational_depth_predictor.pkl")

# Example: Predict combinational depth for a new signal
new_signal = {
    "Fan-In": 4,
    "Fan-Out": 1,
    "Load Capacitance": 0.01,
    "Gate Delays": 0.27,
    "Gate Types_AND": 1,  # One-hot encoded gate type
    "Gate Types_OR": 0,
    "Gate Types_NOT": 0,
    "Gate Types_MUX": 0
}

# Convert the new signal to a DataFrame
new_signal_df = pd.DataFrame([new_signal])

# Ensure the columns match the training data.
# Plain selection (new_signal_df[X_train.columns]) raises KeyError as soon as
# the training data has a column the example omits — e.g. one-hot columns for
# gate types not listed above. reindex puts the columns in training order,
# fills the omitted ones with 0 and drops keys the model was not trained on.
missing_columns = X_train.columns.difference(new_signal_df.columns)
if len(missing_columns) > 0:
    # Surface the fill so a forgotten numeric feature does not silently become 0.
    print(f"Filling {len(missing_columns)} feature column(s) absent from the example with 0: {list(missing_columns)}")
new_signal_df = new_signal_df.reindex(columns=X_train.columns, fill_value=0)

# Predict the combinational depth
predicted_depth = model.predict(new_signal_df)
print(f"Predicted Combinational Depth: {predicted_depth[0]}")

In [None]:
import matplotlib.pyplot as plt

# Scatter of actual vs predicted depth, drawn on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5)

# Dashed red diagonal across the range of actual values (where predicted == actual)
depth_lo, depth_hi = min(y_test), max(y_test)
ax.plot([depth_lo, depth_hi], [depth_lo, depth_hi], color="red", linestyle="--")

ax.set_xlabel("Actual Combinational Depth")
ax.set_ylabel("Predicted Combinational Depth")
ax.set_title("Actual vs Predicted Combinational Depth")
plt.show()

In [None]:
# Collect the headline metrics and the ten highest-ranked features in one mapping
top_features = importance_df.head(10).to_dict()  # Top 10 important features
results_summary = {
    "Mean Absolute Error (MAE)": mae,
    "Root Mean Squared Error (RMSE)": rmse,
    "R² Score": r2,
    "Top Features": top_features,
}

# Print the summary, one entry per line
print("Results Summary:")
for entry_name, entry_value in results_summary.items():
    print(f"{entry_name}: {entry_value}")

# output