In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb




In [None]:

# Load dataset
csv_file = "yosys_features.csv" 
df = pd.read_csv(csv_file)

# Drop "File Name" column
df = df.drop(columns=["File Name"])

# Separate features and target variable
X = df.drop(columns=["Logic Depth"]).values
y = df["Logic Depth"].values.reshape(-1, 1)  # Reshape for consistency

# Normalize features
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y)

# Convert data to PyTorch tensors
real_data = torch.tensor(np.hstack((X_scaled, y_scaled)), dtype=torch.float32)

# GAN parameters
input_dim = real_data.shape[1]  # Number of features + target
latent_dim = 10  # Size of random noise vector
hidden_dim = 64  # Hidden layer size
batch_size = 16
epochs = 5000
lr = 0.0002  # Learning rate

# Define Generator
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, z):
        return self.model(z)

# Define Discriminator
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# Initialize models
generator = Generator(latent_dim, input_dim)
discriminator = Discriminator(input_dim)

# Loss and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=lr)
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr)

# Training loop
for epoch in range(epochs):
    # Train Discriminator
    optimizer_D.zero_grad()

    real_labels = torch.ones(batch_size, 1)
    fake_labels = torch.zeros(batch_size, 1)

    real_samples = real_data[torch.randint(0, real_data.shape[0], (batch_size,))]
    real_preds = discriminator(real_samples)
    real_loss = criterion(real_preds, real_labels)

    z = torch.randn(batch_size, latent_dim)
    fake_samples = generator(z)
    fake_preds = discriminator(fake_samples.detach())
    fake_loss = criterion(fake_preds, fake_labels)

    d_loss = real_loss + fake_loss
    d_loss.backward()
    optimizer_D.step()

    # Train Generator
    optimizer_G.zero_grad()
    fake_preds = discriminator(fake_samples)
    g_loss = criterion(fake_preds, real_labels)
    g_loss.backward()
    optimizer_G.step()

    if epoch % 500 == 0:
        print(f"Epoch [{epoch}/{epochs}], D Loss: {d_loss.item()}, G Loss: {g_loss.item()}")

# Generate synthetic data
num_samples = 100 
z = torch.randn(num_samples, latent_dim)
synthetic_data = generator(z).detach().numpy()

# Convert back to original scale
synthetic_X = scaler_X.inverse_transform(synthetic_data[:, :-1])
synthetic_y = scaler_y.inverse_transform(synthetic_data[:, -1].reshape(-1, 1))

# Save synthetic data
synthetic_df = pd.DataFrame(synthetic_X, columns=df.drop(columns=["Logic Depth"]).columns)
synthetic_df["Logic Depth"] = synthetic_y
synthetic_df.to_csv("synthetic_data.csv", index=False)

print("Synthetic data generated and saved to synthetic_data.csv")




Epoch [0/5000], D Loss: 1.362766981124878, G Loss: 0.6465383172035217
Epoch [500/5000], D Loss: 1.056244134902954, G Loss: 1.4027820825576782
Epoch [1000/5000], D Loss: 1.7083152532577515, G Loss: 0.8119560480117798
Epoch [1500/5000], D Loss: 0.9652946591377258, G Loss: 1.0031538009643555
Epoch [2000/5000], D Loss: 1.0914332866668701, G Loss: 0.9929912686347961
Epoch [2500/5000], D Loss: 0.9971930980682373, G Loss: 1.1034932136535645
Epoch [3000/5000], D Loss: 0.9722195863723755, G Loss: 1.1693158149719238
Epoch [3500/5000], D Loss: 0.5702064037322998, G Loss: 1.6434862613677979
Epoch [4000/5000], D Loss: 0.5853217244148254, G Loss: 1.274367094039917
Epoch [4500/5000], D Loss: 0.6038531064987183, G Loss: 1.3577046394348145
Synthetic data generated and saved to synthetic_data.csv


In [7]:

csv_file = "yosys_features.csv"  # Change this to your actual file name
df = pd.read_csv(csv_file)

csv_file2 = "synthetic_data.csv"
df2 = pd.read_csv(csv_file2)

# Drop the "File Name" column as it's not useful for prediction
df = df.drop(columns=["File Name"])
df = pd.concat([df, df2])

# Separate features and target variable
X = df.drop(columns=["Logic Depth"])
y = df["Logic Depth"]

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [8]:
# random forest

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Predict for new data
def predict_new_data(new_data):
    new_data_scaled = scaler.transform([new_data])
    return model.predict(new_data_scaled)[0]

# Example prediction
new_sample = X.iloc[0].values  # Taking first row for example
predicted_logic_depth = predict_new_data(new_sample)
print(f"Predicted Logic Depth for sample: {predicted_logic_depth}")


Mean Absolute Error: 0.13643825864090917
Mean Squared Error: 0.24154910426510268
R^2 Score: 0.8475870549791731
Predicted Logic Depth for sample: 1.001147251




In [10]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
csv_file = "yosys_features.csv"  # Change this to your actual file
df = pd.read_csv(csv_file)

csv_file2 = "synthetic_data.csv"
df2 = pd.read_csv(csv_file2)

# Drop the "File Name" column as it's not useful for prediction
df = df.drop(columns=["File Name"])
df = pd.concat([df, df2])

# Separate features and target variable
X = df.drop(columns=["Logic Depth"]).values
y = df["Logic Depth"].values

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Define the neural network model
class LogicDepthModel(nn.Module):
    def __init__(self, input_size):
        super(LogicDepthModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 1)  # Output layer for regression
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)  # No activation in output layer
        return x

# Instantiate model
input_size = X_train.shape[1]
model = LogicDepthModel(input_size)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
batch_size = 8
num_batches = len(X_train) // batch_size

for epoch in range(epochs):
    model.train()
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train_tensor[i:i+batch_size]
        y_batch = y_train_tensor[i:i+batch_size]
        
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

# Evaluate the model
model.eval()
y_pred_tensor = model(X_test_tensor).detach().numpy()
y_test_numpy = y_test_tensor.numpy()

mae = mean_absolute_error(y_test_numpy, y_pred_tensor)
mse = mean_squared_error(y_test_numpy, y_pred_tensor)
r2 = r2_score(y_test_numpy, y_pred_tensor)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Function to predict new data
def predict_new_data(new_data):
    new_data_scaled = scaler.transform([new_data])
    new_data_tensor = torch.tensor(new_data_scaled, dtype=torch.float32)
    model.eval()
    return model(new_data_tensor).item()

# Example prediction
new_sample = X[0]  # Take first row as an example
predicted_logic_depth = predict_new_data(new_sample)
print(f"Predicted Logic Depth for sample: {predicted_logic_depth}")


Epoch 10/100, Loss: 0.0057
Epoch 20/100, Loss: 0.0045
Epoch 30/100, Loss: 0.0010
Epoch 40/100, Loss: 0.0017
Epoch 50/100, Loss: 0.0025
Epoch 60/100, Loss: 0.0006
Epoch 70/100, Loss: 0.0005
Epoch 80/100, Loss: 0.0005
Epoch 90/100, Loss: 0.0005
Epoch 100/100, Loss: 0.0023
Mean Absolute Error: 0.16908623278141022
Mean Squared Error: 0.24735455214977264
R^2 Score: 0.8439239281075765
Predicted Logic Depth for sample: 0.9938473105430603


In [13]:
# Convert data into DMatrix format for XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Set XGBoost parameters
params = {
    "objective": "reg:squarederror",  # Regression task
    "eval_metric": "mae",
    "max_depth": 6,
    "learning_rate": 0.1,
    "n_estimators": 100
}

# Train the XGBoost model
model = xgb.train(params, dtrain, num_boost_round=100)

# Make predictions
y_pred = model.predict(dtest)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Function to predict new data
def predict_new_data(new_data):
    new_data_scaled = scaler.transform([new_data])
    dnew = xgb.DMatrix(new_data_scaled)
    return model.predict(dnew)[0]

# Example prediction
new_sample = X[0]  # Take first row as an example
predicted_logic_depth = predict_new_data(new_sample)


Mean Absolute Error: 0.12522097511731928
Mean Squared Error: 0.18661903171923086
R^2 Score: 0.8822468983778701


Parameters: { "n_estimators" } are not used.

