In [45]:
import json
import pandas as pd

# Load the JSON file
with open('merged_error_logs.json', 'r') as file:
    data = json.load(file)

# Convert the JSON data into a pandas DataFrame
df = pd.DataFrame(data)

# Display the first few rows of the dataframe
df

Unnamed: 0,error,error type,netlist component,suggestion
0,Fatal error: DC Transfer Function: Voltage sou...,Invalid DC analysis statement,DC analysis,Go to KiCadToNgSpice Conversion. Verify that t...
1,Fatal error: DC Transfer Function: Voltage sou...,Invalid DC analysis statement,DC analysis,Go to KiCadToNgSpice Conversion. Verify that t...
2,Fatal error: DC Transfer Function: Voltage sou...,Invalid DC analysis statement,DC analysis,Go to KiCadToNgSpice Conversion. Verify that t...
3,Fatal error: DC Transfer Function: Voltage sou...,Invalid DC analysis statement,DC analysis,Go to KiCadToNgSpice Conversion. Verify that t...
4,Fatal error: DC Transfer Function: Voltage sou...,Invalid DC analysis statement,DC analysis,Go to KiCadToNgSpice Conversion. Verify that t...
...,...,...,...,...
872,Fatal error: instance v45 is a shorted VSRC\nd...,Shorted Voltage Source,Voltage Source,Review the schematic and verify the connection...
873,Fatal error: instance v59 is a shorted VSRC\nd...,Shorted Voltage Source,Voltage Source,Review the schematic and verify the connection...
874,Fatal error: instance v82 is a shorted VSRC\nd...,Shorted Voltage Source,Voltage Source,Review the schematic and verify the connection...
875,Fatal error: instance v7 is a shorted VSRC\ndo...,Shorted Voltage Source,Voltage Source,Review the schematic and verify the connection...


In [46]:

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [47]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['error'])
X = tokenizer.texts_to_sequences(df['error'])

# Padding sequences to ensure uniform length
max_sequence_length = max([len(seq) for seq in X])  # Or set it to a fixed value, like 100 or 200
X_pad = pad_sequences(X, maxlen=max_sequence_length)

# Label Encoding (Error types)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['error type'])
label_map = {label: idx for idx, label in enumerate(df["error type"].unique())}


In [48]:
y

array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,
        3,  3,  3,  3,  3

In [49]:
# Get the mapping of labels to encoded values
label_map = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

# Print the label mapping
print(label_map)


{'Control Card Error ': 0, 'Invalid DC analysis statement': 1, 'Invalid component parameter or syntax error': 2, 'Invalid parameter in transient statement': 3, 'Invalid start time in transient statement': 4, 'Invalid step time in transient statement': 5, 'Invalid stop time in transient statement': 6, 'Missing Model Definition': 7, 'Model Type Mismatch': 8, 'Short circuit error': 9, 'Shorted Voltage Source': 10, 'Unknown Subcircuit Error': 11}


In [50]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Define LSTM model
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=max_sequence_length),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(32, activation="relu"),
    Dense(16, activation="relu"),
    Dense(12, activation="softmax")  # Multi-class classification
])

# Compile the model
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Model summary
model.summary()


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 340, 64)           37312     
                                                                 
 lstm_6 (LSTM)               (None, 340, 128)          98816     
                                                                 
 lstm_7 (LSTM)               (None, 64)                49408     
                                                                 
 dense_9 (Dense)             (None, 32)                2080      
                                                                 
 dense_10 (Dense)            (None, 16)                528       
                                                                 
 dense_11 (Dense)            (None, 12)                204       
                                                                 
Total params: 188348 (735.73 KB)
Trainable params: 188

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)


In [52]:
# Train the LSTM model
model.fit(X_train, y_train, epochs=80, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<keras.src.callbacks.History at 0x7f4664473af0>

In [53]:
import numpy as np
def classify_ngspice_error(error_message):
    seq = tokenizer.texts_to_sequences([error_message])
    padded_seq = pad_sequences(seq, maxlen=max_sequence_length)
    prediction = model.predict(padded_seq)
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_label[0]

# Example usage
print(classify_ngspice_error("incorect model type"))


Model Type Mismatch


In [54]:
print(len(label_map.keys()), label_map.keys())

12 dict_keys(['Control Card Error ', 'Invalid DC analysis statement', 'Invalid component parameter or syntax error', 'Invalid parameter in transient statement', 'Invalid start time in transient statement', 'Invalid step time in transient statement', 'Invalid stop time in transient statement', 'Missing Model Definition', 'Model Type Mismatch', 'Short circuit error', 'Shorted Voltage Source', 'Unknown Subcircuit Error'])


In [55]:
# Save the entire model
model.save("ngspice_error_classifier.h5")
import pickle

# Save the entire model
model.save("ngspice_error_classifier.h5")
import pickle

metadata = {
    'max_length': max_sequence_length,
    'tokenizer': tokenizer,
    'label_encoder': label_encoder
}

with open('metadata.pkl', 'wb') as f:
    pickle.dump(metadata, f)

  saving_api.save_model(


In [56]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming you already have a DataFrame 'df' with 'error type', 'netlist component', and 'suggestion'

# Step 1: Encode the features
error_type_encoder = LabelEncoder()
netlist_component_encoder = LabelEncoder()

df.loc[:, 'error_type_encoded'] = error_type_encoder.fit_transform(df['error type'])
df.loc[:, 'netlist_component_encoded'] = netlist_component_encoder.fit_transform(df['netlist component'])

# Step 2: Encode the target variable (suggestion)
suggestion_encoder = LabelEncoder()
df.loc[:, 'suggestion_encoded'] = suggestion_encoder.fit_transform(df['suggestion'])

# Step 3: Prepare the features (X) and target (y)
X = df[['error_type_encoded', 'netlist_component_encoded']]  # Features
y = df['suggestion_encoded']  # Target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)  # Using 100 trees
rf_model.fit(X_train, y_train)

# Step 5: Make predictions
y_pred = rf_model.predict(X_test)

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 100.00%


In [57]:
import joblib

joblib.dump(rf_model, "random_forest_model.pkl")  # Save the trained model
joblib.dump(error_type_encoder, "error_type_encoder.pkl")  # Save encoders
joblib.dump(netlist_component_encoder, "netlist_component_encoder.pkl")
joblib.dump(suggestion_encoder, "suggestion_encoder.pkl")
joblib.dump(X.columns.tolist(), "feature_columns.pkl")  # Save feature names


['feature_columns.pkl']