In [2]:
import numpy as np
import pandas as pd
import lizard
import torch
import torch.nn as nn
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from Static_code_analyzer import static_code_analyzer

In [None]:
# ====== Load Dataset ======
df = pd.read_csv('Datasets/processed_jm1.csv')
X = df.drop("defects", axis=1).values
y = df["defects"].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
scaler = joblib.load("models/scaler.pkl")
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [6]:
class ANN(nn.Module):
    def __init__(self, input_dim):
        super(ANN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.layers(x)

In [7]:
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim=32, num_layers=1):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers, batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = out[:, -1, :]  # last output
        out = self.fc(out)
        return out

In [8]:
class MetaANN(nn.Module):
    def __init__(self):
        super(MetaANN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        return self.fc(x)

In [None]:
#Import models
rf_model = joblib.load("models/rf_model.pkl")
xgb_model = joblib.load("models/xgb_model.pkl")

input_dim = X_train_scaled.shape[1]

ann_model = ANN(input_dim)
ann_model.load_state_dict(torch.load('models/ann_model.pth'))

rnn_model = RNN(input_dim)
rnn_model.load_state_dict(torch.load('models/rnn_model.pth'))

meta_model = MetaANN()
meta_model.load_state_dict(torch.load('models/meta_model.pth'))


<All keys matched successfully>

In [None]:
rf_pred = rf_model.predict_proba(X_train_scaled)[:, 1]
xgb_pred = xgb_model.predict_proba(X_train_scaled)[:, 1]

with torch.no_grad():
        ann_pred = torch.sigmoid(
            ann_model(torch.tensor(X_train_scaled, dtype=torch.float32))
        ).numpy().flatten()
        
        rnn_pred = torch.sigmoid(
            rnn_model(torch.tensor(X_train_scaled, dtype=torch.float32).unsqueeze(1))
        ).numpy().flatten()
    
    # Stack predictions
stacked_input = np.vstack((rf_pred, xgb_pred, ann_pred, rnn_pred)).T

# Predict using meta-model
with torch.no_grad():
    meta_output = meta_model(torch.tensor(stacked_input, dtype=torch.float32))
    meta_prob = torch.sigmoid(meta_output).numpy().flatten()
    meta_prediction = (meta_prob > 0.5).astype(int)

# Evaluate the meta-model on Train set
print("=== Stacked Hybrid Evaluation on Train Set ===")
print(f"Accuracy: {accuracy_score(y_train, meta_prediction)*100:.2f}%")
print(f"Precision: {precision_score(y_train, meta_prediction)*100:.2f}%")
print(f"Recall: {recall_score(y_train, meta_prediction)*100:.2f}%")
print(f"F1 Score: {f1_score(y_train, meta_prediction)*100:.2f}%")

=== Stacked Hybrid Evaluation on Train Set ===
Accuracy: 96.45%
Precision: 97.02%
Recall: 95.87%
F1 Score: 96.44%


In [None]:
rf_pred = rf_model.predict_proba(X_test_scaled)[:, 1]
xgb_pred = xgb_model.predict_proba(X_test_scaled)[:, 1]

with torch.no_grad():
        ann_pred = torch.sigmoid(
            ann_model(torch.tensor(X_test_scaled, dtype=torch.float32))
        ).numpy().flatten()
        
        rnn_pred = torch.sigmoid(
            rnn_model(torch.tensor(X_test_scaled, dtype=torch.float32).unsqueeze(1))
        ).numpy().flatten()
    
    # Stack predictions
stacked_input = np.vstack((rf_pred, xgb_pred, ann_pred, rnn_pred)).T

# Predict using meta-model
with torch.no_grad():
    meta_output = meta_model(torch.tensor(stacked_input, dtype=torch.float32))
    meta_prob = torch.sigmoid(meta_output).numpy().flatten()
    meta_prediction = (meta_prob > 0.5).astype(int)

# Evaluate the meta-model on Test set
print("=== Stacked Hybrid Evaluation on Test Set ===")
print(f"Accuracy: {accuracy_score(y_test, meta_prediction)*100:.2f}%")
print(f"Precision: {precision_score(y_test, meta_prediction)*100:.2f}%")
print(f"Recall: {recall_score(y_test, meta_prediction)*100:.2f}%")
print(f"F1 Score: {f1_score(y_test, meta_prediction)*100:.2f}%")

=== Stacked Hybrid Evaluation on Test Set ===
Accuracy: 96.61%
Precision: 96.91%
Recall: 96.18%
F1 Score: 96.54%


In [12]:
def analysis(file_path):
    analysis = lizard.analyze_file(file_path)
    features = []

    for function in analysis.function_list:
        n = function.token_count  
        v_g = function.cyclomatic_complexity
        v = n * np.log2(n + 1) if n > 0 else 0  
        d = v / (n + 1) if n > 0 else 1  
        i = (1 / d) * v if d > 0 else 0  
        e = v * d  
        b = v / 3000  
        t = e / 18  

        comment_lines = 0  # If unavailable, keep at zero
        loc_code_and_comment = function.length + comment_lines  

        features.append({
            "loc": function.length,
            "v(g)": v_g,
            "ev(g)": max(0, v_g - 1),
            "iv(g)": max(0, v_g // 2),
            "n": n,
            "v": v,
            "l": 1 / d if d > 0 else 0,
            "d": d,
            "i": i,
            "e": e,
            "b": b,
            "t": t,
            "lOCode": function.length,
            "lOComment": comment_lines,
            "lOBlank": 0,
            "locCodeAndComment": loc_code_and_comment,
            "uniq_Op": max(1, n // 2),
            "uniq_Opnd": max(1, n // 2),
            "total_Op": max(1, n // 2),
            "total_Opnd": max(1, n // 2),
            "branchCount": function.fan_out
        })

    issues = static_code_analyzer(file_path)  # Smart integration
    return issues, features

In [None]:
def predict_faulty(feature_dict, threshold = 0.5):
    input_df = pd.DataFrame([feature_dict])
    
    input_scaled = scaler.transform(input_df)
    
    rf_pred = rf_model.predict_proba(input_scaled)[:, 1]
    xgb_pred = xgb_model.predict_proba(input_scaled)[:, 1]
    
    with torch.no_grad():
        ann_pred = torch.sigmoid(
            ann_model(torch.tensor(input_scaled, dtype=torch.float32))
        ).numpy().flatten()
        
        rnn_pred = torch.sigmoid(
            rnn_model(torch.tensor(input_scaled, dtype=torch.float32).unsqueeze(1))
        ).numpy().flatten()
    
    print(f"RF prediction: {rf_pred[0]*100:.2f}\nXGB prediction {xgb_pred[0]*100:.2f}\nANN prediction: {ann_pred[0]*100:.2f}\nRNN prediction {rnn_pred[0]*100:.2f}")

    stacked_input = np.vstack((rf_pred, xgb_pred, ann_pred, rnn_pred)).T
    
    with torch.no_grad():
        meta_output = meta_model(torch.tensor(stacked_input, dtype=torch.float32))
        meta_prob = torch.sigmoid(meta_output).numpy().flatten()[0]
        meta_prediction = int(meta_prob > threshold)
    
    # return "Faulty" if final_prediction > threshold else "Not Faulty",
    return {
        "prediction": "Faulty" if meta_prediction > threshold else "Not Faulty",
        "probability": meta_prob*100
    }

In [18]:
#2142
test_input_faulty = {
    "loc": 122.93905462539,
    "v(g)": 16.26649839337088,
    "ev(g)": 8.868012853032942,
    "iv(g)": 13.532996786741764,
    "n": 318.00252409943676,
    "v": 2005.4725250802708,
    "l": 0.03,
    "d": 30.07639742939341,
    "i": 66.78021982918675,
    "e": 60260.60345429485,
    "b": 0.666675080331456,
    "t": 3347.8144516319544,
    "lOCode": 95.93653052595323,
    "lOComment": 5.134511246403824,
    "lOBlank": 19.868012853032944,
    "locCodeAndComment": 0.0,
    "uniq_Op": 25.200504819887353,
    "uniq_Opnd": 54.068517672920294,
    "total_Op": 189.3350160662912,
    "total_Opnd": 128.6675080331456,
    "branchCount": 31.532996786741766
}

#2100
test_input_not_faulty = {
    "loc": 15.0,
    "v(g)": 2.0,
    "ev(g)": 1.0,
    "iv(g)": 2.0,
    "n": 53.0,
    "v": 239.75,
    "l": 0.21,
    "d": 4.81,
    "i": 49.82,
    "e": 1153.79,
    "b": 0.08,
    "t": 64.09,
    "lOCode": 12.0,
    "lOComment": 0.0,
    "lOBlank": 1.0,
    "locCodeAndComment": 0.0,
    "uniq_Op": 7.0,
    "uniq_Opnd": 16.0, 
    "total_Op": 31.0,
    "total_Opnd": 22.0,
    "branchCount": 3.0
}

result_faulty = predict_faulty(test_input_faulty)
print(f"Prediction: {result_faulty['prediction']} (Fault Probability: {result_faulty['probability']:.2f}%)\n")
result_not_faulty = predict_faulty(test_input_not_faulty)
print(f"Prediction: {result_not_faulty['prediction']} (Fault Probability: {result_not_faulty['probability']:.2f}%)")

RF prediction: 91.00
LR prediction 81.91
ANN prediction: 89.85
RNN prediction 88.88
Prediction: Faulty (Fault Probability: 99.93%)

RF prediction: 17.00
LR prediction 38.23
ANN prediction: 14.10
RNN prediction 30.69
Prediction: Not Faulty (Fault Probability: 0.98%)


In [22]:
issues, features = analysis("Present/Examples/faulty_example.c")
if not issues:
    result = predict_faulty(features[1])
    print(f"Prediction: {result['prediction']} (Fault Probability: {result['probability']:.4f})")
else:
    print("Faulty: Static Issues found in the code.")

Faulty: Static Issues found in the code.


In [23]:
issues, features = analysis("Present/Examples/non_faulty_example.c")
if not issues:
    result = predict_faulty(features[1])
    print(f"Prediction: {result['prediction']} (Fault Probability: {result['probability']:.4f})")
else:
    print("Faulty: Static Issues found in the code.")

RF prediction: 45.90
LR prediction 34.71
ANN prediction: 51.80
RNN prediction 17.67
Prediction: Not Faulty (Fault Probability: 46.4218)
