In [1]:
import pandas as pd

In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [22]:
def simulate_ppl_data(n=500, seed=42):
    """Generate a synthetic table of CI/CD pipeline runs.

    Parameters
    ----------
    n : int, default 500
        Number of simulated builds (rows) to produce.
    seed : int or None, default 42
        Seed for the private random generator used by this function. The
        default yields the same values as seeding NumPy's global generator
        with 42 and drawing in the same order.

    Returns
    -------
    pandas.DataFrame
        One row per build with the columns ``code_change_lines``,
        ``files_changed``, ``unit_t_coverage``, ``num_failed_t``,
        ``build_duration``, ``developer_experience`` and ``failure``.

    Notes
    -----
    ``failure`` is sampled independently of every other column (30% ones),
    so the features carry no real signal about the label.
    """
    # A private RandomState keeps the draw reproducible without reseeding
    # np.random globally, which would silently affect unrelated code.
    rng = np.random.RandomState(seed)
    # Draw order matters: it determines which values each column receives.
    data = {
        'code_change_lines': rng.poisson(20, n),
        'files_changed': rng.randint(1, 10, n),         # integers 1..9
        'unit_t_coverage': rng.uniform(50, 100, n),
        'num_failed_t': rng.randint(0, 5, n),           # integers 0..4
        'build_duration': rng.normal(10, 2, n),
        'developer_experience': rng.randint(1, 5, n),   # integers 1..4
        'failure': rng.choice([0, 1], n, p=[0.7, 0.3])
    }
    return pd.DataFrame(data)



In [23]:
def engineer_f(df):
    """Derive the ``t_failure_ratio`` feature and drop ``num_failed_t``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``num_failed_t`` and ``files_changed``.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``t_failure_ratio`` appended and ``num_failed_t``
        removed. The input frame is left unmodified.
    """
    # The +1 in the denominator guards against division by zero.
    ratio = df['num_failed_t'] / (df['files_changed'] + 1)
    # assign() returns a copy, so the caller's frame never gains the column.
    return df.assign(t_failure_ratio=ratio).drop(columns=['num_failed_t'])

In [24]:
def tr_model(df, test_size=0.2, random_state=42, n_estimators=100):
    """Train a random-forest failure classifier and print its evaluation.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table that includes the target column ``failure``.
    test_size : float, default 0.2
        Fraction of rows held out for evaluation.
    random_state : int, default 42
        Seed used for both the train/test split and the forest.
    n_estimators : int, default 100
        Number of trees in the forest.

    Returns
    -------
    tuple
        ``(model, X_te)``: the fitted classifier and the held-out feature
        rows it was evaluated on.
    """
    X = df.drop(columns=['failure'])
    y = df['failure']
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_tr, y_tr)
    y_pred = model.predict(X_te)
    # The report is printed rather than returned; callers only get the model
    # and the held-out features.
    print("Model Evaluation:\n", classification_report(y_te, y_pred))
    return model, X_te

In [25]:
def suggest_actions(sample):
    """Return remediation hints for one build's feature values.

    ``sample`` is indexed by key and must provide ``unit_t_coverage``,
    ``t_failure_ratio`` and ``code_change_lines``. Each rule whose threshold
    is crossed contributes one message, in rule order; when none fire, a
    single "all clear" message is returned instead.
    """
    # (field, trigger condition, message) -- evaluated top to bottom.
    rules = (
        ('unit_t_coverage', lambda value: value < 70, "Increase unit test coverage."),
        ('t_failure_ratio', lambda value: value > 0.3, "Investigate failing t."),
        ('code_change_lines', lambda value: value > 50, "Break down large code changes."),
    )
    advice = [
        message
        for field, is_triggered, message in rules
        if is_triggered(sample[field])
    ]
    return advice or ["No issues found, looks good."]



In [27]:
if __name__ == "__main__":
    # Build the synthetic dataset, then derive the model features from it.
    print("Simulating CI/CD Data")
    df = simulate_ppl_data()
    df = engineer_f(df)

    # Fit the classifier; the held-out rows stand in for "new builds" below.
    print("\n Training failure prediction model")
    model, X_te = tr_model(df)

    print("\n Predicting and Recommending Fixes on New Builds")
    for sample_no in range(1, 5):
        sample = X_te.iloc[sample_no - 1]
        # Wrap the single row in a one-row DataFrame before predicting.
        one_row = pd.DataFrame([sample])
        prediction = model.predict(one_row)[0]
        verdict = 'Yes' if prediction else 'No'

        print(f"\nSample {sample_no}:")
        print(sample)
        print(f" Predicted Failure: {verdict}")

        # Recommendations are only shown for builds predicted to fail.
        if not prediction:
            continue
        print(" Recommended Actions:")
        for action in suggest_actions(sample):
            print(f"- {action}")

Simulating CI/CD Data

 Training failure prediction model
Model Evaluation:
               precision    recall  f1-score   support

           0       0.71      0.86      0.78        71
           1       0.29      0.14      0.19        29

    accuracy                           0.65       100
   macro avg       0.50      0.50      0.48       100
weighted avg       0.59      0.65      0.61       100


 Predicting and Recommending Fixes on New Builds

Sample 1:
code_change_lines       14.000000
files_changed            6.000000
unit_t_coverage         72.584841
build_duration           8.044876
developer_experience     1.000000
t_failure_ratio          0.285714
Name: 361, dtype: float64
 Predicted Failure: No

Sample 2:
code_change_lines       27.000000
files_changed            3.000000
unit_t_coverage         51.778543
build_duration           8.353282
developer_experience     2.000000
t_failure_ratio          0.750000
Name: 73, dtype: float64
 Predicted Failure: No

Sample 3:
code_cha