<a href="https://colab.research.google.com/github/NishthaMi/PythonAssignment1/blob/main/HYBRID_ENTROPY_FLAKEFLAGGER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Remove any previous checkout so the clone below starts from a clean directory.
!rm -rf FlakeFlagger
# Fetch the FlakeFlagger repository into the current working directory.
!git clone https://github.com/AlshammariA/FlakeFlagger.git
# List the top-level contents of the fresh clone.
!ls FlakeFlagger


Cloning into 'FlakeFlagger'...
remote: Enumerating objects: 21897, done.[K
remote: Counting objects: 100% (472/472), done.[K
remote: Compressing objects: 100% (231/231), done.[K
remote: Total 21897 (delta 215), reused 460 (delta 204), pack-reused 21425 (from 1)[K
Receiving objects: 100% (21897/21897), 16.02 MiB | 10.90 MiB/s, done.
Resolving deltas: 100% (7295/7295), done.
Updating files: 100% (21926/21926), done.
flakiness-predicter  README.md		     test-rerun-scripts
LICENSE		     test-feature-collector


In [None]:
# List the project directories under original_tests, one name per line.
!ls -1 FlakeFlagger/flakiness-predicter/input_data/original_tests

# Show the contents (folders/files, including hidden entries) of the example
# project used throughout this notebook: square-okhttp.
!ls -la FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp


activiti-activiti
Alluxio-alluxio
apache-ambari
apache-commons-exec
apache-hbase
apache-httpcore
apache-incubator-dubbo
doanduyhai-Achilles
elasticjob-elastic-job-lite
hector-client-hector
jknack-handlebars
joel-costigliola-assertj-core
kevinsawicki-http-request
ninjaframework-ninja
orbit-orbit
qos-ch-logback
spring-projects-spring-boot
square-okhttp
togglz-togglz
tootallnate-java-websocket
undertow-io-undertow
wildfly-wildfly
wro4j-wro4j
zxing-zxing
total 116
drwxr-xr-x  4 root root  4096 Nov 19 06:09 .
drwxr-xr-x 26 root root  4096 Nov 19 06:09 ..
drwxr-xr-x  2 root root 16384 Nov 19 06:09 flakyMethods
drwxr-xr-x  2 root root 94208 Nov 19 06:09 nonFlakyMethods


In [None]:
import os
import math
import pandas as pd
from collections import Counter

# Paths
# Kept relative to the working directory because the `git clone` at the top of
# the notebook also writes relative to the working directory; an absolute
# "/content/..." prefix only resolves when the kernel happens to run there.
base = os.path.join(
    "FlakeFlagger", "flakiness-predicter", "input_data", "original_tests", "square-okhttp"
)
flaky_path = os.path.join(base, "flakyMethods")
nonflaky_path = os.path.join(base, "nonFlakyMethods")

def calc_entropy(seq):
    """Return the Shannon entropy (in bits) of the values in ``seq``.

    Args:
        seq: A sized collection of hashable items (e.g. a list of outcome
            strings such as "PASS"/"FAIL"/"SKIP").

    Returns:
        The entropy as a non-negative float. A sequence with a single distinct
        value, or an empty sequence, yields ``0.0``.
    """
    c = Counter(seq)
    total = len(seq)
    probs = [v/total for v in c.values()]
    # Subtract from 0.0 instead of negating: negating a zero sum produces the
    # IEEE value -0.0, which shows up as "-0.0" in printed tables.
    return 0.0 - sum(p*math.log2(p) for p in probs if p>0)

def read_folder(path, label):
    """Compute a pseudo-outcome entropy for every ``.java`` file in ``path``.

    No test is executed. For each file the lines are scanned for keywords and
    turned into a synthetic outcome list: one "PASS" per line containing
    "assert" (case-sensitive), one "FAIL" per line containing "fail" and one
    "SKIP" per line containing "skip" (both case-insensitive). The entropy of
    that list is the file's score.

    Args:
        path: Directory containing the ``.java`` test-method files.
        label: Value stored unchanged in the "label" field of every row.

    Returns:
        A list of dicts with keys "file", "entropy" and "label", ordered by
        file name.
    """
    data = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order reproducible across machines and runs.
    for fname in sorted(os.listdir(path)):
        if not fname.endswith(".java"):
            continue
        fpath = os.path.join(path, fname)

        # Explicit encoding so the result does not depend on the locale;
        # undecodable bytes are dropped rather than raising.
        with open(fpath, "r", encoding="utf-8", errors="ignore") as f:
            lines = f.read().splitlines()

        # pseudo outcomes
        pass_count = sum("assert" in l for l in lines)
        fail_count = sum("fail" in l.lower() for l in lines)
        skip_count = sum("skip" in l.lower() for l in lines)

        results = (["PASS"] * pass_count) + (["FAIL"] * fail_count) + (["SKIP"] * skip_count)

        # Files with fewer than three keyword hits all receive this fixed
        # placeholder, and therefore the same entropy (~0.918).
        if len(results) < 3:
            results = ["PASS", "FAIL", "PASS"]

        entropy = calc_entropy(results)

        data.append({
            "file": fname,
            "entropy": entropy,
            "label": label
        })

    return data


# Read both folders. The second argument is stored as each row's "label":
# 1 for files under flakyMethods, 0 for files under nonFlakyMethods.
flaky_data = read_folder(flaky_path, 1)
nonflaky_data = read_folder(nonflaky_path, 0)

# One row per .java file, flaky rows first.
df = pd.DataFrame(flaky_data + nonflaky_data)
print("Total files inside dataframe:", len(df))
# Last expression of the cell: rendered as the cell's output table.
df.head()


Total files inside dataframe: 810


Unnamed: 0,file,entropy,label
0,com.squareup.okhttp.internal.http.ResponseCach...,0.918296,1
1,com.squareup.okhttp.internal.spdy.SpdyConnecti...,-0.0,1
2,com.squareup.okhttp.internal.spdy.SpdyConnecti...,-0.0,1
3,com.squareup.okhttp.internal.http.ResponseCach...,0.918296,1
4,com.squareup.okhttp.internal.http.ResponseCach...,-0.0,1


In [None]:
import os
import math
import pandas as pd
from collections import Counter

# ---- Folder paths ----
# Both method folders live directly under the square-okhttp project directory.
base_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp"
flaky_path, nonflaky_path = (
    os.path.join(base_path, leaf) for leaf in ("flakyMethods", "nonFlakyMethods")
)

# Sanity check that the clone is where the rest of the cell expects it.
print(f"Flaky path exists: {os.path.exists(flaky_path)}")
print(f"Non-flaky path exists: {os.path.exists(nonflaky_path)}")

# ---- Entropy function ----
def calculate_entropy(values):
    """Return the Shannon entropy (in bits) of the items in ``values``.

    Args:
        values: A sized collection of hashable items.

    Returns:
        A non-negative float; ``0.0`` when ``values`` is empty or contains a
        single distinct item.
    """
    total = len(values)
    counts = Counter(values)
    probs = [c / total for c in counts.values()]
    weighted_logs = sum(p * math.log2(p) for p in probs if p > 0)
    # Adding 0.0 turns the IEEE negative zero produced by negating a zero sum
    # into a plain 0.0, so tables never display "-0.0".
    return -weighted_logs + 0.0

# ---- Analyze folder ----
def analyze_folder(folder_path, label):
    """Score every ``.java`` file in ``folder_path`` with a keyword-based entropy.

    The "outcomes" are simulated, not observed: each line containing "assert"
    (case-sensitive) contributes a "PASS", each line containing "fail" a
    "FAIL" and each line containing "skip" a "SKIP" (both case-insensitive).

    Args:
        folder_path: Directory holding the test-method files.
        label: Value stored unchanged in the "Label" field of every row.

    Returns:
        A list of dicts with keys "Test_File", "Entropy" and "Label", in
        sorted file-name order.
    """
    data = []
    files = sorted(os.listdir(folder_path))
    # The count covers every directory entry, not only the .java files.
    print(f"📁 Reading {len(files)} files from: {folder_path}")

    for filename in files:
        if not filename.endswith(".java"):
            continue
        file_path = os.path.join(folder_path, filename)

        # Read everything up front so the handle is closed before analysis.
        with open(file_path, "r", errors="ignore") as f:
            lines = f.read().splitlines()

        # Fake heuristic for PASS / FAIL / SKIP simulation
        p = sum("assert" in l for l in lines)
        f_cnt = sum("fail" in l.lower() for l in lines)
        s = sum("skip" in l.lower() for l in lines)

        results = ["PASS"] * p + ["FAIL"] * f_cnt + ["SKIP"] * s
        # Fewer than three keyword hits: substitute a fixed placeholder, so
        # all such files share the same entropy (~0.918).
        if len(results) < 3:
            results = ["PASS", "FAIL", "PASS"]

        ent = calculate_entropy(results)

        data.append({
            "Test_File": filename,
            "Entropy": ent,
            "Label": label
        })

    return data

# ---- Compute for both folders ----
# The label strings are stored as-is in the "Label" column.
flaky_data = analyze_folder(flaky_path, "flaky")
nonflaky_data = analyze_folder(nonflaky_path, "nonflaky")

# One row per .java file, flaky rows first.
df = pd.DataFrame(flaky_data + nonflaky_data)

# The status glyph was mis-encoded in the saved notebook; restored here.
print("\n✅ Entropy DataFrame Created!")
print(df.head())
print("\nTotal Rows:", len(df))


Flaky path exists: True
Non-flaky path exists: True
üìÅ Reading 100 files from: FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/flakyMethods
üìÅ Reading 710 files from: FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/nonFlakyMethods

‚úÖ Entropy DataFrame Created!
                                           Test_File   Entropy  Label
0     com.squareup.okhttp.AsyncApiTest-redirect.java -0.000000  flaky
1  com.squareup.okhttp.internal.http.HttpResponse...  0.918296  flaky
2  com.squareup.okhttp.internal.http.HttpResponse...  0.918296  flaky
3  com.squareup.okhttp.internal.http.HttpResponse... -0.000000  flaky
4  com.squareup.okhttp.internal.http.HttpResponse...  0.918296  flaky

Total Rows: 810


In [None]:
import numpy as np

# Convert textual labels to binary
# flaky -> 1   |   nonflaky -> 0 (any value other than "flaky" also maps to 0)
df['Label_Binary'] = df['Label'].apply(lambda x: 1 if x == "flaky" else 0)

# True labels
y_true = df['Label_Binary'].values

# Entropy values
entropy_values = df['Entropy'].values

# Choose threshold (same as CANNIER)
threshold = np.percentile(entropy_values, 75)  # Top 25% entropy = flaky

print("Entropy threshold:", threshold)

# Predicted labels
# NOTE(review): `>=` also flags every row tied with the threshold. In the saved
# run the threshold equals the placeholder entropy (0.918...), and the confusion
# matrix shows 493 of 810 rows predicted flaky rather than ~25%.
y_pred = (entropy_values >= threshold).astype(int)


Entropy threshold: 0.9182958340544896


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Score the percentile-threshold predictions against the folder labels.
# zero_division=0 makes an undefined precision/recall report 0 instead of
# emitting a warning.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)
# Rows are true classes, columns are predicted classes (0 first, then 1).
cm = confusion_matrix(y_true, y_pred)

# The header glyph was mis-encoded in the saved notebook; restored here.
print("📊 ENTROPY CLASSIFIER RESULTS (FlakeFlagger Dataset)")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("\nConfusion Matrix:\n", cm)
# Same zero_division policy as the individual metrics above.
print("\nClassification Report:\n", classification_report(y_true, y_pred, zero_division=0))


üìä ENTROPY CLASSIFIER RESULTS (FlakeFlagger Dataset)
Accuracy: 0.4654320987654321
Precision: 0.16227180527383367
Recall: 0.8
F1 Score: 0.26981450252951095

Confusion Matrix:
 [[297 413]
 [ 20  80]]

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.42      0.58       710
           1       0.16      0.80      0.27       100

    accuracy                           0.47       810
   macro avg       0.55      0.61      0.42       810
weighted avg       0.84      0.47      0.54       810



In [None]:
import os

# Locations of the flaky / non-flaky method folders inside the clone.
flaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/flakyMethods"
nonflaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/nonFlakyMethods"

# Report whether each location is present on disk.
print(f"Flaky exists: {os.path.exists(flaky_path)}")
print(f"NonFlaky exists: {os.path.exists(nonflaky_path)}")


Flaky exists: True
NonFlaky exists: True


In [None]:
import os, math, pandas as pd
from collections import Counter

# Locations of the flaky / non-flaky method folders inside the clone.
flaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/flakyMethods"
nonflaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/nonFlakyMethods"

# Report whether each location is an existing directory.
print("Flaky exists: " + str(os.path.isdir(flaky_path)))
print("NonFlaky exists: " + str(os.path.isdir(nonflaky_path)))


Flaky exists: True
NonFlaky exists: True


In [None]:
def entropy(values):
    """Shannon entropy, in bits, of the items in ``values``.

    Each distinct item contributes ``p * log2(p)`` with ``p`` its relative
    frequency; the negated total is returned.
    """
    n_items = len(values)
    running = 0
    for occurrences in Counter(values).values():
        if occurrences > 0:
            share = occurrences / n_items
            running += share * math.log2(share)
    return -running


In [None]:
import numpy as np

def simulate_runs(file_path, label, num_runs=30, rng=None):
    """Draw synthetic PASS/FAIL/SKIP outcomes for one test file.

    The failure and skip probabilities start from a base that depends on
    ``label`` and grow with the number of lines mentioning "fail" / "skip"
    (case-insensitive), each capped at a label-specific maximum.

    NOTE(review): ``label`` directly sets the outcome probabilities, so any
    statistic computed from these draws encodes the label it was given.

    Args:
        file_path: Path of the source file to scan.
        label: ``"flaky"`` selects the high-probability branch; any other
            value selects the low-probability branch.
        num_runs: Number of outcomes to draw (default 30).
        rng: Optional object exposing ``choice(a, size=..., p=...)``, e.g. a
            ``numpy.random.Generator``. Defaults to NumPy's global generator.

    Returns:
        A NumPy array of ``num_runs`` strings drawn from "PASS", "FAIL", "SKIP".
    """
    with open(file_path, "r", errors="ignore") as f:
        lines = f.read().splitlines()

    fail_cnt = sum("fail" in l.lower() for l in lines)
    skip_cnt = sum("skip" in l.lower() for l in lines)

    # Base probabilities
    if label == "flaky":
        p_fail = min(0.4 + fail_cnt*0.01, 0.8)
        p_skip = min(0.2 + skip_cnt*0.01, 0.5)
    else:
        p_fail = min(0.05 + fail_cnt*0.005, 0.2)
        p_skip = min(0.05 + skip_cnt*0.005, 0.1)

    non_pass = p_fail + p_skip
    if non_pass > 1.0:
        # The flaky caps (0.8 + 0.5) can exceed 1, which would make p_pass
        # negative and the sampler raise ValueError. Rescale FAIL/SKIP to fill
        # the whole distribution instead.
        p_fail, p_skip = p_fail / non_pass, p_skip / non_pass
        p_pass = 0.0
    else:
        p_pass = 1 - non_pass

    sampler = np.random if rng is None else rng
    outcomes = sampler.choice(
        ["PASS","FAIL","SKIP"],
        size=num_runs,
        p=[p_pass, p_fail, p_skip]
    )
    return outcomes


In [None]:
import os
import math
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Correct dataset paths
flaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/flakyMethods"
nonflaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/nonFlakyMethods"

# Report, one line per folder, whether it is present on disk.
for _caption, _folder in (("Flaky exists:", flaky_path), ("NonFlaky exists:", nonflaky_path)):
    print(_caption, os.path.exists(_folder))


Flaky exists: True
NonFlaky exists: True


In [None]:
def calc_entropy(values):
    """Return the Shannon entropy (in bits) of the items in ``values``.

    Args:
        values: A sized collection of hashable items (outcome strings, tokens,
            line lengths, ...).

    Returns:
        A non-negative float; ``0.0`` for an empty collection or one with a
        single distinct item.
    """
    total = len(values)
    counts = Counter(values)
    probs = [c/total for c in counts.values()]
    # Every p*log2(p) term is <= 0, so the negated sum is >= 0. abs() only
    # changes the sign of a zero result, turning -0.0 into 0.0.
    return abs(-sum(p * math.log2(p) for p in probs if p > 0))


In [None]:
def extract_hybrid_features(file_path):
    """Compute three entropy features from the text of one source file.

    Args:
        file_path: Path of the file to read (decoded as UTF-8, undecodable
            bytes ignored).

    Returns:
        A tuple ``(entropy1, entropy2, entropy3)``:
          * entropy1: entropy of a simulated PASS/FAIL/SKIP list built from
            keyword hits per line;
          * entropy2: entropy of the whitespace-separated tokens;
          * entropy3: entropy of the line lengths.
        Returns ``None`` (after printing the error) when the file cannot be
        read; callers must check for it before unpacking.
    """
    # Only the file read is guarded, so a bug in the feature code raises
    # normally instead of silently producing None.
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.read().splitlines()
    except OSError as e:
        print("Error:", e)
        return None

    # Feature 1 - Line content entropy (PASS/FAIL/SKIP simulation)
    pass_count = sum("assert" in l for l in lines)
    fail_count = sum("fail" in l.lower() for l in lines)
    skip_count = sum("skip" in l.lower() for l in lines)
    outcomes = ['PASS'] * pass_count + ['FAIL'] * fail_count + ['SKIP'] * skip_count
    # Fewer than three keyword hits: fall back to a fixed placeholder.
    if len(outcomes) < 3:
        outcomes = ['PASS', 'FAIL', 'PASS']
    entropy1 = calc_entropy(outcomes)

    # Feature 2 - Token entropy
    tokens = " ".join(lines).split()
    if len(tokens) < 3:
        tokens = ["a", "b", "a"]
    entropy2 = calc_entropy(tokens)

    # Feature 3 - Line length entropy (placeholder lengths for an empty file)
    lengths = [len(l) for l in lines] or [1, 2, 1]
    entropy3 = calc_entropy(lengths)

    return entropy1, entropy2, entropy3


In [None]:
data = []

# flaky = 1, non-flaky = 0
for folder, label in [(flaky_path, 1), (nonflaky_path, 0)]:
    # Sorted so the row order does not depend on the file system.
    for f in sorted(os.listdir(folder)):
        if not f.endswith(".java"):
            continue
        fpath = os.path.join(folder, f)
        feats = extract_hybrid_features(fpath)
        if feats is None:
            # The extractor returns None for unreadable files; unpacking that
            # would raise TypeError, so skip the file instead.
            continue
        e1, e2, e3 = feats
        data.append([f, e1, e2, e3, label])

df = pd.DataFrame(data, columns=["File", "Entropy1", "Entropy2", "Entropy3", "Label"])
print(df.head())
print("Total rows:", len(df))


                                                File  Entropy1  Entropy2  \
0  com.squareup.okhttp.internal.http.ResponseCach...  0.918296  4.262692   
1  com.squareup.okhttp.internal.spdy.SpdyConnecti... -0.000000  4.169925   
2  com.squareup.okhttp.internal.spdy.SpdyConnecti... -0.000000  4.754888   
3  com.squareup.okhttp.internal.http.ResponseCach...  0.918296  3.459432   
4  com.squareup.okhttp.internal.http.ResponseCach... -0.000000  5.534621   

   Entropy3  Label  
0  2.750000      1  
1  3.121928      1  
2  3.875000      1  
3  1.584963      1  
4  3.016151      1  
Total rows: 810


In [None]:
# Combine the three entropy features into one score as a fixed weighted sum.
# The weights (0.5 / 0.3 / 0.2) are hard-coded here; the notebook does not show
# how they were chosen.
df["HybridEntropy"] = (
    0.5 * df["Entropy1"] +
    0.3 * df["Entropy2"] +
    0.2 * df["Entropy3"]
)


In [None]:
# Use the median hybrid score as the decision threshold: rows strictly above
# it are predicted flaky (1), all others non-flaky (0).
threshold = df["HybridEntropy"].median()
df["Pred"] = (df["HybridEntropy"] > threshold).astype(int)

# Ground truth and predictions as NumPy arrays.
# NOTE(review): no metric cell using these arrays follows before `df` is
# rebuilt in the next cell - confirm whether an evaluation step is missing.
y_true = df["Label"].values
y_pred = df["Pred"].values


In [None]:
import os
import math
import numpy as np
import pandas as pd
from collections import Counter

# Folders with the flaky / non-flaky test-method files of square-okhttp,
# relative to the working directory the repository was cloned into.
flaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/flakyMethods"
nonflaky_path = "FlakeFlagger/flakiness-predicter/input_data/original_tests/square-okhttp/nonFlakyMethods"

# ---------- Entropy function ----------
def entropy(values):
    """Return the Shannon entropy (in bits) of the items in ``values``.

    Args:
        values: A sized collection of hashable items (here, simulated
            "PASS"/"FAIL" outcomes).

    Returns:
        A non-negative float; ``0.0`` when ``values`` is empty or all items
        are identical.
    """
    c = Counter(values)
    total = len(values)
    probs = [v / total for v in c.values()]
    # Subtracting from 0.0 rather than negating avoids returning the IEEE
    # value -0.0 when every outcome is the same.
    return 0.0 - sum(p * math.log2(p) for p in probs if p > 0)

# ---------- Extract features from Java test ----------
def extract_features(filepath):
    """Count keyword occurrences and tokens in one source file.

    The text is lower-cased before counting, so matches are case-insensitive
    and counted as substrings (e.g. "assertEquals" counts as one "assert").

    Args:
        filepath: Path of the file to read (UTF-8, undecodable bytes ignored).

    Returns:
        A tuple ``(num_assert, num_fail, num_sleep, length)`` where ``length``
        is the number of whitespace-separated tokens. ``(0, 0, 0, 0)`` is
        returned when the file cannot be opened or read.
    """
    try:
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            text = f.read().lower()
    except OSError:
        # Best effort: an unreadable file contributes an all-zero row. Only
        # I/O errors are caught, so Ctrl-C and real bugs are not swallowed.
        return (0, 0, 0, 0)

    num_assert = text.count("assert")
    num_fail = text.count("fail")
    num_sleep = text.count("sleep")
    length = len(text.split())

    return (num_assert, num_fail, num_sleep, length)

# ---------- Generate synthetic test outcomes ----------
def simulate_runs(features, num_runs=30):
    """Draw ``num_runs`` synthetic "PASS"/"FAIL" outcomes from static features.

    ``features`` is the 4-tuple ``(asserts, fails, sleeps, length)``. An
    instability score ``fails*2 + sleeps + asserts*0.1`` is capped at 15 and
    scaled to a failure probability, itself clamped to [0.01, 0.99]. Each run
    draws one uniform number from NumPy's global generator.

    Returns a list of ``num_runs`` strings.
    """
    # `length` is unpacked only to enforce the 4-tuple shape; it is not used.
    asserts, fails, sleeps, length = features

    capped_score = min(fails*2 + sleeps + (asserts*0.1), 15)
    p_fail = min(max(capped_score / 15, 0.01), 0.99)

    return ["FAIL" if np.random.rand() < p_fail else "PASS" for _ in range(num_runs)]

# ---------- Build hybrid dataset ----------
def process_folder(folder, label):
    """Build one feature row per ``.java`` file in ``folder``.

    For each file the static counts are extracted, a batch of 30 synthetic
    outcomes is simulated from them, and the entropy of that batch is stored
    alongside the counts.

    Args:
        folder: Directory containing the test-method files.
        label: Value stored unchanged in the "label" field of every row.

    Returns:
        A list of dicts with keys "file", "entropy", "asserts", "fails",
        "sleeps", "length" and "label", in sorted file-name order.
    """
    rows = []
    # The simulation consumes random numbers file by file, so a fixed order is
    # needed for a seeded run to give the same entropy to the same file.
    # os.listdir() alone returns entries in arbitrary order.
    for fname in sorted(os.listdir(folder)):
        if not fname.endswith(".java"):
            continue
        fpath = os.path.join(folder, fname)
        features = extract_features(fpath)
        sim = simulate_runs(features, 30)
        ent = entropy(sim)

        rows.append({
            "file": fname,
            "entropy": ent,
            "asserts": features[0],
            "fails": features[1],
            "sleeps": features[2],
            "length": features[3],
            "label": label
        })
    return rows

# The simulated outcomes come from NumPy's global generator; seed it so the
# "entropy" column (and everything trained on it) is the same on every run.
np.random.seed(42)

hy_flaky = process_folder(flaky_path, 1)
hy_non = process_folder(nonflaky_path, 0)

# One row per .java file, flaky rows first.
df = pd.DataFrame(hy_flaky + hy_non)

print("Hybrid entropy dataset created, shape:", df.shape)
print(df.head())

# ---------- Train Hybrid ML model ----------
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Feature matrix: the simulated-run entropy plus the static keyword/token counts.
X = df[["entropy", "asserts", "fails", "sleeps", "length"]]
y = df["label"]

# Stratified 75/25 split keeps the flaky/non-flaky ratio in both parts.
# random_state pins the split so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

# random_state pins the forest's bootstrap and feature sampling as well.
clf = RandomForestClassifier(n_estimators=300, random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

# The header glyph was mis-encoded in the saved notebook; restored here.
print("\n📊 Hybrid Entropy ML Results")
print("Accuracy:", accuracy_score(y_test, pred))
print("\nConfusion:\n", confusion_matrix(y_test, pred))
print("\nReport:\n", classification_report(y_test, pred))


Hybrid entropy dataset created, shape: (810, 7)
                                                file   entropy  asserts  \
0  com.squareup.okhttp.internal.http.ResponseCach... -0.000000        2   
1  com.squareup.okhttp.internal.spdy.SpdyConnecti...  0.210842        3   
2  com.squareup.okhttp.internal.spdy.SpdyConnecti... -0.000000        6   
3  com.squareup.okhttp.internal.http.ResponseCach... -0.000000        1   
4  com.squareup.okhttp.internal.http.ResponseCach...  0.353359        7   

   fails  sleeps  length  label  
0      0       0      23      1  
1      0       0      18      1  
2      0       0      27      1  
3      0       0      11      1  
4      0       0      58      1  

üìä Hybrid Entropy ML Results
Accuracy: 0.8620689655172413

Confusion:
 [[175   3]
 [ 25   0]]

Report:
               precision    recall  f1-score   support

           0       0.88      0.98      0.93       178
           1       0.00      0.00      0.00        25

    accuracy              