# Joular and CK data analysis

In [29]:
import requests
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from scipy import stats
import plotly.graph_objects as go

# Root URL of the HTTP API; the request helpers below build their URLs as BASE_URL + endpoint path.
BASE_URL = "http://localhost:8080/api/v1"

# Exploration of data

- **First objective**: aggregate all the joular data for each method and keep the methods having 30 values
- **Second objective**: remove the aberrant values (outliers), i.e. the values that are too high or too low compared to the other values
- **Third objective**: sort all the data by their values in descending order
- **Fourth objective**: for a few methods (those having the highest values), compare the distribution of the values with the CK analysis of this method

## Aggregate the Joular values and get the methods with 30 values

In [12]:
def getEndpoint(endpoint, params=None, timeout=None):
    """Send a GET request to the API and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Path appended to ``BASE_URL``.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. ``None`` (the default) waits
        indefinitely, which is what the function did before this
        parameter existed.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    url = BASE_URL + endpoint
    response = requests.get(url=url, params=params, timeout=timeout)
    # Surface the HTTP status explicitly: without this check an error page
    # only shows up later as an opaque JSONDecodeError.
    response.raise_for_status()
    return response.json()

def getAllJoularDataForMethodsHaving30Values(sha=""):
    """Collect the ``content`` of every page of the Joular aggregates endpoint.

    When ``sha`` is non-empty the ``by-commit/<sha>`` variant of the endpoint
    is queried. Pages are requested one after the other until the response's
    ``last`` flag is true. This function does no filtering of its own; it
    returns whatever the endpoint sends back.
    """
    endpoint = "/joular/aggregates/"
    if sha != "":
        endpoint = endpoint + "by-commit/" + sha

    currentPage = getEndpoint(endpoint)
    collected = list(currentPage["content"])
    while not currentPage["last"]:
        nextPageNumber = currentPage["number"] + 1
        print("Calling page ", nextPageNumber)
        currentPage = getEndpoint(endpoint, {"page": nextPageNumber})
        collected += currentPage["content"]
    return collected

def getSeveralCkDataForOneMethod(commitSha: str, className: str, methodSignature: str, ckMetrics: list[str]):
    """Query the CK endpoint for one method of one commit.

    The class name, the method signature and the requested metric names are
    sent as the ``className``, ``methodSignature`` and ``names`` query
    parameters. Returns the decoded JSON response.
    """
    query = dict(className=className, methodSignature=methodSignature, names=ckMetrics)
    return getEndpoint("/ck-entities/by-commit-and-ast-elem/" + commitSha, query)

def createBoxplotJoular(repositoryName, methodSignature, allValues):
    """Display a box plot of ``allValues``; only outlier points are drawn individually."""
    plotTitle = "Joular values for " + methodSignature + " of repository " + repositoryName
    valuesFrame = pd.DataFrame({"allValues": allValues})
    px.box(valuesFrame, y="allValues", title=plotTitle, points="outliers").show()

In [13]:
def removeOutliersByZScore(data, threshold=3):
    """Drop the values whose absolute z-score is not below ``threshold``.

    Parameters
    ----------
    data : sequence of numbers
        A list or tuple is converted to a float NumPy array first, because
        boolean-mask indexing does not work on plain Python sequences.
        Other inputs (e.g. a NumPy array) are indexed as they are.
    threshold : float, optional
        Values with ``|z| >= threshold`` are removed.

    Returns
    -------
    The retained values (a NumPy array when a list or tuple was given).
    Empty input and input with zero standard deviation are returned
    unfiltered: no value can be an outlier there, and the z-scores would
    be NaN, which would otherwise discard everything.
    """
    if isinstance(data, (list, tuple)):
        data = np.asarray(data, dtype=float)
    if len(data) == 0 or np.std(np.asarray(data, dtype=float)) == 0:
        return data
    zScores = np.abs(stats.zscore(data))
    return data[zScores < threshold]

In [14]:
def removeOutliersByIQR(allValues):
    """Return a one-column DataFrame (``allValues``) without the IQR outliers.

    A value is dropped when it lies below ``Q1 - 1.5 * IQR`` or above
    ``Q3 + 1.5 * IQR``. The surviving rows keep their original index.
    """
    frame = pd.DataFrame({"allValues": allValues})
    lowerQuartile = frame.quantile(0.25)
    upperQuartile = frame.quantile(0.75)
    spread = upperQuartile - lowerQuartile
    lowerFence = lowerQuartile - 1.5 * spread
    upperFence = upperQuartile + 1.5 * spread
    isOutlier = ((frame < lowerFence) | (frame > upperFence)).any(axis=1)
    return frame[~isOutlier]

### Data for all repositories

In [15]:
# Fetch every aggregate, then keep only the methods that still have
# 30 values once the IQR outliers are removed.
allData = getAllJoularDataForMethodsHaving30Values()
print(len(allData))

# Sorted copy kept under its own name: the filtered result below must not
# overwrite the list that is being iterated.
sortedJoularData = sorted(allData, key=lambda x: x["allValues"], reverse=True)

joularDataRepo = []
for methodData in sortedJoularData:
    allValuesAfterIQR = removeOutliersByIQR(methodData["allValues"])
    if len(allValuesAfterIQR) == 30:
        joularDataRepo.append(methodData)
print(len(joularDataRepo))

# One row per measured value (long format) so the violin plot can group by repository.
dataAllRepositories = []
for doc in joularDataRepo:
    dataAllRepositories.extend({"Repository": doc["commit"]["repository"]["name"], "Values":val, "Class": doc["measurableElement"]["className"]} for val in doc["allValues"])
# NOTE: despite its name this frame holds Joular values; the name is kept for compatibility.
ckDataFrame = pd.DataFrame(dataAllRepositories)

if ckDataFrame.empty:
    # An empty frame has no "Values"/"Repository" columns to plot.
    print("No method kept its 30 values after outlier removal; nothing to plot.")
else:
    figRepository = px.violin(ckDataFrame, y="Values", x="Repository", points="all")
    figRepository.show()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

### Aggregate by repository and comparison with CK values

In [16]:
def sortData(data):
    """Return a new list of ``data`` ordered by ``allValues``, descending.

    The key is the ``allValues`` entry itself; when it is a list, Python
    compares lists lexicographically, so the first value decides the order
    and later values only break ties. The input is not modified.
    """
    def valuesOf(entry):
        return entry["allValues"]

    ordered = list(data)
    ordered.sort(key=valuesOf, reverse=True)
    return ordered

def removeOutliers(data, expectedCount=30):
    """Keep the entries whose values all survive IQR outlier removal.

    Parameters
    ----------
    data : list of dict
        Entries carrying their measurements under the ``allValues`` key.
    expectedCount : int, optional
        Number of values that must remain after ``removeOutliersByIQR``
        for an entry to be kept. Defaults to 30, the previously
        hard-coded value.

    Returns
    -------
    list of dict
        The retained entries, in their original order. Entries are returned
        unchanged (their ``allValues`` are not trimmed).
    """
    print("Len with outliers : ", len(data))
    only30Values = [
        methodData
        for methodData in data
        if len(removeOutliersByIQR(methodData["allValues"])) == expectedCount
    ]
    print("Len without outliers : ", len(only30Values))
    return only30Values

def getTheFirstHighestValues(data, end=5):
    """Return the first ``end`` items of ``data`` (the input is expected to be sorted already)."""
    leading = slice(None, end)
    return data[leading]

def createViolinJoular(data, granularity="Repository", points="all"):
    """Display a violin plot of every Joular value, grouped by the ``granularity`` column.

    The intermediate frame has one row per value, with the columns
    ``Repository``, ``Values`` and ``Class``.
    """
    rows = [
        {"Repository": doc["commit"]["repository"]["name"], "Values": val, "Class": doc["measurableElement"]["className"]}
        for doc in data
        for val in doc["allValues"]
    ]
    valuesFrame = pd.DataFrame(rows)
    px.violin(valuesFrame, y="Values", x=granularity, points=points).show()

def createMultipleBoxplot(dataOneRepo):
    """Display one box plot per method of ``dataOneRepo``.

    The frame is built in long format (one row per measured value), because
    the box plot needs a numeric ``Values`` column. Storing the whole list
    of values in a single cell per method does not provide one.
    """
    rows = [
        {
            "Method": method["measurableElement"]["methodSignature"],
            "Values": value,
            "Class": method["measurableElement"]["className"],
        }
        for method in dataOneRepo
        for value in method["allValues"]
    ]
    # Explicit columns keep the frame plottable even when there are no rows.
    df = pd.DataFrame(rows, columns=["Method", "Values", "Class"])
    fig = px.box(df, x="Method", y="Values", points="outliers")
    fig.show()
        


def createBarChart(dataframe, xAxisName, yAxisName, title):
    """Build (without showing) a bar chart whose y axis is fixed to the range 0-40."""
    return px.bar(dataframe, x=xAxisName, y=yAxisName, title=title).update_layout(yaxis_range=[0, 40])

def prepareCkDataFrame(ckValues):
    """Turn a list of ``{"name": ..., "value": ...}`` items into a Metric/Value frame.

    If a name appears several times, the last value wins and the row keeps
    the position of the first occurrence.
    """
    valueByMetric = {}
    for item in ckValues:
        valueByMetric[item["name"]] = item["value"]
    rows = list(valueByMetric.items())
    return pd.DataFrame(rows, columns=["Metric", "Value"])

def createBoxplot(dataframe, title):
    """Build (without showing) a box plot of the ``value`` column, drawing every point."""
    boxOptions = {"y": "value", "title": title, "points": "all"}
    return px.box(dataframe, **boxOptions)

def prepareJoularDataFrame(joularValues):
    """Wrap the given values in a DataFrame with a single ``value`` column."""
    columns = {"value": joularValues}
    return pd.DataFrame(columns)

def createSubplot(fig1, fig2, title):
    """Display the traces of two figures side by side in a 1x2 subplot grid.

    The traces of ``fig1`` go to the left cell ("Joular values") and those
    of ``fig2`` to the right cell ("Ck values"). ``title`` becomes the
    overall figure title.
    """
    figure = make_subplots(rows=1, cols=2, subplot_titles=("Joular values", "Ck values"))
    for column, sourceFigure in enumerate((fig1, fig2), start=1):
        for trace in sourceFigure["data"]:
            # add_trace replaces the deprecated append_trace.
            figure.add_trace(trace, row=1, col=column)

    figure.update_layout(title_text=title)
    figure.show()

In [90]:
def createJoularAndCkDataframe(dataOneRepo, sha, allowedCkMetrics):
    """Merge the Joular values of each method with its CK metrics.

    Returns a list of dicts (not a pandas DataFrame), one per method, with
    the keys ``Method``, ``Class``, ``JoularValues`` plus one key per metric
    name returned by the CK endpoint for commit ``sha``.
    """
    records = []
    for method in dataOneRepo:
        element = method["measurableElement"]
        className = element["className"]
        methodSignature = element["methodSignature"]

        # Joular side of the record
        record = {
            "Method": methodSignature,
            "Class": className,
            "JoularValues": method["allValues"],
        }

        # CK side: one extra key per returned metric
        ckMetrics = getSeveralCkDataForOneMethod(
            commitSha=sha,
            className=className,
            methodSignature=methodSignature,
            ckMetrics=allowedCkMetrics,
        )
        record.update((metric["name"], metric["value"]) for metric in ckMetrics)
        records.append(record)

    return records


In [149]:
def createFigure(dataframe, ckMetric, repository):
    """Display, for each method, a box plot of its Joular values and a bar for ``ckMetric``.

    ``dataframe`` is iterated as a sequence of dicts with the keys
    ``Method``, ``JoularValues`` and ``ckMetric``. Box plots are drawn on
    the secondary y axis and bars on the primary one. Method names are cut
    to their first 30 characters for labelling.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    for doc in dataframe:
        shortName = doc["Method"][:30]
        fig.add_trace(
            go.Box(y=doc["JoularValues"], name=shortName, marker=dict(color='rgb(252,141,98)'), legendgroup="boxplots"),
            secondary_y=True,
        )
        fig.add_trace(
            go.Bar(x=[shortName], y=[doc[ckMetric]], name=ckMetric, marker=dict(color='rgb(141,160,203)'), legendgroup="barcharts")
        )
    fig['layout'].update(
        legend=dict(traceorder='normal'),
        title="Comparison of joular data and '" + ckMetric + "' metric for repository " + repository,
    )
    fig.show()

In [150]:
# One commit per analysed repository.
allCommitSha = ["066f4cf207359e06d30911a553dedd054aef595c", "5c9d8989f968d0ee3a942b411ef7fe121ed94609", "12442bd8c7cde6e7c006a6277eeb8e81ad0c2219", "3ed1f1a064a10e53adc2ad8c0b46a4b2c148ee21", "59e5152722198526c6ffe5361de7d1a6a87275c7"]
# CK metrics requested from the API; one comparison figure is drawn per metric.
allowedCkMetrics = ["cbo", "loc", "wmc"]

for sha in allCommitSha:
    joularDataRepo = getAllJoularDataForMethodsHaving30Values(sha)
    sortedJoularDataRepo = sortData(joularDataRepo)
    joularDataRepoWithoutOutliers = removeOutliers(sortedJoularDataRepo)
    # Keep only the first 8 methods of the sorted, outlier-free list.
    joularDataRepoWithoutOutliers = getTheFirstHighestValues(joularDataRepoWithoutOutliers, 8)

    if not joularDataRepoWithoutOutliers:
        # Nothing survived the filter: indexing [0] below would raise IndexError.
        print("No method left after outlier removal for commit", sha)
        continue

    repositoryName = joularDataRepoWithoutOutliers[0]["commit"]["repository"]["name"]
    dataframe = createJoularAndCkDataframe(joularDataRepoWithoutOutliers, sha, allowedCkMetrics)
    print(dataframe)
    # Figures follow the order of allowedCkMetrics.
    for ckMetric in allowedCkMetrics:
        createFigure(dataframe, ckMetric, repositoryName)


Calling page  1
Len with outliers :  197
Len without outliers :  93
[{'Method': 'scan/2[spoon.reflect.path.CtRole,java.util.Collection<? extends spoon.reflect.declaration.CtElement>]', 'Class': 'spoon.reflect.visitor.EarlyTerminatingScanner', 'JoularValues': [2109.9353, 2580.8672, 1992.2736, 2085.4592, 2587.01, 2118.5796, 2087.088, 2488.8872, 2529.0112, 2514.089, 2108.82, 2643.9514, 2247.597, 2099.9507, 2037.3049, 2129.1768, 2202.229, 2109.487, 2261.734, 2128.6135, 2747.6675, 2144.3489, 2146.559, 2512.0806, 2193.447, 2201.8904, 2769.0137, 2007.5638, 2310.417, 2193.6514], 'loc': 11.0, 'wmc': 5.0, 'cbo': 3.0}, {'Method': 'scanCtReference/1[spoon.support.visitor.equals.CtReference]', 'Class': 'spoon.support.visitor.equals.EqualsChecker', 'JoularValues': [324.4667, 319.6152, 261.6414, 287.9847, 364.9052, 276.3988, 330.2696, 348.5787, 388.358, 350.1808, 282.6285, 319.4237, 271.5302, 315.5053, 284.1926, 309.2618, 330.9494, 290.9835, 288.9638, 236.2969, 344.5818, 292.3302, 294.5574, 383.42618

Len with outliers :  76
Len without outliers :  32
[{'Method': 'correlateByWords/2[java.lang.String,java.lang.String]', 'Class': 'org.jabref.logic.database.DuplicateCheck', 'JoularValues': [109.749405, 123.503296, 105.668396, 122.218506, 110.789505, 105.7037, 88.0346, 96.5299, 109.9906, 102.5186, 94.9471, 122.4103, 110.3614, 149.202, 100.662506, 96.467896, 108.336395, 130.94489, 96.8728, 142.29599, 117.6544, 117.76, 111.7433, 116.8712, 110.956, 132.6521, 98.000305, 138.413, 101.8049, 114.8927], 'loc': 14.0, 'wmc': 3.0, 'cbo': 1.0}, {'Method': 'setUp/0', 'Class': 'org.jabref.cli.AuxCommandLineTest', 'JoularValues': [108.6305, 98.3913, 98.205, 97.3779, 94.3094, 95.4783, 85.2979, 107.849, 90.9143, 99.0826, 124.4896, 104.6387, 126.1231, 95.1528, 91.463, 115.5113, 97.1995, 109.9923, 84.151, 95.5857, 91.0406, 90.5464, 88.9281, 84.1854, 107.3451, 92.1697, 102.836, 93.7997, 104.2619, 109.5686], 'loc': 3.0, 'wmc': 1.0, 'cbo': 2.0}, {'Method': 'writeToIndex/2[org.jabref.logic.pdf.search.indexing

Len with outliers :  73
Len without outliers :  15
[{'Method': 'findClass/1[java.lang.String]', 'Class': 'org.hibernate.orm.test.bootstrap.registry.classloading.IsolatedClassLoader', 'JoularValues': [463.8108, 508.5263, 466.7141, 608.1737, 509.75958, 485.7152, 640.63824, 456.9913, 737.8087, 388.88422, 598.8286, 520.0781, 374.9727, 286.1817, 475.00812, 453.54208, 404.50558, 540.2884, 307.286, 599.3375, 384.4495, 375.6719, 553.6133, 274.8325, 599.3805, 513.4359, 209.6569, 478.3937, 564.5434, 494.37592], 'loc': 13.0, 'wmc': 3.0, 'cbo': 1.0}, {'Method': 'init/0', 'Class': 'org.hibernate.orm.test.internal.util.ReflectHelperTest', 'JoularValues': [79.2474, 33.324303, 99.7014, 20.7368, 97.6481, 60.2264, 105.8268, 40.4414, 72.4221, 31.663498, 28.2784, 58.184498, 49.9766, 76.9711, 35.497, 52.413197, 78.892, 83.4939, 43.6744, 47.4367, 56.043205, 44.0588, 127.62701, 99.903, 38.6459, 109.6185, 39.849697, 36.9334, 50.444298, 45.550297], 'loc': 9.0, 'wmc': 1.0, 'cbo': 5.0}, {'Method': 'resolveLocalX

Calling page  1
Len with outliers :  177
Len without outliers :  81
[{'Method': 'checkEchoRequest/0', 'Class': 'org.springframework.boot.rsocket.netty.NettyRSocketServerFactoryTests', 'JoularValues': [267.5926, 212.9435, 217.8468, 243.28929, 221.7853, 205.2166, 253.4735, 255.5202, 284.447, 245.7364, 286.5662, 248.04619, 229.2126, 278.2825, 262.3844, 296.8638, 275.2279, 247.78789, 285.0079, 295.5719, 255.5771, 254.1494, 251.6994, 198.9242, 206.3628, 216.4843, 218.56671, 264.36728, 303.05438, 218.4218], 'loc': 5.0, 'wmc': 1.0, 'cbo': 1.0}, {'Method': 'compile/2[org.springframework.boot.web.servlet.GenericApplicationContext,java.util.function.Consumer<org.springframework.boot.web.servlet.GenericApplicationContext>]', 'Class': 'org.springframework.boot.web.servlet.ServletComponentScanRegistrarTests', 'JoularValues': [225.2544, 177.62689, 136.6362, 182.8941, 243.9455, 238.09239, 201.42142, 195.6048, 216.28929, 160.26721, 212.8877, 207.15039, 268.85577, 204.3462, 181.8789, 215.2957, 235.4573

Len with outliers :  15
Len without outliers :  8
[{'Method': 'setUpDataSource/0', 'Class': 'org.apache.commons.configuration2.DatabaseConfigurationTestHelper', 'JoularValues': [104.258995, 106.782, 105.0801, 91.6607, 93.9621, 104.3268, 97.3097, 110.6919, 96.2922, 113.2199, 106.6211, 98.7757, 92.5542, 94.4055, 90.966, 82.4212, 89.6111, 97.71921, 97.4402, 117.047195, 114.3917, 104.01759, 88.2087, 101.0553, 93.2793, 111.8563, 112.820595, 111.2415, 113.444695, 94.1346], 'loc': 21.0, 'wmc': 2.0, 'cbo': 7.0}, {'Method': 'transform/3[javax.xml.transform.Transformer,javax.xml.transform.Source,javax.xml.transform.Result]', 'Class': 'org.apache.commons.configuration2.XMLDocumentHelper', 'JoularValues': [46.3576, 109.8463, 36.3418, 42.8423, 47.6707, 39.604, 121.9423, 66.7676, 34.4446, 104.4137, 29.5514, 81.6877, 111.716, 138.352, 58.8732, 49.3485, 37.7012, 52.4831, 58.6303, 91.8612, 69.4049, 43.8193, 22.4099, 26.2092, 57.0808, 111.914, 53.0701, 54.5148, 81.8963, 103.1982], 'loc': 8.0, 'wmc': 2.0

# Test 

In [31]:
def getJoularDataFromOneMethodFromOneCommit(commitSha, className, methodSignature):
    """Fetch the Joular entities of one method of one commit.

    Sends ``className`` and ``methodSignature`` as query parameters to the
    ``/joular-entities/by-commit-and-ast-elem/<commitSha>`` endpoint and
    returns the decoded JSON response. The request goes through the shared
    ``getEndpoint`` helper instead of duplicating its logic.
    """
    params = {"className": className, "methodSignature": methodSignature}
    return getEndpoint("/joular-entities/by-commit-and-ast-elem/" + commitSha, params)

In [32]:
def getCkDataFromOneCommit(commitSha, astElem, className, methodSignature=None):
    """Fetch the CK entities of one AST element of one commit.

    ``astElem`` and ``className`` are always sent as query parameters;
    ``methodSignature`` is added only when it is not ``None``. The request
    goes through the shared ``getEndpoint`` helper and the decoded JSON
    response is returned.
    """
    params = {"astElem": astElem, "className": className}
    if methodSignature is not None:
        params["methodSignature"] = methodSignature
    return getEndpoint("/ck-entities/by-commit-and-ast-elem/" + commitSha, params)

In [33]:
def createJoularBoxplot(commitSha, className, methodSignature):
    """Display a box plot of the strictly positive Joular values of one method.

    Only the ``content`` of the response is read; values that are zero or
    negative are left out of the plot.
    """
    response = getJoularDataFromOneMethodFromOneCommit(commitSha, className, methodSignature)
    positiveValues = []
    for item in response["content"]:
        if item["value"] > 0:
            positiveValues.append(item["value"])
    valuesFrame = pd.DataFrame({"value": positiveValues})
    px.box(valuesFrame, y="value", title="Joular values for " + methodSignature, points='all').show()

In [34]:
def createCkBarChart(commitSha, astElem, className, methodSignature):
    """Display a bar chart of a fixed selection of CK metrics for one method.

    Only the metrics listed in ``NAMES`` are kept from the ``content`` of
    the response.
    """
    NAMES = ["wmc", "cbo", "cboModified", "loc", "fanin", "fanout"]
    response = getCkDataFromOneCommit(commitSha, astElem, className, methodSignature)
    valueByMetric = {}
    for item in response["content"]:
        if item["name"] in NAMES:
            valueByMetric[item["name"]] = item["value"]
    metricsFrame = pd.DataFrame(list(valueByMetric.items()), columns=["Metric", "Value"])
    px.bar(metricsFrame, x="Metric", y="Value", title="Ck metrics for " + methodSignature).show()

In [35]:
def createBoxplotAndBarChart(commitSha, astElem, className, methodSignature):
    """Display the Joular box plot, then the CK bar chart, of the same method."""
    createJoularBoxplot(
        commitSha=commitSha,
        className=className,
        methodSignature=methodSignature,
    )
    createCkBarChart(
        commitSha=commitSha,
        astElem=astElem,
        className=className,
        methodSignature=methodSignature,
    )

In [36]:
# (commit SHA, class name, method signature) of each method to plot, in display order.
methodsToPlot = [
    ("3ed1f1a064a10e53adc2ad8c0b46a4b2c148ee21", "org.springframework.boot.web.servlet.server.StaticResourceJars", "isResourcesJar/1[java.util.jar.JarFile]"),
    ("12442bd8c7cde6e7c006a6277eeb8e81ad0c2219", "org.hibernate.engine.jdbc.internal.ResultSetReturnImpl", "executeUpdate/2[java.sql.PreparedStatement,java.lang.String]"),
    ("12442bd8c7cde6e7c006a6277eeb8e81ad0c2219", "org.hibernate.bytecode.internal.bytebuddy.ByteBuddyState", "make/2[TypePool,DynamicType.Builder<?>]"),
    ("12442bd8c7cde6e7c006a6277eeb8e81ad0c2219", "org.hibernate.boot.registry.classloading.internal.AggregatedServiceLoader$ClassPathAndModulePathAggregatedServiceLoader", "hasNextIgnoringServiceConfigurationError/1[java.util.Iterator<?>]"),
    ("3ed1f1a064a10e53adc2ad8c0b46a4b2c148ee21", "org.springframework.boot.web.embedded.tomcat.TomcatWebServer", "initialize/0"),
    ("12442bd8c7cde6e7c006a6277eeb8e81ad0c2219", "org.hibernate.testing.junit4.FailureExpectedHandler", "evaluate/0"),
]

for commitSha, className, methodSignature in methodsToPlot:
    createBoxplotAndBarChart(commitSha, "method", className, methodSignature)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Joular box plot and CK bar chart for ModifiedClassPathClassLoader.loadClass.
createBoxplotAndBarChart(
    commitSha="3ed1f1a064a10e53adc2ad8c0b46a4b2c148ee21",
    astElem="method",
    className="org.springframework.boot.testsupport.classpath.ModifiedClassPathClassLoader",
    methodSignature="loadClass/1[java.lang.String]",
)

In [None]:
# Joular box plot and CK bar chart for AggregatedClassLoader.getResources.
createBoxplotAndBarChart(
    commitSha="12442bd8c7cde6e7c006a6277eeb8e81ad0c2219",
    astElem="method",
    className="org.hibernate.boot.registry.classloading.internal.AggregatedClassLoader",
    methodSignature="getResources/1[java.lang.String]",
)

### Data with aberrant negative values

The method *verifyCollection* has the value -530894.

In [12]:
# Joular box plot and CK bar chart for PhantomReferenceLeakDetector.verifyCollection.
createBoxplotAndBarChart(
    commitSha="12442bd8c7cde6e7c006a6277eeb8e81ad0c2219",
    astElem="method",
    className="org.hibernate.orm.test.bootstrap.registry.classloading.PhantomReferenceLeakDetector",
    methodSignature="verifyCollection/3[java.lang.ref.ReferenceQueue<T>,int,int]",
)

The method *accept* below has the value -217371.5

In [13]:
# Joular box plot and CK bar chart for GenerationTargetToDatabase.accept.
createBoxplotAndBarChart(
    commitSha="12442bd8c7cde6e7c006a6277eeb8e81ad0c2219",
    astElem="method",
    className="org.hibernate.tool.schema.internal.exec.GenerationTargetToDatabase",
    methodSignature="accept/1[java.lang.String]",
)