In [130]:
import json
import os
from scipy import stats
from scipy.stats import normaltest, mannwhitneyu, ttest_ind, shapiro
import scipy
import numpy as np
import matplotlib.pyplot as plt
import datashader as ds
from datashader.mpl_ext import dsshow
import pandas as pd


In [131]:
def using_datashader(ax, x, y):
    """Draw a log-scaled point-count density image of (x, y) on ``ax``.

    The points are rasterised with datashader's matplotlib artist and a
    colorbar for the resulting counts is attached to ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; the colorbar is added to the figure owning ``ax``.
    x, y : sequence of numbers
        Point coordinates. They become the "x" and "y" columns of a
        ``pandas.DataFrame``, so both must have the same length.

    Returns
    -------
    The artist returned by ``dsshow`` (previously nothing was returned;
    callers that ignore the result are unaffected).
    """
    points = pd.DataFrame({"x": x, "y": y})
    dsartist = dsshow(
        points,
        ds.Point("x", "y"),
        ds.count(),
        norm="log",  # counts are colour-mapped on a log scale
        aspect="auto",
        ax=ax,
        # NOTE(review): per the datashader docs these scale the raster size
        # relative to the axes' pixel size, so 0.05 yields coarse cells.
        width_scale=0.05,
        height_scale=0.05,
    )

    # Attach the colorbar via the figure that owns `ax` instead of pyplot's
    # "current figure", so this works even when `ax` is not the active axes.
    ax.figure.colorbar(dsartist, ax=ax)
    return dsartist


In [132]:
# Notebook-wide configuration.
# NOTE: the following cell assigns PROJECT / TYPE / SCORING_TYPE again
# (with a different project order), so those values win once it has run.
PROJECT = [
    "next",
    "react",
    "sveltekit",
]
TYPE = [
    "LAYER",
    "LENGTH",
]
SCORING_TYPE = "MAX"

In [133]:
PROJECT = ["react", "next", "sveltekit"]
TYPE = ["LAYER", "LENGTH"]
SCORING_TYPE = "MAX"

# Y-axis tick labels for competency levels 1..6.
LEVEL_LABELS = ['A1', 'A2', 'B1', 'B2', 'C1', 'C2']

# Figures are written below this directory; create it so savefig cannot fail
# on a fresh checkout.
os.makedirs("fig", exist_ok=True)


def _load_json(path):
    """Return the parsed content of the JSON file at ``path``.

    The file is closed even if parsing raises.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _save_density_plot(xs, ys, ttype, title, out_path, padded=False):
    """Plot ``ys`` (competency level) against ``xs`` with datashader and save it.

    Parameters
    ----------
    xs, ys : sequences of numbers forwarded to ``using_datashader``.
    ttype : 'LAYER' labels the x axis 'File Depth'; any other value labels
        it 'Filename Length'.
    title : figure title.
    out_path : destination passed to ``Figure.savefig``.
    padded : when True, add unlabeled y ticks at 0 and 7 around the six
        level ticks (the layout used for the combined, all-project figure).
    """
    fig, ax = plt.subplots()
    ax.tick_params(axis='both', labelsize=12)

    n_levels = len(LEVEL_LABELS)
    if padded:
        ticks = list(range(0, n_levels + 2))
        labels = [''] + LEVEL_LABELS + ['']
    else:
        ticks = list(range(1, n_levels + 1))
        labels = LEVEL_LABELS
    ax.yaxis.set_ticks(ticks)
    ax.yaxis.set_ticklabels(labels, fontsize=12)

    ax.set_title(title, fontsize=14)
    ax.set_ylabel('Competency Level', fontsize=14)
    ax.set_xlabel('File Depth' if ttype == 'LAYER' else 'Filename Length', fontsize=14)

    using_datashader(ax, xs, ys)
    fig.savefig(out_path, bbox_inches='tight', dpi=150)
    # Release the figure immediately instead of accumulating open figures.
    plt.close(fig)


# data_x[i][j] / data_y[i][j] hold the dim_x / dim_y dataset for TYPE[i], PROJECT[j].
data_x = []
data_y = []

for ttype in TYPE:
    data_type_x = []
    data_type_y = []
    for project in PROJECT:
        base_dir = f'../processed_data/{SCORING_TYPE}/{project}/{ttype}'
        dataset_1 = _load_json(f'{base_dir}/dim_x.json')
        dataset_2 = _load_json(f'{base_dir}/dim_y.json')
        data_type_x.append(dataset_1)
        data_type_y.append(dataset_2)

        title = f"File Depth ({project})" if ttype == 'LAYER' else f"Filename Length ({project})"
        _save_density_plot(
            dataset_1,
            dataset_2,
            ttype,
            title,
            f"fig/scatter-{project}-{ttype}.pdf",
        )

    # Combined figure: concatenate every project's points for this metric.
    flatten_x = [item for sublist in data_type_x for item in sublist]
    flatten_y = [item for sublist in data_type_y for item in sublist]
    title = "All project with File Depth" if ttype == 'LAYER' else "All project with Filename Length"
    _save_density_plot(
        flatten_x,
        flatten_y,
        ttype,
        title,
        f"fig/scatter-all-{ttype}.pdf",
        padded=True,
    )

    data_x.append(data_type_x)
    data_y.append(data_type_y)

In [134]:
# Histogram of competency levels for every project under both scoring modes:
# "MAX" is plotted as a per-file count, "ALL" as a per-code-construct count.
level_names = ['A1', 'A2', 'B1', 'B2', 'C1', 'C2']

# Figures are written below this directory; create it so savefig cannot fail.
os.makedirs("fig", exist_ok=True)

for score in ["MAX", "ALL"]:
    for project in PROJECT:
        # NOTE(review): assumes dim_y.json holds a flat list of numeric
        # levels in 1..6 -- confirm against the data producer.
        # The previous version also appended this dataset to `data_type_y`,
        # a list leaked from the scatter-plot cell and aliased by
        # `data_y[-1]`; that silently corrupted `data_y` and broke this cell
        # on a fresh kernel, so the append was dropped.
        with open(f'../processed_data/{score}/{project}/LAYER/dim_y.json', encoding="utf-8") as f:
            dataset_1 = json.load(f)

        fig, ax = plt.subplots()

        # Bin edges 1..7 give one bin per level; align='left' centres each
        # bar on its level's tick.
        ax.hist(np.array(dataset_1), bins=range(1, 8), align='left', rwidth=0.5)

        ax.set_xticks(np.arange(1, 7))
        ax.set_xticklabels(level_names, fontsize=12)
        ax.tick_params(axis='y', labelsize=12)

        ax.set_xlabel('Competency Level', fontsize=14)
        label = "Code Construct Count" if score == "ALL" else "File Count"
        ax.set_ylabel(label, fontsize=14)
        # Title spelling fixed ("Compentency" -> "Competency").
        title = f' Competency in each code construct ({project})' if score == "ALL" else f' Competency in each file ({project})'
        ax.set_title(title, fontsize=14)
        ax.grid(True)

        fig.savefig(f"fig/hist-{project}-{score}.pdf", bbox_inches='tight', dpi=150)
        plt.close(fig)