-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit with all data and some implementation
- Loading branch information
Showing
14 changed files
with
543 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
// Use IntelliSense to learn about possible attributes. | ||
// Hover to view descriptions of existing attributes. | ||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"name": "Bokeh server", | ||
"type": "python", | ||
"request": "attach", | ||
"port": 5678, | ||
"host": "localhost", | ||
"preLaunchTask": "launch Bokeh server", | ||
"postDebugTask": "kill Bokeh server" | ||
}, | ||
{ | ||
"name": "Python: Current File", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${file}", | ||
"console": "integratedTerminal", | ||
"justMyCode": true | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
// See https://go.microsoft.com/fwlink/?LinkId=733558 | ||
// for the documentation about the tasks.json format | ||
"version": "2.0.0", | ||
"tasks": [ | ||
{ | ||
"label": "kill Bokeh server", | ||
"command": "${command:workbench.action.tasks.terminate}", | ||
"problemMatcher": {}, | ||
"presentation": { | ||
"reveal": "never", | ||
"panel": "shared", | ||
"showReuseMessage": false | ||
} | ||
}, | ||
|
||
{ | ||
"label": "launch Bokeh server", | ||
"type": "shell", | ||
"isBackground": true, | ||
"command": "./venv/Scripts/bokeh", | ||
"args": [ | ||
"serve", | ||
"sliders.py", | ||
"--port", "5678" | ||
], | ||
"options": { | ||
"env": { | ||
"BOKEH_VS_DEBUG": "true" | ||
} | ||
}, | ||
// you have to allow the task to not complete by specifying a complete problem matcher | ||
"problemMatcher": { | ||
"fileLocation": [ | ||
"relative", | ||
"${workspaceFolder}" | ||
], | ||
"pattern": [ | ||
{ | ||
"regexp": ".", | ||
"file": 1, | ||
"location": 2, | ||
"message": 3 | ||
} | ||
], | ||
"background": { | ||
"activeOnStart": true, | ||
"endsPattern": "^.*Waiting for debugger attach.*", | ||
"beginsPattern": "^this should not match on anything" | ||
} | ||
} | ||
} | ||
] | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
CSV | ||
===== | ||
|
||
Contains an archive with all metric values extracted from all systems. | ||
Please refer to [`readme.md`](../files/readme.md) for how `metrics.csv.7z` was created. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
Files | ||
====== | ||
|
||
The original [`metrics.zip`](http://web.archive.org/web/20220814110913/http://java.labsoft.dcc.ufmg.br/qualitas.class/corpus/metrics.zip) (84.2 MB) from [http://java.labsoft.dcc.ufmg.br/qualitas.class/download.html](https://web.archive.org/web/20191223234321/http://java.labsoft.dcc.ufmg.br/qualitas.class/download.html) was repacked into `metrics.7z` (30.1 MB) using PPMd compression. | ||
|
||
The metric values from all these files (systems/projects) were previously extracted into separate CSV files and merged into one large file. The merged file is stored under [`../csv/metrics.csv.7z`](../csv/metrics.csv.7z) (496 KB). | ||
|
||
You can, however, extract your own metrics using the methods of the class `QualitasCorpusMetricsExtractor`. The merged file described above retains no information other than the system, the metric, and the value, because the primary purpose of this repository is to approximate distributions. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from os import walk | ||
from typing import Iterable | ||
from src.data.metrics import QualitasCorpusMetricsExtractor, MetricID | ||
import pandas as pd | ||
import numpy as np | ||
|
||
|
||
|
||
|
||
from src.data.metrics import MetricID | ||
from src.distribution.distribution import Distribution | ||
|
||
|
||
# Load the metrics table, pull the data for one metric, fit it with
# Distribution.fit_parametric and persist the fitted object to disk.
metrics_frame = pd.read_csv('csv/metrics.csv')
d = Distribution(df=metrics_frame)
data = d.get_cdf_data(
    metric_id=MetricID.RMA,
    unique_vals=False,
)
cdf = Distribution.fit_parametric(data=data)
# NOTE(review): the output file is named after VG although the data above is
# taken for MetricID.RMA -- confirm which of the two is intended.
cdf.save_to_file(file='./results/cdf_VG.pickle')
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# Load the merged table that holds one row per (project, metric, value).
temp = pd.read_csv('csv/__ALL__.csv')

# Build one tiny offset per row: an evenly spaced grid on [0, 1e-6] that is
# drawn without replacement (i.e. shuffled) by a seeded generator, so the
# result is reproducible.
rng = np.random.default_rng(seed=1337)
n_rows = len(temp)
offset_grid = np.linspace(0, 1e-6, n_rows)
r = rng.choice(offset_grid, n_rows, replace=False)

# Number of distinct values before (nu0) and after (nu) adding the offsets.
nu0 = len(np.unique(temp['value']))
temp['value'] += r
nu = len(np.unique(temp['value']))
|
||
|
||
def get_file_metrics(files: list[str], proj: str, files_dir: str='./files', csv_dir: str='./csv') -> pd.DataFrame:
    """Extract every metric value from `files` and store them as one CSV.

    Each file is opened with `QualitasCorpusMetricsExtractor`; for every
    member of `MetricID` the values it yields become rows of the form
    (project, metric, value), where `metric` is the enum member's name.

    Args:
        files: File names (relative to `files_dir`) that belong to `proj`.
        proj: Label written to the `project` column; also the CSV base name.
        files_dir: Directory that contains `files`.
        csv_dir: Directory the resulting `<proj>.csv` is written to.

    Returns:
        The data frame that was written, with the columns `project`,
        `metric` and `value`.
    """
    columns = ['project', 'metric', 'value']
    rows = []

    for file in files:
        qcme = QualitasCorpusMetricsExtractor(file=f'{files_dir}/{file}')
        # Iterate the enum itself (definition order) instead of a set of its
        # members: set order depends on string hashing, which is randomized
        # per process and made the row order differ from run to run.
        for mid in MetricID:
            rows.extend(
                {'project': proj, 'metric': mid.name, 'value': v}
                for v in qcme.metrics_values(metric_id=mid))

    # Passing the columns explicitly keeps the header even when no value was
    # extracted, so the written CSV can always be read back.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(f'{csv_dir}/{proj}.csv', index=False)
    return df
|
||
|
||
|
||
def convert_xml_to_csv(directory: str='./files'):
    """Convert all metrics files below `directory` into per-project CSV files.

    Files are grouped by the first 7 characters of their name; each group is
    handed to `get_file_metrics` with that prefix as the project label.

    Args:
        directory: Root directory that is walked for input files.
    """
    for root, _, files in walk(directory):
        # The first 7 characters suffice to split by system,
        # yet to retain separate projects that make it up.
        # Grouping explicitly (on a sorted listing) processes every prefix
        # exactly once, independent of the order the OS lists the files in.
        groups: dict[str, list[str]] = {}
        for file in sorted(files):
            groups.setdefault(file[0:7], []).append(file)

        for prefix, members in groups.items():
            # Forward the directory actually being walked; previously the
            # default './files' was always used, ignoring `directory`.
            get_file_metrics(files=members, proj=prefix, files_dir=root)
|
||
|
||
|
||
|
||
def concat_csv_files(directory: str='./csv', target_file_name: str='__ALL__.csv'):
    """Merge all CSV files directly inside `directory` into one CSV file.

    Only the top level of `directory` is considered. Files that do not end in
    `.csv`, and a previously written `target_file_name`, are skipped, so
    running the function again does not duplicate rows.

    Args:
        directory: Directory holding the per-project CSV files.
        target_file_name: Name of the merged file, written into `directory`.

    Raises:
        ValueError: If `directory` holds no CSV file to merge.
    """
    # Only the first level is needed; do not walk the whole tree for it.
    _, __, files = next(walk(directory), (directory, [], []))

    sources = sorted(
        name for name in files
        if name.lower().endswith('.csv') and name != target_file_name)
    if not sources:
        raise ValueError(f'No CSV files to concatenate in {directory!r}')

    df = pd.concat(
        (pd.read_csv(f'{directory}/{name}') for name in sources),
        ignore_index=True)

    df.to_csv(f'{directory}/{target_file_name}', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
bokeh==2.4.3 | ||
jupyterlab==3.4.5 | ||
matplotlib==3.5.3 | ||
nptyping==2.2.0 | ||
pip-chill==1.0.1 | ||
ptvsd==4.3.2 | ||
statsmodels==0.13.2 | ||
strenum==0.4.8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
''' Present an interactive function explorer with slider widgets. | ||
Scrub the sliders to change the properties of the ``sin`` curve, or | ||
type into the title text box to update the title of the plot. | ||
Use the ``bokeh serve`` command to run the example by executing: | ||
bokeh serve sliders.py | ||
at your command prompt. Then navigate to the URL | ||
http://localhost:5006/sliders | ||
in your browser. | ||
''' | ||
import os | ||
import bokeh | ||
import ptvsd | ||
|
||
# Wait for the VS Code debugger only when explicitly requested through the
# environment. `.get` is used so that a plain `bokeh serve sliders.py`
# (variable not set) starts normally instead of raising KeyError.
if os.environ.get('BOKEH_VS_DEBUG') == 'true':
    # 5678 is the default attach port in the VS Code debug configurations
    print('Waiting for debugger attach')
    ptvsd.enable_attach(address=('localhost', 5678), redirect_output=True)
    ptvsd.wait_for_attach()
|
||
|
||
|
||
|
||
|
||
|
||
import numpy as np | ||
|
||
from bokeh.events import MenuItemClick | ||
from bokeh.io import curdoc | ||
from bokeh.layouts import column, row | ||
from bokeh.models import ColumnDataSource, Slider, TextInput, Dropdown | ||
from bokeh.plotting import figure | ||
|
||
# Set up data | ||
# Initial curve: N samples of sin(x) on [0, 4*pi], shared with the plot
# through a ColumnDataSource.
N = 200
x = np.linspace(0, 4*np.pi, N)
y = np.sin(x)
source = ColumnDataSource(data={'x': x, 'y': y})

# Figure with fixed axis ranges; the line reads its points from `source`.
plot = figure(
    height=400,
    width=400,
    title="my sine wave",
    tools="crosshair",
    x_range=[0, 4*np.pi],
    y_range=[-2.5, 2.5],
)
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)
|
||
|
||
# Set up widgets | ||
# Text box for the plot title.
text = TextInput(title="title", value='my sine wave')

# One slider per curve parameter.
offset = Slider(
    title="offset", value=0.0, start=-5.0, end=5.0, step=0.1)
amplitude = Slider(
    title="amplitude", value=1.0, start=-5.0, end=5.0, step=0.1)
phase = Slider(
    title="phase", value=0.0, start=0.0, end=2*np.pi)
freq = Slider(
    title="frequency", value=1.0, start=0.1, end=5.1, step=0.1)

# Dropdown entries are (label, item) pairs; None is placed between entries.
menu = [
    ("Item 1", "item_1"),
    ("Item 2", "item_2"),
    None,
    ("Item 3", "item_3"),
]
ddown = Dropdown(label=menu[0][0], menu=menu)
|
||
def temp(evt: MenuItemClick):
    """Handle a dropdown click: log it and show the clicked entry as label.

    Clicking `item_3` additionally replaces the menu by a single `Item X`
    entry and sets the label accordingly.
    """
    clicked = evt.item
    print(f'{evt.event_name}: {clicked}')

    # Derive the label from the item key, e.g. 'item_1' -> 'Item 1'.
    shown = clicked.replace('i', 'I').replace('_', ' ')
    ddown.label = shown

    if clicked != 'item_3':
        return

    ddown.menu = [('Item X', 'item_x')]
    ddown.label = 'Item X'


ddown.on_click(temp)
|
||
|
||
# Set up callbacks | ||
def update_title(attrname, old, new):
    """Copy the current content of the title text box into the plot title."""
    current_title = text.value
    plot.title.text = current_title


text.on_change('value', update_title)
|
||
def update_data(attrname, old, new):
    """Recompute the curve from the current slider values.

    The curve is amplitude * sin(frequency * x + phase) + offset, sampled at
    N points on [0, 4*pi]; the result replaces the data of `source`.
    """
    # Same sample grid as the initial curve.
    grid = np.linspace(0, 4*np.pi, N)

    # Read the sliders at the moment of the callback.
    scale, shift = amplitude.value, offset.value
    angle, rate = phase.value, freq.value

    curve = scale*np.sin(rate*grid + angle) + shift
    source.data = dict(x=grid, y=curve)


# Every slider triggers the same recomputation.
for slider in (offset, amplitude, phase, freq):
    slider.on_change('value', update_data)
|
||
|
||
# Set up layouts and add to document | ||
# Widgets stacked in one column, placed next to the plot.
inputs = column(text, offset, amplitude, phase, freq, ddown)
page = row(inputs, plot, width=800)

document = curdoc()
document.add_root(page)
document.title = "Sliders"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from typing import Union | ||
from typing import Iterable | ||
from xml.dom.minidom import Document, Element, parse | ||
from strenum import StrEnum | ||
from re import match | ||
|
||
|
||
class MetricID(StrEnum): | ||
TLOC = 'Total Lines of Code' | ||
NOP = 'Number of Packages' | ||
NOC = 'Number of Classes' | ||
NOI = 'Number of Interfaces' | ||
NOM = 'Number of Methods' | ||
NOF = 'Number of Attributes' | ||
NORM = 'Number of Overridden Methods' | ||
PAR = 'Number of Parameters' | ||
NSM = 'Number of Static Methods' | ||
NSF = 'Number of Static Attributes' | ||
WMC = 'Weighted methods per Class' | ||
DIT = 'Depth of Inheritance Tree' | ||
NSC = 'Number of Children' | ||
LCOM = 'Lack of Cohesion of Methods' | ||
MLOC = 'Method Lines of Code' | ||
SIX = 'Specialization Index' | ||
VG = 'McCabe Cyclomatic Complexity' | ||
NBD = 'Nested Block Depth' | ||
RMD = 'Normalized Distance' | ||
CA = 'Afferent Coupling' | ||
CE = 'Efferent Coupling' | ||
RMI = 'Instability' | ||
RMA = 'Abstractness' | ||
|
||
|
||
class QCMetricsExtractor:
    """Read metric values from one metrics XML document."""

    def __init__(self, file: str) -> None:
        """Parse `file` and keep the resulting DOM document in `self.xml`."""
        self.xml: Document = parse(file=file)

    def _find_metric(self, metric_name: str) -> Union[Element, None]:
        """Return the element whose `id` attribute equals `metric_name`."""
        # minidom resolves getElementById only for attributes declared as ID
        # by a DTD; without one it returns None, so fall back to a scan.
        found = self.xml.getElementById(metric_name)
        if found is not None:
            return found
        for candidate in self.xml.getElementsByTagName('*'):
            if candidate.getAttribute('id') == metric_name:
                return candidate
        return None

    def metrics_values(self, metric_id: 'MetricID') -> Iterable[Union[float, int]]:
        """Yield the values stored for `metric_id`.

        The metric element is looked up by the enum member's name (its short
        code, e.g. ``TLOC``); a plain string holding that code is accepted
        as well.

        NOTE(review): only metrics whose first child element is a single
        `Value` element are handled so far; any other layout yields nothing.

        Raises:
            KeyError: If the document has no element for `metric_id`.
        """
        # `MetricID[member]` raised KeyError for every member, because enum
        # subscription expects the member *name*; use the name directly.
        metric_name = getattr(metric_id, 'name', None) or str(metric_id)
        metric = self._find_metric(metric_name)
        if metric is None:
            raise KeyError(f'No element with id {metric_name!r} in document')

        # Skip whitespace/text/comment nodes: `firstChild` is a text node
        # (which has no tagName) whenever the XML is pretty-printed.
        fc = next(
            (node for node in metric.childNodes
             if node.nodeType == node.ELEMENT_NODE),
            None)
        if fc is not None and fc.tagName == 'Value':
            # This metric has only one value, like TLOC.
            v = fc.getAttribute('value')
            if match(pattern=r'^\d+$', string=v):
                yield int(v)
            else:
                yield float(v)


# Backward-compatible alias: other modules and the README refer to the
# extractor by its long name.
QualitasCorpusMetricsExtractor = QCMetricsExtractor
|
Oops, something went wrong.