In [1]:
# Standard-library helpers; getpass is used in the next code cell to prompt for the database password.
import sys, os, getpass, warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and pandas warnings raised by later cells.
warnings.filterwarnings('ignore')
# Disabled: would add ../src (relative to this notebook) to the module search path.
#sys.path.append(os.path.abspath(os.path.join(os.path.dirname(os.path.realpath("__file__")), '..', 'src')))  # append parent directory to search path
from patterns.visualizer import Visualizer

Bring in data from database and annotate it with *locc*, *locc+*, *locc-*, *change-size-cos* = *1-similarity* (**expensive call**)

In [None]:
# Build the visualizer for the 'petsc' project; the database password is
# requested interactively so that it never appears in the notebook source.
vis = Visualizer(
    project_name='petsc',
    db_pwd=getpass.getpass(prompt='Database password:'),
)

# Fetch and annotate the commit data (flagged as an expensive call in the text above).
vis.get_data()

Database password: ·······


In [None]:
# Keep a handle on the commit table as loaded; the noncode-removal cell below
# compares its row count against this one.
# NOTE(review): this is a reference to vis.commit_data, not a copy.
all_commits = vis.commit_data
# Preview the first rows.
all_commits.head()

## Remove all noncode files
The determination of what is code is made by including common suffixes, as well as by manually checking a sample of ECP projects for the suffixes used for things that can be labeled as code (vs. input simulation data, documentation, or generated files).

In [None]:
# Filter the visualizer's commit data down to code files.
vis.remove_noncode()

# Report how many rows disappeared relative to the table captured in `all_commits`.
# NOTE(review): assumes remove_noncode() rebinds vis.commit_data to a new frame;
# if it filtered in place, `all_commits` would be the same object and this would print 0 — confirm.
n_removed = all_commits.shape[0] - vis.commit_data.shape[0]
print("Removed %d noncode files!" % n_removed)

# Preview the filtered table.
vis.commit_data.head()

In [None]:
# Overall project plot with no time restriction (time_range=None) and log=True
# (presumably a logarithmic axis — confirm in Visualizer). The returned value is kept in `df`.
df = vis.plot_overall_project_locc(time_range=None, log=True)

to focus on a given year and/or month, set *year* and *month* fields

In [None]:
# Focus period used by the time_range='year' and time_range='month' plots below.
focus_year = 2020
focus_month = 7

vis.set_year(focus_year)
vis.set_month(focus_month)

to plot for a given year, provide *time_range='year'*

In [None]:
# Same overall plot, limited to the year selected with vis.set_year().
df = vis.plot_overall_project_locc(
    time_range='year',
    log=True,
)

similarly to plot for a given month, set *time_range='month'*

In [None]:
# Same overall plot, limited to the month selected with vis.set_month().
# The return value is left as the cell's last expression, so it is displayed as cell output.
vis.plot_overall_project_locc(
    time_range='month',
    log=True,
)

### Averages: Total changed lines over time

In [None]:
# Plot the average of total changed lines over time, using the method's default settings.
vis.plot_total_locc_avg()

In [None]:
# Plot moving averages of total changed lines (window sizes are chosen inside Visualizer).
vis.plot_total_locc_moving_avgs()

In [None]:
# Moving-average variant with the `_M` suffix.
# NOTE(review): presumably month-based windows — confirm against Visualizer.
vis.plot_total_locc_moving_avgs_M()

## Total changed lines over time
This reflects changed lines of code as reported in git commit diffs.

In [None]:
# Line plot of changed lines over time with default arguments; the returned value is kept in `df`.
df = vis.plot_project_locc_line()

In [None]:
# Same call with locc=False; the result is kept separately as `df2` and is the
# frame plotted by the seaborn scatter in the next cell.
df2 = vis.plot_project_locc_line(locc=False)

In [None]:
import seaborn as sns

# Scatter of changed lines over time from `df2`; both marker size and colour
# encode the 'change-size-cos' column.
sns.set(font_scale=1.5)
with sns.axes_style("whitegrid"):
    g = sns.relplot(data=df2, x="datetime", y="locc", size="change-size-cos", hue="change-size-cos", sizes=(50, 500),
                    height=6, aspect=1.5, kind="scatter")
    g.ax.set_xlabel('Date')
    g.ax.set_ylabel('lines added + lines removed')
    # Slant the date tick labels so they do not overlap.
    g.fig.autofmt_xdate()
    g.fig.show()
# Preview the frame that was actually plotted. The original previewed `df`,
# which is a different frame produced by an earlier cell.
df2.head()

### Using a distance metric to adjust the size of the changes

We use the Python [textdistance](https://github.com/life4/textdistance) module. The following algorithms have been integrated with the visualizer.
```
'cos', 'hamming', 'damerau_levenshtein', 'jaccard', 'jaro', 'jaro_winkler', 'bag', 'editex'
```

In [None]:
import seaborn as sns

# Distance metric used to size the changes; must be one of the algorithms listed above.
diff_alg = 'jaccard'
# Name of the column that holds the per-change distance for the chosen metric.
size_column = "change-size-%s" % diff_alg

df = vis.plot_project_locc_line(diff_alg=diff_alg, log=False)

# Scatter of changed lines over time; marker size and colour both encode the distance column.
sns.set(font_scale=1.5)
with sns.axes_style("whitegrid"):
    g = sns.relplot(
        data=df,
        x="datetime",
        y="locc",
        size=size_column,
        hue=size_column,
        sizes=(50, 500),
        height=6,
        aspect=1.5,
        kind="scatter",
    )
    g.ax.set_xlabel('Date')
    g.ax.set_ylabel('lines added + lines removed')
    # Slant the date tick labels so they do not overlap.
    g.fig.autofmt_xdate()
    g.fig.show()

# Preview the plotted frame.
df.head()

## Change size over time

In [None]:
# Change size over the whole history (time_range=None); the returned value replaces `df`.
df = vis.plot_proj_change_size(time_range=None)

Similarly, we can 'zoom' into a specific year

In [None]:
# Change size for a single year — presumably the one set earlier via vis.set_year(); confirm.
vis.plot_proj_change_size(time_range='year')

Or a specific year range

In [None]:
# Choose the span of years, then plot change size over that span.
year_from, year_to = 2018, 2020
vis.select_year_range(year_from, year_to)
vis.plot_proj_change_size(time_range='year-year')

we can zoom into a month

In [None]:
# Change size for a single month — presumably the one set earlier via vis.set_month(); confirm.
vis.plot_proj_change_size(time_range='month')

Or a month range

In [None]:
# Choose the span of months, then plot change size over that span.
month_from, month_to = 5, 11
vis.select_month_range(month_from, month_to)
vis.plot_proj_change_size(time_range='month-month')

## More patterns
Here we look at a combination of the high-churn and domain-champion patterns. Basically, we focus on the files that have the most changes and restrict the developers to those with the biggest contributions.

In [None]:
# Size of the "top N" selection; also reused by the next heatmap cell.
N = 10
# Cap label length at 30 (presumably characters, to keep axis labels readable — confirm).
vis.set_max_label_length(30)
# Heatmap of the top-N selection, valued by changed lines ('locc').
top_N = vis.plot_top_N_heatmap(N, value_column='locc')
# Preview the returned frame.
top_N.head()

In [None]:
# NOTE(review): seaborn is not used in this cell; the import repeats the one in earlier cells.
import seaborn as sns
# Same heatmap as the previous cell, valued by the 'change-size-cos' column instead of 'locc'.
# Overwrites `top_N` from the previous cell.
top_N = vis.plot_top_N_heatmap(N, value_column='change-size-cos')
top_N.head()

In [None]:
# NOTE(review): Patterns is not referenced in the cells visible here — confirm it is still needed.
from patterns.patterns import Patterns
# Rebind `df` to the visualizer's current commit table (a reference, not a copy).
df = vis.commit_data

In [None]:
# Reset the visualizer (presumably clears the year/month selections made above — confirm),
# then annotate the commit data using the 'cos' distance algorithm.
vis.reset()
vis.annotate_metrics(diff_alg='cos')
# Build two file/developer tables: one valued by 'locc', one by 'change-size-cos'.
file_dev_locc = vis.make_file_developer_df(value_column='locc')
file_dev_diff = vis.make_file_developer_df(value_column='change-size-cos')

In [None]:
# Element-wise difference between the 'locc' table and the 'change-size-cos' table.
diff_df = file_dev_locc.sub(file_dev_diff, axis=0)
# Show the dimensions, then preview the first rows.
print(diff_df.shape)
diff_df.head()

In [None]:
# Work on a copy so that adding the derived column does not silently modify the
# visualizer's own commit table. The original assigned into vis.commit_data through
# an alias, which changes state seen by every later cell.
df = vis.commit_data.copy()
# Per-row gap between raw changed lines and the cosine-based change size.
df['locc-cos diff'] = df['locc'] - df['change-size-cos']
# Heatmap of the top-10 selection valued by that gap, drawn from the augmented copy.
# NOTE(review): assumes plot_top_N_heatmap reads only `my_df` when it is supplied — confirm.
d = vis.plot_top_N_heatmap(top_N = 10, value_column='locc-cos diff', my_df=df)

In [None]:
# Disabled experiment: a top-20 heatmap drawn from `diff_df`. The statement is
# incomplete (no assignment target before '=').
# NOTE(review): finish or delete this cell.
#= vis.plot_top_N_heatmap(20,my_df=diff_df)