# Structural and logical change coupling

Instructions: 

* Replace `proj_name` and `proj_data_folder` in the Configuration section
* [optional] Replace the cloud drive folder (`GDRIVE_FOLDER`) in the Configuration section

## Configuration

In [1]:
# Project identifier. The analytics database file name is built from it as
# proj_data_folder + proj_name + '_analytics.db'.
proj_name = 'glucosio-android' #'PROJ_NAME'
# Relative path, so it is resolved against the current working directory
# (the notebook changes into the Drive project folder before using it).
proj_data_folder = './ex_ubuntu/' # database files location

# Appended to '/gdrive/My Drive/' to locate the project's folder on Drive.
GDRIVE_FOLDER = 'callgraphCA/glucosioExample'  # cloud drive folder

In [2]:
import os

from google.colab import drive

# Mount Google Drive, then work from inside the project's folder so that
# relative paths used later resolve against it.
drive.mount('/gdrive')

# the project's folder
drive_folder = f'/gdrive/My Drive/{GDRIVE_FOLDER}'
os.chdir(drive_folder)

Mounted at /gdrive


In [3]:
# Sanity check: display the working directory after the chdir into the Drive folder.
os.getcwd()

'/gdrive/My Drive/callgraphCA/glucosioExample'

## Imports

In [4]:
!pip install apyori
# https://github.com/ymoch/apyori
# https://medium.com/linkit-intecs/apriori-algorithm-in-data-mining-part-2-590d58e0998b

!pip install python-stopwatch

Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5974 sha256=1d08ec5dd6521968355ca0069e8e4c296373aa2f548501d562a9924dd6582ff3
  Stored in directory: /root/.cache/pip/wheels/cb/f6/e1/57973c631d27efd1a2f375bd6a83b2a616c4021f24aab84080
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2
Collecting python-stopwatch
  Downloading python_stopwatch-1.0.4-py3-none-any.whl (6.5 kB)
Installing collected packages: python-stopwatch
Successfully installed python-stopwatch-1.0.4


In [5]:
import pandas as pd
import sqlite3
#from re import search
#from typing import List
#import time

#from apyori import apriori
import apyori
#from stopwatch import Stopwatch, profile
# works with lists, not pandas, no nan values, apostrophe between values of transaction

# min_support -- The minimum support of relations (float).
# min_confidence -- The minimum confidence of relations (float).
# min_lift -- The minimum lift of relations (float).
# max_length -- The maximum length of the relation (integer).


In [6]:
from analytics.coupling_association_rules_utils import *

In [34]:
# Development helper: reload the helper module after editing it, then repeat
# the star import so the notebook's names are rebound to the reloaded definitions.
import importlib
import analytics.coupling_association_rules_utils

importlib.reload(analytics.coupling_association_rules_utils)
from analytics.coupling_association_rules_utils import *

## Database connections

In [7]:
# Build the analytics database path from the configuration values.
ANALYTICS_DB_PATH =  proj_data_folder + proj_name + '_analytics.db'
print(ANALYTICS_DB_PATH)
# sqlite3.connect() creates a new, empty database when the file does not exist,
# so a wrong path would only fail later when the tables are queried.
# Fail fast here instead of discarding the existence check.
if not os.path.exists(ANALYTICS_DB_PATH):
    raise FileNotFoundError(
        f"Analytics database not found: {ANALYTICS_DB_PATH} (cwd: {os.getcwd()})"
    )
con_analytics_db = sqlite3.connect(ANALYTICS_DB_PATH)

./ex_ubuntu/glucosio-android_analytics.db


In [8]:
# Open a cursor on the analytics database connection.
cur = con_analytics_db.cursor()

# Change coupling and structural dependency rates

In [26]:
## On commit and file level - distinct
# One transaction per commit: the query groups file_commit rows by commit_hash
# and concatenates the distinct file names, each wrapped in apostrophes.
sql_statement = """select 
--commit_hash,
GROUP_CONCAT(distinct("'" || file_name|| "'") )  as files_in_hash
from file_commit
group by commit_hash;"""

# get_records yields the full record list, a pruned record list and df
# (presumably the query result as a DataFrame — confirm against the helper).
# The trailing 2 is presumably the minimum-items threshold used for pruning
# — TODO confirm against the helper's signature.
records, pruned_records, df = get_records(con_analytics_db, 'files_in_hash', sql_statement, 2)

df len:  584
records len:  584
pruned_records len:  221


In [None]:
# apyori.apriori requires the records list in a specific format;
# preview the first five records to inspect it.
records[:5]

We can observe the difference between applying association rule mining to the whole set of records and applying it only to the records that remain after pruning by a minimum number of items (default >2).

In [35]:
# Run on the full, unpruned record set.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    records,
    min_confidence=0.1,
    min_support=0.1,
)

Nr rules 5, with structural coupling 0, 0.0


In [45]:
# Same thresholds, but restricted to the pruned record set.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    pruned_records,
    min_confidence=0.1,
    min_support=0.1,
)

Nr rules 22, with structural coupling 4, 0.18


The function *calculate_structural_coupling_rates* returns two lists: `itemsets_list` contains the itemsets with at least two items, and `rules_list` contains all association rules found (including those for a single item, because they show the support of that item across the whole transaction set).

In [46]:
# Show every itemset on its own line.
for itemset in itemsets_list:
    print(itemset)

['DatabaseHandler.java', 'OverviewFragment.java']
['HistoryAdapter.java', 'OverviewFragment.java']
['MainActivity.java', 'HistoryFragment.java']
['MainActivity.java', 'OverviewFragment.java']
['OverviewPresenter.java', 'OverviewFragment.java']


In [38]:
# Show every association rule record on its own line.
for rule in rules_list:
    print(rule)

RelationRecord(items=frozenset({"'AddGlucoseActivity.java'"}), support=0.18552036199095023, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucoseActivity.java'"}), confidence=0.18552036199095023, lift=1.0)])
RelationRecord(items=frozenset({"'AddGlucosePresenter.java'"}), support=0.13574660633484162, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucosePresenter.java'"}), confidence=0.13574660633484162, lift=1.0)])
RelationRecord(items=frozenset({"'AddKetoneActivity.java'"}), support=0.1085972850678733, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddKetoneActivity.java'"}), confidence=0.1085972850678733, lift=1.0)])
RelationRecord(items=frozenset({"'AddWeightActivity.java'"}), support=0.11764705882352941, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddWeightActivity.java'"}), confidence=0.11764705882352941, lift=1.0)])
Relation

We can observe the influence of setting different values of confidence and support.

In [39]:
# Baseline on the pruned records: confidence 0.1, support 0.1.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    pruned_records,
    min_confidence=0.1,
    min_support=0.1,
)

Nr rules 22, with structural coupling 4, 0.18


In [40]:
# Stricter thresholds: confidence 0.15, support 0.15.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    pruned_records,
    min_confidence=0.15,
    min_support=0.15,
)

Nr rules 8, with structural coupling 0, 0.0


In [41]:
# Lower confidence only: confidence 0.05, support 0.1.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    pruned_records,
    min_confidence=0.05,
    min_support=0.1,
)

Nr rules 22, with structural coupling 4, 0.18


In [42]:
# Lower both thresholds: confidence 0.05, support 0.05.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    pruned_records,
    min_confidence=0.05,
    min_support=0.05,
)

Nr rules 131, with structural coupling 87, 0.66


# Display transactions with given itemsets

In [51]:
# Look up the transactions in df that involve these two files.
l_elem = [
    'OverviewFragment.java',
    'DatabaseHandler.java',
]
show_transactions_containing_items(
    df, 'files_in_hash', l_elem, print_elems=False
)

Element count. Df len 584. 1ind: 93, 2dep: 25, 2ind: 92


In [52]:
# Look up the transactions in df that involve these two files.
l_elem = [
    'AddCholesterolActivity.java',
    'AddA1CActivity.java',
]
show_transactions_containing_items(
    df, 'files_in_hash', l_elem, print_elems=False
)

Element count. Df len 584. 1ind: 22, 2dep: 14, 2ind: 16


In [53]:
# Look up the transactions in df that involve these four files.
# NOTE(review): unlike the two preceding lookups, these names include the
# embedded apostrophes — confirm which form the helper expects.
l_elems = [
    "'AddKetoneActivity.java'",
    "'AddWeightActivity.java'",
    "'AddGlucoseActivity.java'",
    "'AddPressureActivity.java'",
]
show_transactions_containing_items(
    df, 'files_in_hash', l_elems, print_elems=False
)

Element count. Df len 584. 1ind: 25, 2dep: 20, 3dep: 17, 4dep: 17,
    2ind: 26, 3ind: 47, 4ind: 21


## On week

In [54]:
# apyori.apriori needs apostrophes around each of the values of the transaction
#
# One transaction per ISO week. Both the year and the week number are derived
# from the Thursday of the commit's ISO week (date(..., '-3 days', 'weekday 4')),
# because that Thursday always lies in the ISO year the week belongs to.
# Taking the year from the commit's calendar date instead splits a week that
# straddles New Year into two groups, and can merge early-January commits with
# the last ISO week of the same calendar year.
# NOTE(review): file names are not de-duplicated within a week (the DISTINCT
# variant is commented out inside the query) — confirm this is intended.
sql_statement = """select
strftime('%Y', date(commit_commiter_datetime, '-3 days', 'weekday 4')) as iso_yr,
(strftime('%j', date(commit_commiter_datetime, '-3 days', 'weekday 4')) - 1) / 7 + 1 as iso_week,
GROUP_CONCAT("'" || file_name|| "'") as files_in_week
--GROUP_CONCAT(distinct("'" || file_name|| "'") ) as files_in_week
from file_commit
group by strftime('%Y', date(commit_commiter_datetime, '-3 days', 'weekday 4')),
(strftime('%j', date(commit_commiter_datetime, '-3 days', 'weekday 4')) - 1) / 7 + 1;"""


records, pruned_records, df = get_records(con_analytics_db, 'files_in_week', sql_statement, 2)

df len:  87
records len:  87
pruned_records len:  78


In [55]:
# Week-level run on the full record set: confidence 0.1, support 0.1.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    records,
    min_confidence=0.1,
    min_support=0.1,
)

Nr rules 379, with structural coupling 288, 0.76


In [60]:
# Week-level run on the pruned records with a higher confidence: 0.5, support 0.1.
rules_list, itemsets_list = calculate_structural_coupling_rates(
    con_analytics_db,
    pruned_records,
    min_confidence=0.5,
    min_support=0.1,
)

Nr rules 581, with structural coupling 516, 0.89
