# Structural and logical change coupling

Instructions: 

* Replace `proj_name` and `proj_data_folder` in the Configuration section
* [optional] Replace the cloud drive folder (`GDRIVE_FOLDER`) in the Configuration section

## Configuration

In [1]:
# Name of the analysed project; it prefixes the analytics database file name.
proj_name = "glucosio-android"
# Folder that holds the database files (note the trailing slash).
proj_data_folder = "./ex_ubuntu/"

# Folder below "My Drive" that becomes the working directory on Colab.
GDRIVE_FOLDER = "callgraphCA/glucosioExample"

In [2]:
from google.colab import drive
import os

# Mount Google Drive, then switch the working directory to the project's
# folder so that relative paths are resolved inside it.
drive.mount('/gdrive')
drive_folder = f'/gdrive/My Drive/{GDRIVE_FOLDER}'
os.chdir(drive_folder)

Mounted at /gdrive


In [3]:
# Show the current working directory to confirm the chdir above took effect.
os.getcwd()

'/gdrive/My Drive/callgraphCA/glucosioExample'

## Imports

In [5]:
!pip install apyori
# https://github.com/ymoch/apyori
# https://medium.com/linkit-intecs/apriori-algorithm-in-data-mining-part-2-590d58e0998b

!pip install python-stopwatch

Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5974 sha256=214a2a07470c4913a07056f5d25e204a4e3af48760cb535860442622d8f12e30
  Stored in directory: /root/.cache/pip/wheels/cb/f6/e1/57973c631d27efd1a2f375bd6a83b2a616c4021f24aab84080
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2
Collecting python-stopwatch
  Downloading python_stopwatch-1.0.4-py3-none-any.whl (6.5 kB)
Installing collected packages: python-stopwatch
Successfully installed python-stopwatch-1.0.4


In [6]:
import pandas as pd
import sqlite3
from re import search
from typing import List
import importlib
import time

#from apyori import apriori
import apyori
#from stopwatch import Stopwatch, profile
# works with lists, not pandas, no nan values, apostrophe between values of transaction

# min_support -- The minimum support of relations (float).
# min_confidence -- The minimum confidence of relations (float).
# min_lift -- The minimum lift of relations (float).
# max_length -- The maximum length of the relation (integer).


In [7]:
from analytics.coupling_association_rules_utils import *
from analytics.structural_dependencies import *

In [None]:
# Development helper: pick up edits made to the helper module on disk.
# The module is addressed through the `analytics` package, the same path the
# notebook imports its helpers from, so the reloaded definitions replace the
# ones that are actually in use.
import analytics.coupling_association_rules_utils as coupling_association_rules_utils

importlib.reload(coupling_association_rules_utils)
from analytics.coupling_association_rules_utils import *

## Database connections

In [8]:
# Location of the project's analytics SQLite database.
ANALYTICS_DB_PATH = os.path.join(proj_data_folder, proj_name + '_analytics.db')
print(ANALYTICS_DB_PATH)
# sqlite3.connect() silently creates an empty database when the file is
# missing, so fail early instead of querying a database without tables.
if not os.path.exists(ANALYTICS_DB_PATH):
    raise FileNotFoundError('Analytics database not found: ' + ANALYTICS_DB_PATH)
con_analytics_db = sqlite3.connect(ANALYTICS_DB_PATH)

./ex_ubuntu/glucosio-android_analytics.db


In [None]:
# Cursor on the analytics database connection.
# NOTE(review): no cell visible in this notebook section uses `cur` — confirm it is still needed.
cur = con_analytics_db.cursor()

# Change coupling

## On commit and file level - distinct

In [9]:
# One row per commit: the distinct file names of the commit, each wrapped in
# apostrophes and concatenated by GROUP_CONCAT into the column files_in_hash.
sql_statement = """select 
--commit_hash,
GROUP_CONCAT(distinct("'" || file_name|| "'") )  as files_in_hash
from file_commit
group by commit_hash;"""

# get_records yields the transactions, a pruned subset of them and the query
# result as a DataFrame.
# NOTE(review): the last argument (2) is presumably the minimum number of files
# a transaction needs to be kept in pruned_records — confirm against get_records.
records, pruned_records, df = get_records(con_analytics_db, 'files_in_hash', sql_statement, 2)

df len:  584
records len:  584
pruned_records len:  221


In [11]:
# Peek at the first five transactions.
records[:5]

[["'HelloActivity.java'",
  "'LicenceActivity.java'",
  "'PreferencesActivity.java'",
  "'AssistantAdapter.java'",
  "'HistoryAdapter.java'",
  "'HomePagerAdapter.java'",
  "'HistoryFragment.java'",
  "'OverviewFragment.java'",
  "'AssistantPresenter.java'",
  "'HelloPresenter.java'",
  "'HistoryPresenter.java'",
  "'OverviewPresenter.java'",
  "'None'",
  "'FormatDateTime.java'",
  "'ReadingTools.java'"],
 ["'PreferencesActivityTest.java'"],
 ["'OverviewFragment.java'"],
 ["'GlucosioApplication.java'", "'MainActivity.java'"],
 ["'OverviewFragment.java'"]]

In [12]:
# Mine relations over all commit-level transactions (support and confidence >= 0.1).
rules = apyori.apriori(records, min_support=0.1, min_confidence=0.1)
# apriori returns a generator; keep the results in a list for repeated inspection.
rules_list = list(rules)
print(len(rules_list))

5


In [13]:
# Print every mined relation, one per line.
for rule in rules_list:
    print(rule)

RelationRecord(items=frozenset({"'DatabaseHandler.java'"}), support=0.15753424657534246, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'DatabaseHandler.java'"}), confidence=0.15753424657534246, lift=1.0)])
RelationRecord(items=frozenset({"'HelloActivity.java'"}), support=0.10273972602739725, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'HelloActivity.java'"}), confidence=0.10273972602739725, lift=1.0)])
RelationRecord(items=frozenset({"'MainActivity.java'"}), support=0.285958904109589, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'MainActivity.java'"}), confidence=0.285958904109589, lift=1.0)])
RelationRecord(items=frozenset({"'OverviewFragment.java'"}), support=0.15924657534246575, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'OverviewFragment.java'"}), confidence=0.15924657534246575, lift=1.0)])
RelationRecord(items=frozenset({"'Prefer

In [14]:
# Same mining run with looser thresholds (support and confidence >= 0.05).
rules = apyori.apriori(records, min_support=0.05, min_confidence=0.05)
# apriori returns a generator; keep the results in a list for repeated inspection.
rules_list = list(rules)
print(len(rules_list))

15


In [16]:
# Print the first five mined relations.
for rule in rules_list[:5]:
    print(rule)

RelationRecord(items=frozenset({"'AddGlucoseActivity.java'"}), support=0.08047945205479452, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucoseActivity.java'"}), confidence=0.08047945205479452, lift=1.0)])
RelationRecord(items=frozenset({"'AddGlucosePresenter.java'"}), support=0.06164383561643835, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucosePresenter.java'"}), confidence=0.06164383561643835, lift=1.0)])
RelationRecord(items=frozenset({"'BackupActivity.java'"}), support=0.06164383561643835, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'BackupActivity.java'"}), confidence=0.06164383561643835, lift=1.0)])
RelationRecord(items=frozenset({"'DatabaseHandler.java'"}), support=0.15753424657534246, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'DatabaseHandler.java'"}), confidence=0.15753424657534246, lift=1.0)])
RelationRecord(i

### pruned records with 2 or more occurrences

In [17]:
# Peek at the first three pruned transactions.
pruned_records[:3]

[["'HelloActivity.java'",
  "'LicenceActivity.java'",
  "'PreferencesActivity.java'",
  "'AssistantAdapter.java'",
  "'HistoryAdapter.java'",
  "'HomePagerAdapter.java'",
  "'HistoryFragment.java'",
  "'OverviewFragment.java'",
  "'AssistantPresenter.java'",
  "'HelloPresenter.java'",
  "'HistoryPresenter.java'",
  "'OverviewPresenter.java'",
  "'None'",
  "'FormatDateTime.java'",
  "'ReadingTools.java'"],
 ["'DatabaseHandler.java'",
  "'OverviewFragment.java'",
  "'OverviewPresenter.java'"],
 ["'GlucosioApplication.java'",
  "'AddA1CActivity.java'",
  "'AddCholesterolActivity.java'",
  "'AddKetoneActivity.java'",
  "'AddPressureActivity.java'",
  "'AddWeightActivity.java'",
  "'OverviewFragment.java'",
  "'OverviewPresenter.java'",
  "'HelloActivityTest.java'"]]

In [20]:
# Mine relations over the pruned transactions (support and confidence >= 0.15)
# and print the number of relations followed by each relation.
rules = apyori.apriori(pruned_records, min_support=0.15, min_confidence=0.15)
rules_list = list(rules)
print(len(rules_list))
for rule in rules_list:
    print(rule)

8
RelationRecord(items=frozenset({"'AddGlucoseActivity.java'"}), support=0.18552036199095023, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucoseActivity.java'"}), confidence=0.18552036199095023, lift=1.0)])
RelationRecord(items=frozenset({"'DatabaseHandler.java'"}), support=0.29411764705882354, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'DatabaseHandler.java'"}), confidence=0.29411764705882354, lift=1.0)])
RelationRecord(items=frozenset({"'HistoryAdapter.java'"}), support=0.1583710407239819, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'HistoryAdapter.java'"}), confidence=0.1583710407239819, lift=1.0)])
RelationRecord(items=frozenset({"'HistoryFragment.java'"}), support=0.16289592760180996, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'HistoryFragment.java'"}), confidence=0.16289592760180996, lift=1.0)])
RelationRecord(items=fro

In [22]:
# Pruned transactions, support and confidence >= 0.1.
rules = apyori.apriori(pruned_records, min_support=0.1, min_confidence=0.1)
# apriori returns a generator; keep the results in a list for repeated inspection.
rules_list = list(rules)
print(len(rules_list))

22


In [25]:
# Print every mined relation, one per line.
for rule in rules_list:
    print(rule)
  

RelationRecord(items=frozenset({"'AddGlucoseActivity.java'"}), support=0.18552036199095023, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucoseActivity.java'"}), confidence=0.18552036199095023, lift=1.0)])
RelationRecord(items=frozenset({"'AddGlucosePresenter.java'"}), support=0.13574660633484162, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucosePresenter.java'"}), confidence=0.13574660633484162, lift=1.0)])
RelationRecord(items=frozenset({"'AddKetoneActivity.java'"}), support=0.1085972850678733, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddKetoneActivity.java'"}), confidence=0.1085972850678733, lift=1.0)])
RelationRecord(items=frozenset({"'AddWeightActivity.java'"}), support=0.11764705882352941, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddWeightActivity.java'"}), confidence=0.11764705882352941, lift=1.0)])
Relation

In [27]:
# Pruned transactions, support and confidence >= 0.05.
rules = apyori.apriori(pruned_records, min_support=0.05, min_confidence=0.05)
# apriori returns a generator; keep the results in a list for repeated inspection.
rules_list = list(rules)
print(len(rules_list))

131


In [28]:
# Print every mined relation, one per line.
for rule in rules_list:
    print(rule)

RelationRecord(items=frozenset({"'AboutActivity.java'"}), support=0.06787330316742081, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AboutActivity.java'"}), confidence=0.06787330316742081, lift=1.0)])
RelationRecord(items=frozenset({"'AddA1CActivity.java'"}), support=0.07239819004524888, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddA1CActivity.java'"}), confidence=0.07239819004524888, lift=1.0)])
RelationRecord(items=frozenset({"'AddCholesterolActivity.java'"}), support=0.09954751131221719, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddCholesterolActivity.java'"}), confidence=0.09954751131221719, lift=1.0)])
RelationRecord(items=frozenset({"'AddGlucoseActivity.java'"}), support=0.18552036199095023, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'AddGlucoseActivity.java'"}), confidence=0.18552036199095023, lift=1.0)])
RelationRecord

# TODO parse RelationRecord

In [None]:
# See the items that were bought together with their support
parsed_rules = []
for i in range(0, len(rules_list)):
    parsed_rules.append('RULE:\t' + str(rules_list[i][0]) + '\nSUPPORT:\t' + str(rules_list[i][1]))

### Find the transactions in which these rules occur

In [29]:
# Transactions that consist of exactly these two files, in this order.
exact_pair = df['files_in_hash'] == "'PreferencesActivity.java','LocaleHelper.java'"
print(df.loc[exact_pair])


                                      files_in_hash
266  'PreferencesActivity.java','LocaleHelper.java'
276  'PreferencesActivity.java','LocaleHelper.java'
305  'PreferencesActivity.java','LocaleHelper.java'


In [30]:
# Count the commit-level transactions that contain these files
# (the helper prints an "Element count" summary).
l_elem = [
    'OverviewFragment.java',
    'DatabaseHandler.java',
]
show_transactions_containing_items(df, 'files_in_hash', l_elem, print_elems=False)

Element count. Df len 584. 1ind: 93, 2dep: 25, 2ind: 92


In [31]:
# Same count for another pair of files.
l_elem = [
    'AddCholesterolActivity.java',
    'AddA1CActivity.java',
]
show_transactions_containing_items(df, 'files_in_hash', l_elem, print_elems=False)

Element count. Df len 584. 1ind: 22, 2dep: 14, 2ind: 16


In [33]:
# Count for a group of four files.
# NOTE(review): unlike the neighbouring calls, these names include the
# surrounding apostrophes — confirm which form
# show_transactions_containing_items expects.
l_elems = [
    "'AddKetoneActivity.java'",
    "'AddWeightActivity.java'",
    "'AddGlucoseActivity.java'",
    "'AddPressureActivity.java'",
]
show_transactions_containing_items(df, 'files_in_hash', l_elems, print_elems=False)

Element count. Df len 584. 1ind: 25, 2dep: 20, 3dep: 17, 4dep: 17,
    2ind: 26, 3ind: 47, 4ind: 21


In [34]:
# List the queried file names, one per line.
for file_name in l_elems:
    print(file_name)

'AddKetoneActivity.java'
'AddWeightActivity.java'
'AddGlucoseActivity.java'
'AddPressureActivity.java'


## On week

In [36]:
# apyori.apriori needs apostrophes around each of the values of the transaction
sql_statement = """select
strftime('%Y', date(commit_commiter_datetime)) as iso_yr,
(strftime('%j', date(commit_commiter_datetime, '-3 days', 'weekday 4')) - 1) / 7 + 1 as iso_week,
GROUP_CONCAT("'" || file_name|| "'") as files_in_week
--GROUP_CONCAT(distinct("'" || file_name|| "'") ) as files_in_week
from file_commit
group by strftime('%Y', date(commit_commiter_datetime)),
(strftime('%j', date(commit_commiter_datetime, '-3 days', 'weekday 4')) - 1) / 7 + 1;"""


records, pruned_records, df = get_records(con_analytics_db, 'files_in_week', sql_statement, 2)

df len:  87
records len:  87
pruned_records len:  78


In [38]:
# Mine relations over the weekly transactions (support and confidence >= 0.1).
rules = apyori.apriori(records, min_support=0.1, min_confidence=0.1)
# The generator is stored as a list so that it can be analysed further.
rules_list = list(rules)
print(len(rules_list))

379


In [39]:
# Print a small sample of the mined relations (positions 100 to 103).
for rule in rules_list[100:104]:
    print(rule)

RelationRecord(items=frozenset({"'MainActivity.java'", "'ExportPresenter.java'"}), support=0.1839080459770115, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'MainActivity.java'", "'ExportPresenter.java'"}), confidence=0.1839080459770115, lift=1.0), OrderedStatistic(items_base=frozenset({"'ExportPresenter.java'"}), items_add=frozenset({"'MainActivity.java'"}), confidence=0.8888888888888888, lift=1.380952380952381), OrderedStatistic(items_base=frozenset({"'MainActivity.java'"}), items_add=frozenset({"'ExportPresenter.java'"}), confidence=0.2857142857142857, lift=1.380952380952381)])
RelationRecord(items=frozenset({"'ExportPresenter.java'", "'PreferencesActivity.java'"}), support=0.11494252873563218, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'ExportPresenter.java'", "'PreferencesActivity.java'"}), confidence=0.11494252873563218, lift=1.0), OrderedStatistic(items_base=frozenset({"'ExportPresenter.java'"}), items_

In [40]:
# Inspect a single mined relation: its item set, its support and each of its
# ordered statistics (base items, added items, confidence, lift).
for r in rules_list[100:101]:
    print(r.items)
    print(r.support)
    # The loop variable must not be called `os`: that would rebind the `os`
    # module imported earlier in the notebook and break later `os.*` calls.
    for ordered_stat in r.ordered_statistics:
        print(ordered_stat)

frozenset({"'MainActivity.java'", "'ExportPresenter.java'"})
0.1839080459770115
OrderedStatistic(items_base=frozenset(), items_add=frozenset({"'MainActivity.java'", "'ExportPresenter.java'"}), confidence=0.1839080459770115, lift=1.0)
OrderedStatistic(items_base=frozenset({"'ExportPresenter.java'"}), items_add=frozenset({"'MainActivity.java'"}), confidence=0.8888888888888888, lift=1.380952380952381)
OrderedStatistic(items_base=frozenset({"'MainActivity.java'"}), items_add=frozenset({"'ExportPresenter.java'"}), confidence=0.2857142857142857, lift=1.380952380952381)


In [41]:
# Count the weekly transactions that contain these files
# (the helper prints an "Element count" summary).
l_elem = [
    'AddCholesterolActivity.java',
    'AddA1CActivity.java',
]
show_transactions_containing_items(df, 'files_in_week', l_elem, print_elems=False)

Element count. Df len 87. 1ind: 14, 2dep: 8, 2ind: 10


In [42]:
# Same weekly count for another pair of files.
l_elem = [
    'ExportPresenter.java',
    'ReadingToCSV.java',
]
show_transactions_containing_items(df, 'files_in_week', l_elem, print_elems=False)

Element count. Df len 87. 1ind: 18, 2dep: 13, 2ind: 20


In [45]:
# Weekly transactions with looser thresholds (support and confidence >= 0.08).
rules = apyori.apriori(records, min_support=0.08, min_confidence=0.08)
# The generator is stored as a list so that it can be analysed further.
rules_list = list(rules)
print(len(rules_list))

1382


# Structural coupling

In [46]:
# Pair of files used for the structural (import) dependency check.
l_elem = [
    'ExportPresenter.java',
    'ReadingToCSV.java',
]

In [49]:
# Query the analytics database for an import dependency between the files in l_elem.
# NOTE(review): the result pairs a flag with the inspected file list; presumably
# 1 means "dependency found" — confirm in analytics.structural_dependencies.
exist_import_dependency(con_analytics_db, l_elem)

[[1, ['ExportPresenter.java', 'ReadingToCSV.java']]]

In [51]:
# Same check with a second name that is presumably not a file of the project.
l_elem = [
    'ExportPresenter.java',
    'bla.java',
]
exist_import_dependency(con_analytics_db, l_elem)

[[0, ['ExportPresenter.java', 'bla.java']]]

In [52]:
# Fetch the file_pkg rows of both files. The file names are bound as query
# parameters instead of being formatted into the SQL text, so a name that
# contains an apostrophe cannot break the statement.
sql_statement = """SELECT * FROM file_pkg where file_name in (?, ?)"""
df = pd.read_sql_query(
    sql_statement,
    con_analytics_db,
    params=('ExportPresenter.java', 'ReadingToCSV.java'),
)

In [53]:
# Leave the frame as the cell's last expression so that the notebook renders it
# as a table; print() elided the middle columns ("...") of this 5-column result.
df.head()

              file_name  ...                                       class_pkg
0  ExportPresenter.java  ...  org.glucosio.android.presenter.ExportPresenter
1     ReadingToCSV.java  ...         org.glucosio.android.tools.ReadingToCSV

[2 rows x 5 columns]
