# Breve análise dos resultados obtidos

Neste notebook, são computadas a média e a mediana dos projetos coletados, dados os valores resultantes da detecção de padrões.

In [1]:
import os

# Root directory holding the collected projects and their `<project>.txt` result files.
# The default is the path used when the notebook was written; set the
# COLLECTED_PROJECTS_DIR environment variable to run the notebook on another machine.
collected_projects_dir = (
    os.environ.get('COLLECTED_PROJECTS_DIR')
    or '/home/gabriel/Personal/Study/2021.1/TG/collected-projects'
)

In [2]:
import glob

projects_pattern = f'{collected_projects_dir}/*/'

# Expand the pattern in Python rather than through `!echo`: splitting the shell
# output on spaces breaks directory names that contain a space, and the shell
# echoes the unexpanded pattern back when nothing matches. The trailing '/' in
# the pattern restricts matches to directories; it is stripped from each result.
# sorted() keeps the listing order deterministic.
dirnames = sorted(path.rstrip('/') for path in glob.glob(projects_pattern))

# Display only the last path component (the project name) of each directory.
[dirname.split('/')[-1] for dirname in dirnames]

['ARouter',
 'Android-CleanArchitecture',
 'Android-ObservableScrollView',
 'AndroidAutoSize',
 'AndroidUtilCode',
 'Auto.js',
 'BaseRecyclerViewAdapterHelper',
 'Detox',
 'DoraemonKit',
 'ExoPlayer',
 'FileDownloader',
 'GSYVideoPlayer',
 'ImmersionBar',
 'MMKV',
 'MPAndroidChart',
 'MVPArms',
 'MaterialDesignLibrary',
 'Matisse',
 'NewPipe',
 'PermissionsDispatcher',
 'PictureSelector',
 'QMUI_Android',
 'SmartRefreshLayout',
 'VirtualXposed',
 'androidannotations',
 'cocos2d-x',
 'easypermissions',
 'fastlane',
 'flutter',
 'glide',
 'ijkplayer',
 'joplin',
 'libgdx',
 'logger',
 'material-components-android',
 'matrix',
 'mediapipe',
 'realm-java',
 'scrcpy',
 'termux-app',
 'uCrop',
 'vlayout',
 'wcdb',
 'weex',
 'xbmc',
 'zxing']

In [3]:
# Number of project directories found.
len(dirnames)

46

In [4]:
# Each project directory has a sibling results file named `<directory>.txt`.
text_results_files = [project_dir + '.txt' for project_dir in dirnames]

In [5]:
# The only usable collections of lines are those with no errors and with no NaN
# results; NaN indicates that zero Activity.java files were counted.
# Although the project search is restricted with respect to projects lacking
# Activity.java files (presumably to exclude them), this situation can still
# occur because of files whose names merely contain the same sequence of
# letters, such as any name matching the regular expression
# .*A.*c.*t.*i.*v.*i.*t.*.y.*\..*j.*a.*v.*a.*
def is_elligible(lines):
    """Return True when `lines` is non-empty and contains no prohibited marker.

    Args:
        lines: list of text lines read from one results file.

    Returns:
        False if `lines` is empty or if any line contains '!! ERROR !!' or
        'NaN'; True otherwise.
    """
    prohibited_elements = [
        '!! ERROR !!',
        'NaN'
    ]

    # An empty file has neither a first nor a last line to read afterwards,
    # so it cannot be treated as a usable result.
    if not lines:
        return False

    return not any(elem in line for line in lines for elem in prohibited_elements)

def lines_from_filename(filename):
    """Open `filename` in text mode and return its lines as a list.

    Line terminators are kept on each returned line.
    """
    with open(filename, 'r') as handle:
        contents = list(handle)
    return contents

In [6]:
# Lazily read every results file; the iterator is consumed by the filter below.
line_lists = map(lines_from_filename, text_results_files)

# Keep only the files whose contents pass the eligibility check.
elligible_line_lists = [file_lines for file_lines in line_lists if is_elligible(file_lines)]

# The first line of each file is taken as the verdict text and the last line
# as the results text.
verdict_strings = [file_lines[0] for file_lines in elligible_line_lists]
result_strings = [file_lines[-1] for file_lines in elligible_line_lists]

In [7]:
# The set of pattern names accepted by the helper functions of this notebook.
pattern_names = set(['MVC', 'MVP', 'MVVM', 'MVI'])

In [8]:
import re

In [9]:
def check_pattern_name(pattern_name):
    """Validate that `pattern_name` is a member of `pattern_names`.

    Args:
        pattern_name: the pattern name to validate.

    Raises:
        ValueError: if `pattern_name` is not in `pattern_names`.
    """
    if pattern_name not in pattern_names:
        # ValueError subclasses Exception, so code that caught the previous
        # bare Exception keeps working.
        raise ValueError(f'Error: given pattern, "{pattern_name}", is not one of "MVC", "MVP", "MVVM", or "MVI"')

def verdict_for_pattern_name(pattern_name, text):
    """Tell whether `pattern_name` occurs anywhere in `text`.

    The name is first validated with `check_pattern_name`, which raises for
    unknown names.
    """
    check_pattern_name(pattern_name)

    is_mentioned = pattern_name in text
    return is_mentioned

def value_for_pattern_name(pattern_name, text):
    """Extract the number that follows '<pattern_name>: ' in `text`.

    Args:
        pattern_name: pattern name to look for; validated with
            `check_pattern_name`, which raises for unknown names.
        text: the string to search.

    Returns:
        The first number found after '<pattern_name>: ', as a float.

    Raises:
        ValueError: if `text` contains no '<pattern_name>: <number>' entry.
    """
    check_pattern_name(pattern_name)

    # Match a real numeric literal (optional sign, digits with an optional
    # fractional part, optional exponent). The previous pattern used an
    # unescaped '.', so it could capture text such as '0,' or '1e' that
    # float() cannot parse.
    number = r'[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?'
    match = re.search(re.escape(pattern_name) + r': (' + number + r')', text)

    if match is None:
        raise ValueError(f'No numeric value found for pattern "{pattern_name}" in text: {text!r}')

    return float(match.group(1))

In [10]:
# Map each pattern name to the list of values parsed from every results line.
results_per_pattern = dict(
    (name, [value_for_pattern_name(name, results_line) for results_line in result_strings])
    for name in pattern_names
)

In [11]:
# Map each pattern name to the list of booleans telling whether that name
# occurs in each verdict line.
verdicts_per_pattern = dict(
    (name, [verdict_for_pattern_name(name, verdict_line) for verdict_line in verdict_strings])
    for name in pattern_names
)

In [12]:
!pip install pandas

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.


In [13]:
import pandas as pd

In [14]:
# Fixed column order for the table (a set has no guaranteed iteration order).
ordered_pattern_names = ['MVC', 'MVP', 'MVVM', 'MVI']

result_lists = [results_per_pattern[pattern_name] for pattern_name in ordered_pattern_names]
verdict_lists = [verdicts_per_pattern[pattern_name] for pattern_name in ordered_pattern_names]

# The result and verdict lists only hold entries for eligible files, so the
# filename column must be filtered with the same predicate. Zipping the
# unfiltered filename list would silently truncate it and pair every row after
# the first skipped file with the wrong filename.
elligible_results_files = [
    filename
    for filename in text_results_files
    if is_elligible(lines_from_filename(filename))
]

result_table_transposed = [elligible_results_files, *result_lists, *verdict_lists]

# zip() stops at the shortest column; make any length mismatch fail loudly.
expected_row_count = len(elligible_results_files)
assert all(len(column) == expected_row_count for column in result_table_transposed), \
    'filename, result and verdict columns must all have the same length'

# Transpose the columns into one tuple per file.
result_table = list(zip(*result_table_transposed))

In [15]:
# Preview the first three rows of the table.
result_table[:3]

[('/home/gabriel/Personal/Study/2021.1/TG/collected-projects/ARouter.txt',
  0.0,
  0.6333333333333333,
  0.0,
  0.0,
  False,
  True,
  False,
  False),
 ('/home/gabriel/Personal/Study/2021.1/TG/collected-projects/Android-CleanArchitecture.txt',
  0.0,
  0.782608695652174,
  0.0,
  0.0,
  False,
  True,
  False,
  False),
 ('/home/gabriel/Personal/Study/2021.1/TG/collected-projects/Android-ObservableScrollView.txt',
  0.0,
  0.8035714285714286,
  0.0,
  0.0,
  False,
  True,
  False,
  False)]

In [16]:
# Column layout: the file path, the four per-pattern values, then the four
# per-pattern verdict flags, in the same order as the tuples of result_table.
result_df = pd.DataFrame(
    data=result_table,
    columns=(
        ['filename']
        + ['MVC', 'MVP', 'MVVM', 'MVI']
        + ['MVC_VERDICT', 'MVP_VERDICT', 'MVVM_VERDICT', 'MVI_VERDICT']
    ),
)

result_df.head()

Unnamed: 0,filename,MVC,MVP,MVVM,MVI,MVC_VERDICT,MVP_VERDICT,MVVM_VERDICT,MVI_VERDICT
0,/home/gabriel/Personal/Study/2021.1/TG/collect...,0.0,0.633333,0.0,0.0,False,True,False,False
1,/home/gabriel/Personal/Study/2021.1/TG/collect...,0.0,0.782609,0.0,0.0,False,True,False,False
2,/home/gabriel/Personal/Study/2021.1/TG/collect...,0.0,0.803571,0.0,0.0,False,True,False,False
3,/home/gabriel/Personal/Study/2021.1/TG/collect...,0.0,0.5,0.0,0.0,False,False,False,False
4,/home/gabriel/Personal/Study/2021.1/TG/collect...,0.0,0.885714,0.0,0.0,False,True,False,False


In [17]:
# Derive the project name from each path: the last path component with the
# trailing '.txt' removed.
result_df['name'] = [
    re.search(r'([^/]+)\.txt$', path).group(1)
    for path in result_df['filename']
]

result_df[['name', 'filename']].head()

Unnamed: 0,name,filename
0,ARouter,/home/gabriel/Personal/Study/2021.1/TG/collect...
1,Android-CleanArchitecture,/home/gabriel/Personal/Study/2021.1/TG/collect...
2,Android-ObservableScrollView,/home/gabriel/Personal/Study/2021.1/TG/collect...
3,AndroidAutoSize,/home/gabriel/Personal/Study/2021.1/TG/collect...
4,AndroidUtilCode,/home/gabriel/Personal/Study/2021.1/TG/collect...


## Histogramas

Aqui, plotar-se-ão histogramas dos valores limite para cada padrão arquitetural.

Antes de iniciarmos, vejamos a quantidade de projetos para cada veredito para cada padrão arquitetural.

In [18]:
def verdict_count_for_pattern_name(pattern_name):
    """Count the rows of `result_df` for each value of `<pattern_name>_VERDICT`.

    Returns a DataFrame indexed by the verdict value, with the row count in
    the 'name' column.
    """
    verdict_column = f'{pattern_name}_VERDICT'
    verdict_and_name = result_df[[verdict_column, 'name']]
    return verdict_and_name.groupby(verdict_column).count()

verdict_count_for_pattern_name('MVC')

Unnamed: 0_level_0,name
MVC_VERDICT,Unnamed: 1_level_1
False,41


In [19]:
# Row counts per MVP verdict value.
verdict_count_for_pattern_name('MVP')

Unnamed: 0_level_0,name
MVP_VERDICT,Unnamed: 1_level_1
False,6
True,35


In [20]:
# Row counts per MVVM verdict value.
verdict_count_for_pattern_name('MVVM')

Unnamed: 0_level_0,name
MVVM_VERDICT,Unnamed: 1_level_1
False,41


In [21]:
# Row counts per MVI verdict value.
verdict_count_for_pattern_name('MVI')

Unnamed: 0_level_0,name
MVI_VERDICT,Unnamed: 1_level_1
False,39
True,2


De acordo com a revisão dos _outputs_ de _debugging_ do detector de padrões, MVP é detectado muito frequentemente e MVI somente duas vezes, enquanto os outros padrões não são detectados em nenhum dos projetos estudados. Há, também, projetos sem padrão definido.

In [22]:
# Rows for which none of the four verdict flags is set, showing only the
# per-pattern values and the project name.
result_df.loc[
    ~result_df[['MVC_VERDICT', 'MVP_VERDICT', 'MVVM_VERDICT', 'MVI_VERDICT']].any(axis=1),
    ['MVC', 'MVP', 'MVVM', 'MVI', 'name'],
]

Unnamed: 0,MVC,MVP,MVVM,MVI,name
3,0.0,0.5,0.0,0.0,AndroidAutoSize
23,0.0,0.344538,0.0,0.0,VirtualXposed
27,0.0,0.592593,0.0,0.0,fastlane
29,0.0,0.0,0.0,0.0,glide
30,0.0,0.5,0.0,0.0,ijkplayer
39,0.0,0.666667,0.0,0.0,termux-app
