In [31]:
import pickle
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
import numpy as np
import os
import pandas as pd
from sklearn import metrics

In [3]:
def __get_data_tensor(texts, label_type, source_type):
    
    TOKENIZER_PATH = f"D:\\peter_devine_projects\\replication_package\\AIRE19_models\\tokenizer\\tokenizer_Category.{label_type}_Language.ENGLISH_Source.{source_type}_.pkl"
    MAX_SEQ_LEN = 200
    # finally, vectorize the text samples into a 2D integer tensor

    with open(TOKENIZER_PATH, 'rb') as handle:
        tokenizer = pickle.load(handle)

    sequences = tokenizer.texts_to_sequences(texts)    

    word_index = tokenizer.word_index
    print('Found %s unique tokens.' % len(word_index))

    return pad_sequences(sequences, maxlen=MAX_SEQ_LEN), word_index

In [4]:
def get_ypreds(texts, label_type, source_type):
    
    model = load_model(f'D:\\peter_devine_projects\\replication_package\\AIRE19_models\\model\\model_Language.ENGLISH_Category.{label_type}_Source.{source_type}.h5')

    data, word_index = __get_data_tensor(texts, label_type, source_type)

    yp = model.predict(data, batch_size=32, verbose=0)
    
    return yp

In [34]:
def get_combined_ypreds(texts, label_type):
    
    yp_app = get_ypreds(texts, label_type, "APP")
    yp_twitter = get_ypreds(texts, label_type, "TWITTER")
    
    return (yp_app + yp_twitter) / 2

In [63]:
label_maps = {
    "chen_2014": [(["non-informative"], ["IRRELEVANT"])],
    "ciurumelea_2017": [(["OTHER"], ["IRRELEVANT"])],
    "di_sorbo_2016": [(["BUG"], ["PROBLEM"]), (["QUESTION", "REQUEST"], ["INQUIRY"])],
    "guzman_2015": [(["Bug report"], ["PROBLEM"]), (["User request"], ["INQUIRY"])],
    "maalej_2016": [(["Bug"], ["PROBLEM"]), (["Feature"], ["INQUIRY"])],
    "scalabrino_2017": [(["BUG"], ["PROBLEM"]), (["FEATURE"], ["INQUIRY"])],
    "tizard_2019": [(["apparent bug"], ["PROBLEM"]), (["feature request", "question on application", "help seeking", "requesting more information", "question on background"], ["INQUIRY"])],
    "williams_2017": [(["bug"], ["PROBLEM"]), (["fea"], ["INQUIRY"])],
}

In [64]:
def get_logits(texts, label_types, source_type):
    combined_logits = []
    for label_type in label_types:
        
        if source_type ==  "COMBINED":
            model_logits = get_combined_ypreds(texts, label_type)
        else:
            model_logits = get_ypreds(texts, label_type, source_type)
            
        positive_class_logits = model_logits[:, 1]
        
        combined_logits.append(positive_class_logits)
        
    return np.stack(combined_logits, axis=1)

In [91]:
raw_data_dir = "./data/raw"

supervised_cls_data_dir = "./data/supervised_cls_preds"

stanik_cls_data_dir = os.path.join(supervised_cls_data_dir, "stanik")
os.makedirs(supervised_cls_data_dir, exist_ok=True)
os.makedirs(stanik_cls_data_dir, exist_ok=True)

results = {}

# Read the files one by one, exporting them to xml
for dataset_app_name in os.listdir(raw_data_dir):
    print(dataset_app_name)
    df = pd.read_csv(os.path.join(raw_data_dir, dataset_app_name), index_col = 0)
    df.labels = df.labels.str.replace("'", "").str.replace("\]", "").str.replace("\[", "").str.split(",")
    df.labels = df.labels.apply(lambda x: [y.strip() for y in x])
    
    dataset_name = [dataset for dataset in label_maps.keys() if dataset in dataset_app_name][0]
    label_map = label_maps[dataset_name]

    true_label_set = [labels[0] for labels in label_map]
    prediction_label_set = [labels[1][0] for labels in label_map]

    true_labels = df.labels.apply(lambda x: [any([true_label in x for true_label in true_label_list]) for true_label_list in true_label_set])
    true_labels = np.asarray(true_labels.values.reshape(-1).tolist())
    
    is_all_true_cols = ~(~true_labels).all(axis=0)
    
    true_labels = true_labels[:, is_all_true_cols]
    
    app_logits = get_logits(df.text, prediction_label_set, 'APP')
    twitter_logits = get_logits(df.text, prediction_label_set, 'TWITTER')
    combined_logits = get_logits(df.text, prediction_label_set, 'COMBINED')
    
    app_logits = app_logits[:, is_all_true_cols]
    twitter_logits = twitter_logits[:, is_all_true_cols]
    combined_logits = combined_logits[:, is_all_true_cols]
    
    app_roc_auc = metrics.roc_auc_score(true_labels, app_logits)
    twitter_roc_auc = metrics.roc_auc_score(true_labels, twitter_logits)
    combined_roc_auc = metrics.roc_auc_score(true_labels, combined_logits)
    
    results[dataset_app_name[:-4]] = {
        "Stanik app model": app_roc_auc,
        "Stanik twitter model": twitter_roc_auc,
        "Stanik combined model": combined_roc_auc,
    }

chen_2014_facebook.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
chen_2014_swiftkey.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
chen_2014_tapfish.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
chen_2014_templerun2.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_2048.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_A Comic Viewer.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Abstract Art.csv
Found 8673 unique tokens.

  from ipykernel import kernelapp as app



Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_AcDisplay.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Adblock Plus.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Amaze File Manager.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Autostarts.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_BatteryBot Battery Indicator.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Calculator.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_CatLog.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Duck Duck GO.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Financius - Expense Manager.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Muzei Live Wallpaper.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Turbo Editor ( Text Editor ).csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Tweet Lanes.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Wally.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
ciurumelea_2017_Xabber.csv
Found 8673 unique tokens.


  from ipykernel import kernelapp as app


Found 11298 unique tokens.
Found 8673 unique tokens.
Found 11298 unique tokens.
di_sorbo_2016_blinq_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_cstp_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_doodlePairs_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_karaokeFree_summary.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_karaokePaid_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_lifelog_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_minesweeperReloaded_summary.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_movieCreator_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_picturexAndroid_summary.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_picturexWindowsPhone_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_powernApp_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_sheepOblock_summary.csv
Found 6943 unique tokens.

  from ipykernel import kernelapp as app



Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_sketch_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_stoneFlood_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_trackID_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_video_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_weightTrack_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
di_sorbo_2016_wifiFileTransfer_summary.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Angrybirds.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Dropbox.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Evernote.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Picsart.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Pininterest.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Tripadvisor.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
guzman_2015_Whatsapp.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#10_Fotocollage Bildbearbeitung.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#10_line gratis anrufe.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#1_Camera Zoom FX.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#1_OfficeSuite Pro 7  (PDF und HD).csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#1_PicsArt - Photo Studio.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#2_flipagram.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#3_Need for Speed Most Wanted.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#3_Seitenmanager.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#3_skype.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#4_antivirus GRATIS.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#5_Plants vs Zombies.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#5_viber.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#6_Assassins Creed Pirates.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#6_firefox.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#6_MomentCam.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#6_Photo Studio PRO.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#7_Cymera - Camera & Photo Editor.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#7_Worms 2 Armageddon.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#8_gmx mail.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#8_Wo ist mein Wasser.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#8_XDA Premium.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#9_Modern Combat 4 Zero Hour.csv
Found 6943 unique tokens.
Found 6142 unique tokens.


  from ipykernel import kernelapp as app


Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_#9_Perfect365 Gesichts-Make-Up.csv
Found 6943 unique tokens.
Found 6142 unique tokens.


  from ipykernel import kernelapp as app


Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
maalej_2016_310947683.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_acr.browser.barebones.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_air.hmbtned.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.alfray.timeriffic.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.duckduckgo.mobile.android.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.ebay.mobile.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.google.zxing.client.android.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.ringdroid.csv
Found 6943 unique tokens.
Found 6142 unique tokens.

  from ipykernel import kernelapp as app



Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.uberspot.a2048.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_com.viber.voip.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_edu.berkeley.boinc.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_org.dolphinemu.dolphinemu.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_org.linphone.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
scalabrino_2017_org.wordpress.android.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
tizard_2019_features.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
tizard_2019_fire fox.csv


  from ipykernel import kernelapp as app


Found 6943 unique tokens.
Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
tizard_2019_issue.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@android.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@applesupport.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@callofduty.csv
Found 6943 unique tokens.

  from ipykernel import kernelapp as app



Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@googlechrome.csv
Found 6943 unique tokens.

  from ipykernel import kernelapp as app



Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@instagram.csv
Found 6943 unique tokens.

  from ipykernel import kernelapp as app



Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@minecraft.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@snapchat.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@visualstudio.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@whatsapp.csv
Found 6943 unique tokens.

  from ipykernel import kernelapp as app



Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.
williams_2017_@windows.csv
Found 6943 unique tokens.


  from ipykernel import kernelapp as app


Found 6142 unique tokens.
Found 10196 unique tokens.
Found 6835 unique tokens.
Found 6943 unique tokens.
Found 10196 unique tokens.
Found 6142 unique tokens.
Found 6835 unique tokens.


In [93]:
pd.DataFrame(results).to_csv(os.path.join(stanik_cls_data_dir, "results.csv"))

In [92]:
pd.DataFrame(results)

Unnamed: 0,chen_2014_facebook,chen_2014_swiftkey,chen_2014_tapfish,chen_2014_templerun2,ciurumelea_2017_2048,ciurumelea_2017_A Comic Viewer,ciurumelea_2017_Abstract Art,ciurumelea_2017_AcDisplay,ciurumelea_2017_Adblock Plus,ciurumelea_2017_Amaze File Manager,...,williams_2017_@android,williams_2017_@applesupport,williams_2017_@callofduty,williams_2017_@googlechrome,williams_2017_@instagram,williams_2017_@minecraft,williams_2017_@snapchat,williams_2017_@visualstudio,williams_2017_@whatsapp,williams_2017_@windows
Stanik app model,0.659085,0.737586,0.8603,0.827857,0.4375,0.498412,0.716159,0.574402,0.546154,0.330882,...,0.701544,0.63928,0.674781,0.669079,0.673102,0.729534,0.686397,0.64522,0.706343,0.657469
Stanik twitter model,0.642786,0.625225,0.702898,0.7292,0.28125,0.561078,0.731394,0.497411,0.741538,0.411765,...,0.696873,0.591651,0.621373,0.649859,0.663043,0.677039,0.658754,0.650913,0.624752,0.65643
Stanik combined model,0.664099,0.718505,0.840091,0.819743,0.375,0.53141,0.742882,0.540205,0.621538,0.352941,...,0.720658,0.630052,0.65832,0.679731,0.69284,0.730812,0.691402,0.672096,0.691904,0.683462
