In [8]:
import pymongo
import random
import pickle
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from collections import Counter
from tqdm.notebook import tqdm

%run Unsupervised.ipynb
%run Analyzers.ipynb

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pierp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pierp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [9]:
# Connect to the local MongoDB instance used by the evaluation functions below.
# NOTE(review): host/port and database name are hard-coded; consider moving them
# to a configuration cell if the notebook must run elsewhere.
client = pymongo.MongoClient("127.0.0.1:27017")

db = client['IRsegmentationDB3']
# Collection queried by 'doc' id; the code below reads the 'text' field
# (paragraphs separated by "¶") and the 'annotations' field of each document.
docsDataset = db['dataset']

# relDataset contains the relatedness score for each sentence combination in each doc.
# It is not read in the cells visible here; presumably used by the %run notebooks — verify.
relDataset = db['relatedness']

## Unsupervised Evaluation

In [10]:
def isInTheSameSegment(ns1, ns2, SG):
    """Tell whether two sentence indices fall in the same predicted segment.

    Args:
        ns1: index of the first sentence.
        ns2: index of the second sentence.
        SG: iterable of segments, each a container of sentence indices.

    Returns:
        True if at least one segment contains both indices, False otherwise.
    """
    return any(ns1 in members and ns2 in members for members in SG)

def isInTheSameGoldSegment(d, s1, s2, S):
    """Tell whether two sentences occur in the same gold paragraph of a document.

    The document is loaded from ``docsDataset`` by its ``'doc'`` id and its
    ``'text'`` field is split on the "¶" marker; each piece is treated as one
    gold paragraph.

    Args:
        d: value of the ``'doc'`` field identifying the document.
        s1: text of the first sentence.
        s2: text of the second sentence.
        S: unused; kept so existing callers keep working.

    Returns:
        True if some paragraph (newlines removed, whitespace stripped) contains
        both sentences as substrings, False otherwise.
    """
    record = docsDataset.find_one({'doc': d})
    paragraphs = record['text'].split("¶")

    # Normalise lazily so scanning stops at the first paragraph holding both sentences.
    cleaned = (chunk.replace("\n", "").strip() for chunk in paragraphs)
    return any(s1 in chunk and s2 in chunk for chunk in cleaned)

def unsupervisedEvaluation(d, k, SG, S):
    """Count how often predicted and gold segmentation agree on random sentence pairs.

    Draws 100 random pairs of sentences that are ``k`` positions apart and, for
    each pair, compares "same predicted segment" with "same gold paragraph".

    Args:
        d: document id forwarded to ``isInTheSameGoldSegment``.
        k: offset between the two sampled sentences.
        SG: predicted segments (containers of sentence indices).
        S: list of the document's sentences.

    Returns:
        Number of pairs (out of 100) on which both judgements coincide.

    Raises:
        ValueError: from ``random.randrange`` when ``len(S) - k`` is not positive.
    """
    agreements = 0
    for _ in range(100):
        first = random.randrange(0, len(S) - k)
        second = first + k
        first_text, second_text = S[first], S[second]

        predicted_together = isInTheSameSegment(first, second, SG)
        gold_together = isInTheSameGoldSegment(d, first_text, second_text, S)
        if predicted_together == gold_together:
            agreements += 1

    return agreements

## Unsupervised Labeling Evaluation

In [11]:
def SupervisedLabeling(SG, S, DOC):
    """Label every predicted segment and merge consecutive segments with equal labels.

    Args:
        SG: predicted segments, each a sequence of sentence indices into ``S``.
        S: list of the document's sentences.
        DOC: document identifier forwarded to the feature extractor and analyzers.

    Returns:
        A tuple ``(labeling, S, SG)``. ``labeling`` is a list of the result dicts
        sorted by segment index, where each dict's ``'index'`` has been replaced
        by the list of segment indices that share its ``'label'`` consecutively.
    """
    # Rebuild the document as lists of sentences, one list per segment in SG.
    segments = [[S[int(snt)] for snt in sg] for sg in SG]

    fx = FeatureExtraction(segments, DOC)
    fv = fx.get_feature_vector()

    # Functional part analyzer: run its three recognizers, then collect the results.
    fpa = functionalPartAnalyzer(segments, fv, DOC)
    fpa.introduction()
    fpa.background()
    fpa.footnotes()
    res = fpa.getClassification()

    # Conclusion recognizer: applied to each entry of the filtered feature vector.
    # The segment index is the text after "=" in the entry's second element.
    remaining = [entry[1].split("=")[1] for entry in fpa.getFilteredFeatureVector()]
    for seg_id in remaining:
        pfv = fx.get_feature_vector_for_sentence(seg_id)
        cr = conclusionRecognizer(segments[int(seg_id)], seg_id, pfv, DOC)
        res.append(cr.recognizer())

    res = sorted(res, key=lambda item: int(item['index']))

    # Merge runs of consecutive segments that received the same label.
    labeling = []
    for current in res:
        if labeling and current['label'] == labeling[-1]['label']:
            labeling[-1]['index'].append(current['index'])
            continue
        current['index'] = [current['index']]
        labeling.append(current)

    return labeling, S, SG

In [12]:
def supervisedEvaluation(d, r, k):
    """Run ``haveSameLabel`` 100 times for one document and collect the outcomes.

    Args:
        d: document id.
        r: tuple forwarded unchanged to ``haveSameLabel``.
        k: sentence offset forwarded unchanged to ``haveSameLabel``.

    Returns:
        List of 100 values, each the result of one ``haveSameLabel`` call.
    """
    return [haveSameLabel(d, r, k) for _ in range(100)]

def haveSameLabel(d, r, k):
    """Check whether a randomly sampled sentence gets the same label as in the gold data.

    Args:
        d: value of the ``'doc'`` field identifying the document in ``docsDataset``.
        r: tuple ``(labeling, S, SG)``; ``r[1]`` is the sentence list and the whole
            tuple is passed to ``getLabel``.
        k: offset that bounds the sampling range: the sentence index is drawn
            from ``[0, len(S) - k)``.

    Returns:
        1 if the predicted label of the sampled sentence equals the gold label
        of the paragraph containing it, 0 otherwise (including when either
        label cannot be found).

    Raises:
        ValueError: from ``random.randrange`` when ``len(S) - k`` is not positive.
    """
    # One query is enough: text and annotations live on the same document.
    doc = docsDataset.find_one({'doc': d})
    annots = doc['annotations']
    text = doc['text'].split("¶")

    S = r[1]
    n = random.randrange(0, len(S) - k)
    s1 = S[n]

    # Predicted label ("NF" means the sentence is in no labelled segment).
    pLabel1 = getLabel(n, r)
    if pLabel1 == "NF":
        return 0

    # Gold label: paragraphs and annotations are paired positionally.
    # No early exit, so the last paragraph containing the sentence wins.
    rLabel1 = "NF"
    for paragraph, annot in zip(text, annots):
        if s1 in paragraph.replace("\n", "").strip():
            # 'Analysis' is folded into 'Conclusions', mirroring getLabel.
            rLabel1 = 'Conclusions' if annot['type'] == 'Analysis' else annot['type']

    if rLabel1 == "NF":
        return 0

    return 1 if pLabel1 == rLabel1 else 0
    
def getLabel(ns, r):
    """Return the predicted label of the segment that contains sentence ``ns``.

    Args:
        ns: sentence index to look up.
        r: tuple where ``r[2]`` is the list of segments (containers of sentence
            indices) and ``r[0]`` is the list of merged labelling dicts, each
            with a ``'label'`` and an ``'index'`` container of segment positions.

    Returns:
        The label of the first merged entry covering a segment that contains
        ``ns``, with ``'Analysis'`` reported as ``"Conclusions"``; ``"NF"`` when
        no such entry exists.
    """
    for seg_pos, members in enumerate(r[2]):
        # Skip segments that do not hold the sentence.
        if ns not in members:
            continue

        # Look for the merged entry that covers this segment position.
        for entry in r[0]:
            if seg_pos not in entry['index']:
                continue
            if entry['label'] == 'Analysis':
                return "Conclusions"
            return entry['label']

    return "NF"

## Evaluation

In [32]:
# Documents evaluated in every configuration.
DOCS = [0, 8, 21, 30, 35, 42, 48, 57, 63, 64, 86, 88, 97,99,104,109,113,115,127,128,131,132,133]
# Values tried for the second and third arguments of Unsupervised().
n = [7,9,11,13,15,17,19]
t = [1]

# One summary tuple per (n, t) configuration:
# (n, t, unsupervised accuracy %, total segmentation error, labeling accuracy %).
# Defined here so the cell works on a fresh kernel; previously `res` was never
# initialised at notebook level and `res.append` raised NameError.
res = []

for nn in tqdm(n):
    for tt in t:
        UE = []    # per document: (doc, agreements out of 100, #predicted segments, #gold annotations, k)
        SU = []    # per document: (doc, list of 100 values returned by haveSameLabel)
        segL = []  # per document: |#predicted segments - #gold annotations|
        for d in DOCS:
            annots = docsDataset.find_one({'doc': d})['annotations']

            # Unsupervised segmentation
            SG, S = Unsupervised(d, nn, tt)

            # Evaluation
            # k = offset between the two sentences sampled by the evaluators
            # (original note: distance between segments)
            k = round((len(S) / 4) / len(annots))

            r = SupervisedLabeling(SG, S, d)

            UE.append((d, unsupervisedEvaluation(d, k, SG, S), len(SG), len(annots), k))
            SU.append((d, supervisedEvaluation(d, r, k)))
            segL.append(abs(len(SG) - len(annots)))

        # Persist the raw per-document results of this configuration.
        with open(f'./eval/UE_{nn}_{tt}.p', 'wb') as handle:
            pickle.dump(UE, handle, protocol=pickle.HIGHEST_PROTOCOL)

        with open(f'./eval/SU_{nn}_{tt}.p', 'wb') as handle:
            pickle.dump(SU, handle, protocol=pickle.HIGHEST_PROTOCOL)

        # Unsupervised accuracy: total agreements over all documents.
        accUE = sum(entry[1] for entry in UE)

        # Unsupervised labeling accuracy: total label matches over all documents.
        accSU = sum(sum(entry[1]) for entry in SU)

        ##
        # Parameters n, t
        # Accuracy Unsupervised
        # Segmentation Error
        # Accuracy Unsupervised Labeling
        ##
        # Each document contributes 100 trials, hence the (len * 100) denominator.
        res.append((nn, tt, round((accUE * 100) / (len(UE) * 100), 2), sum(segL), round((accSU * 100) / (len(SU) * 100), 2)))
          

HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))



UNSUPERVISED MODULE - INITIALIZATION
N:  7
Documents tokenized
DOC:  0
LENGTH SENTENCES  43
Document 0 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [3], [4, 5, 6, 7, 8, 9, 10], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [32], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42]]
Merging Adjacent Segments ... 
[[0, 1], [3, 4, 5, 6, 7, 8, 9, 10], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]]
Merging Smal Segments ... 
[[0, 1, 3, 4, 5, 6, 7, 8, 9, 10], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]]


UNSUPERVISED MODULE - INITIALIZATION
N:  7
Documents tokenized
DOC:  8
LENGTH SENTENCES  48
Document 8 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], [64, 65, 68, 69, 70, 71, 72, 73, 74, 75], [78, 79, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119]]


UNSUPERVISED MODULE - INITIALIZATION
N:  7
Documents tokenized
DOC:  57
LENGTH SENTENCES  107
Document 57 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [37], [38], [39], [40, 41, 42, 43, 44, 45], [47], [48, 49, 50, 51, 52, 53, 54], [56], [57, 58], [60], [61, 62], [63], [64], [65], [66, 67], [68], [69, 70, 71, 72, 73, 74, 75, 76, 7

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81, 82, 83], [85], [86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61, 62], [63, 64], [65, 66], [68, 69], [70, 71, 72], [74, 75, 76], [79, 80, 81, 82, 83], [85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]]
Merging Smal Segments ... 
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1, 2, 3], [5], [6, 7, 8], [9], [10, 11, 12, 13, 14], [16], [17, 18]]
Merging Adjacent Segments ... 
[[0, 1, 2, 3], [5, 6, 7, 8], [9, 10, 11, 12, 13, 14], [16, 17, 18]]
Merging Smal Segments ... 
[[0, 1, 2, 3, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 16, 17, 18]]


UNSUPERVISED MODULE - INITIALIZATION
N:  7
Documents tokenized
DOC:  99
LENGTH SENTENCES  120


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [13], [14, 15], [16], [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [36], [37, 38], [39], [40, 41, 42, 43, 44, 45, 46], [47], [48], [49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79], [80], [81, 82], [84], [85], [86], [87], [88], [89, 90, 91, 92], [94], [95], [97], [98, 99, 100, 101, 102, 103, 104, 105, 106, 107], [109], [110, 111, 112, 113, 114], [115], [116], [117], [118, 119]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [36, 37, 38], [39, 40, 41, 42, 43, 44, 45, 46], [47, 48], [49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79], [80, 81, 82], [84, 85], [86, 87], [88, 89, 90, 91, 92], [94, 

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1, 2], [3], [4, 5], [6], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], [72], [73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96], [97], [98, 99], [100], [101], [103], [104, 105], [108], [109, 110, 111, 112], [115], [116, 117], [122], [123, 124, 125, 126, 127, 128]]
Merging Adjacent Segments ... 
[[0, 1, 2], [3, 4, 5], [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], [72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96]

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [3], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [26], [27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], [57], [58, 59, 60, 61, 62], [63], [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75], [76], [77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98], [99], [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113]]
Merging Adjacent Segments ... 
[[0, 1], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], [57, 58, 59, 60, 61, 62], [63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75], [76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98], [99, 100, 101, 102, 103, 104, 105, 106, 

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[2], [3, 4, 5, 6], [7], [8, 9], [12], [13, 14, 15], [16], [17, 18], [19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [31], [32, 33, 34, 35], [36], [37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [51], [52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], [65], [66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], [102], [103, 104, 105, 106, 107, 108, 109, 110, 111, 112], [115], [116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]]
Merging Adjacent Segments ... 
[[2, 3, 4, 5, 6], [7, 8, 9], [12, 13, 14, 15], [16, 17, 18], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [31, 32, 33, 34, 35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], [65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78], [79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91], [92, 93], [96], [97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114], [115], [116, 117, 118, 119], [121], [122, 123, 124, 125, 126], [128], [129, 130, 131, 132, 133, 134, 135, 136, 137], [139], [140, 141], [144], [145]]
Merging Adjacent Segments ... 
[[1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11, 12], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], [23], [24, 25, 26, 27, 28, 29], [30], [31, 32], [34], [35, 36], [38], [39, 40], [41], [42, 43, 44, 45, 46], [47], [48, 49, 50, 51, 52, 53, 54, 55], [56], [57, 58, 59, 60, 61, 62, 63], [64], [65, 66, 67, 68, 69, 70, 71, 72], [73], [74, 75, 76, 77, 78, 79, 80, 81], [82], [83, 84, 85], [87], [88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108], [112], [113, 114, 115, 116], [117], [118, 119, 120, 121, 122, 123]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], [23, 24, 25, 26, 27, 28, 29], [30, 31, 32], [34, 35, 36], [38, 39, 40], [41, 42, 43, 44, 45, 46], [47, 48, 49, 50, 51, 52, 53, 54, 55], [56, 57, 58, 59, 60, 61, 62, 63], [64, 65, 66, 67, 68, 69, 70, 71, 72], [73, 74, 75, 76, 77, 78, 79, 80, 81], [82, 83, 84, 85], [87, 88, 89, 90, 91, 92, 93

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [44], [45, 46, 47, 48, 49], [51], [52, 53, 54, 55], [57], [58, 59, 60, 61, 62, 63], [65], [66, 67], [69], [70, 71, 72, 73, 74, 75, 76], [77], [78, 79], [80], [81, 82, 83]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [44, 45, 46, 47, 48, 49], [51, 52, 53, 54, 55], [57, 58, 59, 60, 61, 62, 63], [65, 66, 67], [69, 70, 71, 72, 73, 74, 75, 76], [77, 78, 79], [80, 81, 82, 83]]
Merging Smal Segments ... 
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], [44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55], [57, 58, 59, 60,

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15], [16, 17, 18, 19, 20, 21, 22, 23]]
Merging Adjacent Segments ... 
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15, 16, 17, 18, 19, 20, 21, 22, 23]]
Merging Smal Segments ... 
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [15, 16, 17, 18, 19, 20, 21, 22, 23]]


UNSUPERVISED MODULE - INITIALIZATION
N:  7
Documents tokenized
DOC:  132
LENGTH SENTENCES  257


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44], [45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71], [72, 73, 74, 75, 76, 77], [78], [79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111], [112, 113, 114, 115, 116, 117, 118, 119, 120, 121], [123], [124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165], [166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


T = AVG - STD
Computing Initial Segments ... 
[[0], [1, 2], [4], [5, 6, 7, 8, 9, 10], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27], [28], [29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40], [41], [42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], [56], [57, 58, 59, 60, 61, 62, 63, 64, 65, 66], [67], [68, 69, 70, 71], [72], [73, 74, 75, 76, 77, 78, 79, 80], [81], [82], [83], [84, 85, 86, 87, 88], [89], [90, 91, 92, 93], [94], [95, 96, 97, 98]]
Merging Adjacent Segments ... 
[[0, 1, 2], [4, 5, 6, 7, 8, 9, 10], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27], [28, 29, 30, 31, 32, 33, 34], [35, 36, 37, 38, 39, 40], [41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], [56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66], [67, 68, 69, 70, 71], [72, 73, 74, 75, 76, 77, 78, 79, 80], [81, 82], [83, 84, 85, 86, 87, 88], [89, 90, 91, 92, 93], [94, 95, 96, 97, 98]]
Merging Smal Segments ... 
[[0, 1, 2, 4, 5, 6, 7, 8, 9, 10], [12, 13, 14, 15, 16, 17

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60], [62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104], [105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134], [135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153], [154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167], [168, 169, 170, 171, 172, 173, 176, 177, 178]]


UNSUPERVISED MODULE - INITIALIZATION
N:  9
Documents tokenized
DOC:  48
LENGTH SENTENCES  120
Document 48 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 



UNSUPERVISED MODULE - INITIALIZATION
N:  9
Documents tokenized
DOC:  88
LENGTH SENTENCES  104
Document 88 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81, 82, 83], [85], [86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61, 62], [63, 64], [65, 66], [68, 69], [70, 71, 72], [74, 75, 76], [79, 80, 81, 82



UNSUPERVISED MODULE - INITIALIZATION
N:  9
Documents tokenized
DOC:  115
LENGTH SENTENCES  148
Document 115 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78], [79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91], [92, 93], [96], [97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114], [115], [116, 117, 118, 119], [121], [122, 123, 124, 125, 126], [128], [129, 130, 131, 132, 133, 134, 135, 136, 137], [139], [140, 141], [144], [145]]
Merging Adjacent Segments ... 
[[1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11, 12], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], 

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60], [62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104], [105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134], [135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153], [154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167], [168, 169, 170, 171, 172, 173, 176, 177, 178]]


UNSUPERVISED MODULE - INITIALIZATION
N:  11
Documents tokenized
DOC:  48
LENGTH SENTENCES  120
Document 48 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4,



UNSUPERVISED MODULE - INITIALIZATION
N:  11
Documents tokenized
DOC:  88
LENGTH SENTENCES  104
Document 88 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81, 82, 83], [85], [86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61, 62], [63, 64], [65, 66], [68, 69], [70, 71, 72], [74, 75, 76], [79, 80, 81, 8



UNSUPERVISED MODULE - INITIALIZATION
N:  11
Documents tokenized
DOC:  115
LENGTH SENTENCES  148
Document 115 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78], [79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91], [92, 93], [96], [97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114], [115], [116, 117, 118, 119], [121], [122, 123, 124, 125, 126], [128], [129, 130, 131, 132, 133, 134, 135, 136, 137], [139], [140, 141], [144], [145]]
Merging Adjacent Segments ... 
[[1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11, 12], [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64], [65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104], [105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138], [140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153], [154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167], [168, 169, 170, 171, 172, 173, 176, 177, 178]]


UNSUPERVISED MODULE - INITIALIZATION
N:  13
Documents tokenized
DOC:  48
LENGTH SENTENCES  120
Document 48 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4,



UNSUPERVISED MODULE - INITIALIZATION
N:  13
Documents tokenized
DOC:  88
LENGTH SENTENCES  104
Document 88 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81, 82, 83], [85], [86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61, 62], [63, 64], [65, 66], [68, 69], [70, 71, 72], [74, 75, 76], [79, 80, 81, 8

[[2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], [65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], [102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]]


UNSUPERVISED MODULE - INITIALIZATION
N:  13
Documents tokenized
DOC:  115
LENGTH SENTENCES  148
Document 115 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138], [140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158], [159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173], [176, 177, 178]]


UNSUPERVISED MODULE - INITIALIZATION
N:  15
Documents tokenized
DOC:  48
LENGTH SENTENCES  120
Document 48 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 



UNSUPERVISED MODULE - INITIALIZATION
N:  15
Documents tokenized
DOC:  88
LENGTH SENTENCES  104
Document 88 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81, 82, 83], [85], [86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]]
Merging Adjacent Segments ... 
[[0, 1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61, 62], [63, 64], [65, 66], [68, 69], [70, 71, 72], [74, 75, 76], [79, 80, 81, 8

[[2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], [65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], [102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]]


UNSUPERVISED MODULE - INITIALIZATION
N:  15
Documents tokenized
DOC:  115
LENGTH SENTENCES  148
Document 115 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68, 

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 140, 141, 142], [143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167], [168, 169, 170, 171, 172, 173, 176, 177, 178]]


UNSUPERVISED MODULE - INITIALIZATION
N:  17
Documents tokenized
DOC:  48
LENGTH SENTENCES  120
Document 48 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6,

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], [56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110]]


UNSUPERVISED MODULE - INITIALIZATION
N:  17
Documents tokenized
DOC:  88
LENGTH SENTENCES  104
Document 88 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81,

[[2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], [102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]]


UNSUPERVISED MODULE - INITIALIZATION
N:  17
Documents tokenized
DOC:  115
LENGTH SENTENCES  148
Document 115 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68, 69

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90], [91, 92, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122], [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 140, 141, 142], [143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167], [168, 169, 170, 171, 172, 173, 176, 177, 178]]


UNSUPERVISED MODULE - INITIALIZATION
N:  19
Documents tokenized
DOC:  48
LENGTH SENTENCES  120
Document 48 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6,

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], [56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110]]


UNSUPERVISED MODULE - INITIALIZATION
N:  19
Documents tokenized
DOC:  88
LENGTH SENTENCES  104
Document 88 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[0], [1], [2], [3, 4, 5, 6, 7, 8, 9, 10, 11], [12], [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [33], [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], [61], [62], [63], [64], [65], [66], [68], [69], [70], [71, 72], [74], [75, 76], [79], [80, 81,

[[2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], [102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]]


UNSUPERVISED MODULE - INITIALIZATION
N:  19
Documents tokenized
DOC:  115
LENGTH SENTENCES  148
Document 115 already in DB. No relatedness computation needed.
T = AVG - STD
Computing Initial Segments ... 
[[1], [2, 3], [4], [5, 6, 7, 8, 9, 10, 11, 12], [14], [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [35], [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60], [61, 62, 63, 64, 65, 66, 67, 68, 69

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164], [165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 

In [33]:
res

[(7, 1, 65.13, 48, 53.91),
 (9, 1, 64.26, 48, 52.09),
 (11, 1, 63.78, 51, 51.91),
 (13, 1, 66.0, 54, 48.91),
 (15, 1, 66.17, 63, 46.87),
 (17, 1, 63.61, 70, 39.74),
 (19, 1, 65.09, 73, 38.48)]