# Supervised Model Cross Validation

In [1]:
import pymongo
import numpy as np
import json
import import_ipynb
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, cross_val_score

%run Introduction.ipynb
%run Background.ipynb
%run Footnotes.ipynb
%run Conclusion.ipynb

%run Classification.ipynb

In [2]:
# Open a client against the local MongoDB instance and grab handles to the
# segmentation database and the two collections referenced by this notebook.
client = pymongo.MongoClient("127.0.0.1:27017")
db = client["IRsegmentationDB"]

pDataset, bDataset = db["pDataset"], db["bDataset"]

In [9]:
# Maximum number of records taken from the pDataset collection.
N_DOCS = 170

# Fetch the records once, bounded by the cursor limit, so that the three
# parallel lists below are built from the very same records and the collection
# is not scanned (and fully materialised) three separate times.
records = list(pDataset.find().limit(N_DOCS))

docs = [record['text'] for record in records]
annot = [record['annotations'] for record in records]
indexes = [record['doc'] for record in records]

# Settings read by the training functions when they split their input.
train_size = 0.8
random_state = 42

### Training Functions

In [None]:
def functional_segmenter(d, a):
    """Train and evaluate the three functional-segment classifiers.

    The Introduction, Background and Footnotes classifiers are processed one
    after another, in that order. Each is built from the same documents and
    annotations, split with the notebook-level ``random_state`` and
    ``train_size`` settings, trained and then tested.

    Parameters
    ----------
    d : documents passed to every classifier constructor.
    a : annotations passed to every classifier constructor.

    Returns
    -------
    None
    """
    stages = (IntroductionClassifier, BackgroundClassifier, FootnotesClassifier)

    for build in stages:
        segmenter = build(d, a)
        segmenter.train_test_split(random_state, train_size)
        segmenter.train()
        segmenter.test()
    
def conclusion_recognizer(d, a):
    """Train and evaluate the conclusion recognizer.

    Builds a ``ConclusionRecognizer`` from the given documents and
    annotations, splits it with the notebook-level ``random_state`` and
    ``train_size`` settings, then runs its training and test steps.

    Parameters
    ----------
    d : documents passed to the recognizer constructor.
    a : annotations passed to the recognizer constructor.

    Returns
    -------
    None
    """
    recognizer = ConclusionRecognizer(d, a)
    recognizer.train_test_split(random_state, train_size)

    # Fit first, then evaluate on the recognizer's own held-out split.
    recognizer.train()
    recognizer.test()
    
def testing(d, a, i):
    """Run the ``Classification`` pipeline on held-out data.

    Parameters
    ----------
    d : documents to classify.
    a : annotations belonging to those documents.
    i : document identifiers belonging to those documents.

    Returns
    -------
    Whatever ``Classification.get_results()`` yields once ``set()`` has been
    called on the instance.
    """
    pipeline = Classification(d, a, i)
    pipeline.set()
    outcome = pipeline.get_results()
    return outcome

### Cross Validation

In [11]:
# Number of cross-validation folds.
N_FOLDS = 10

# Dividing in Folds
# Partition the document positions 0..len(docs)-1 into contiguous folds whose
# sizes differ by at most one. Rounding the fold size and appending whatever
# is left over could produce an extra, undersized fold (or a single fold when
# there are very few documents); distributing the remainder over the first
# folds always gives exactly N_FOLDS folds (fewer only if there are fewer
# documents than folds).
n_docs = len(docs)
dim, extra = divmod(n_docs, N_FOLDS)  # dim: base fold size, extra: folds that get one more

data = []
start = 0
for fold in range(min(N_FOLDS, n_docs)):
    size = dim + (1 if fold < extra else 0)
    data.append(list(range(start, start + size)))
    start += size

In [None]:
# One entry per fold: the value returned by testing() on that fold.
res = []

for i, test in enumerate(data):
    # Every fold except the i-th one, flattened into a single index list.
    train = [pos for j, fold in enumerate(data) if j != i for pos in fold]

    docs_cv = [docs[pos] for pos in train]
    annot_cv = [annot[pos] for pos in train]
    indexes_cv = [indexes[pos] for pos in train]

    print(str(i), " FOLD EVALUATION")

    # Train all models on the training folds.
    functional_segmenter(docs_cv, annot_cv)
    conclusion_recognizer(docs_cv, annot_cv)

    # The i-th fold is the held-out data for this round.
    docs_test_cv = [docs[pos] for pos in test]
    annot_test_cv = [annot[pos] for pos in test]
    indexes_test_cv = [indexes[pos] for pos in test]

    print("TESTING")
    fold_result = testing(docs_test_cv, annot_test_cv, indexes_test_cv)
    res.append(fold_result)

0  FOLD EVALUATION
Start INTRODUCTION
Start BACKGROUND
Start FOOTNOTES
Start CONCLUSIONS
