# Full System Test
This notebook is for testing the evaluation. 

The following cells contain only setup code and can be skipped.

In [4]:
import numpy as np
import spacy
from essay_evaluation.lexical_density import LexicalDensityFeatures
from essay_evaluation.lexical_sophistication import LexicalSophisticationFeatures
from essay_evaluation.lexical_variation import LexicalVariationFeatures
from essay_evaluation.lexical_accuracy import LexicalAccuracy, SpellChecker, CollocationPreprocessor, \
    CollocationDectector, CollocationEvaluator
from essay_evaluation.collocational_aspects import CollocationalAspects
from essay_evaluation.formative_feedback_evaluator import binning_indicies, FormativeFeedbackEvaluator
from essay_evaluation.pipeline import FeatureCollector
from pprint import pprint

# Names of every feature, concatenated group by group: lexical variation,
# lexical sophistication, lexical accuracy, collocational aspects, lexical density.
all_feature_names = (
    LexicalVariationFeatures.feature_names
    + LexicalSophisticationFeatures.feature_names
    + LexicalAccuracy.feature_names
    + CollocationalAspects.feature_names
    + LexicalDensityFeatures.feature_names
)

# Subset of the names above, selected by the positions listed in `binning_indicies`.
feature_names = [all_feature_names[position] for position in binning_indicies]

def create_pipeline():
    """Build the spaCy pipeline used for the essay evaluation.

    Loads the 'en_core_web_sm' model and appends, in this order: the spell
    checker, the three collocation components, the five feature extractors
    and finally the feature collector.

    Returns:
        The loaded spaCy language object with all components appended.
    """
    pipeline = spacy.load('en_core_web_sm')

    def _append(component):
        # Every component is registered under its own `name`, at the end of the pipeline.
        # NOTE(review): passing a component instance to add_pipe is the spaCy v2
        # calling convention - confirm the pinned spaCy version before upgrading.
        pipeline.add_pipe(component, name=component.name, last=True)

    # Components that have to run before the feature extractors.
    for component_cls in (SpellChecker, CollocationPreprocessor,
                          CollocationDectector, CollocationEvaluator):
        _append(component_cls())

    # Feature extractors: all of them are created first, then appended.
    variation = LexicalVariationFeatures()
    sophistication = LexicalSophisticationFeatures()
    density = LexicalDensityFeatures()
    accuracy = LexicalAccuracy()
    collocational = CollocationalAspects()

    # The append order mirrors the group order used to build `all_feature_names`
    # (variation, sophistication, accuracy, collocational aspects, density).
    # Presumably FeatureCollector gathers the values in pipeline order - verify.
    for extractor in (variation, sophistication, accuracy, collocational, density):
        _append(extractor)

    _append(FeatureCollector())
    return pipeline

# Build the pipeline once at notebook level; get_feedback() reads this global `nlp`.
nlp = create_pipeline()

In [6]:
def get_feedback(text, level):
    """Run the pipeline on `text` and evaluate the extracted features for `level`.

    Args:
        text: The text to analyse; it is passed to the global `nlp` pipeline.
        level: The level handed to the FormativeFeedbackEvaluator
            (the notebook calls this with "a1").

    Returns:
        A tuple `(feedback, doc)`: `feedback` is a dict that pairs each name in
        `feature_names` with the corresponding evaluator result, and `doc` is
        the processed spaCy document.
    """
    doc = nlp(text)

    # The evaluator is given a matrix with a single row: this document's features.
    single_row_matrix = np.array([doc._.features])
    evaluator = FormativeFeedbackEvaluator()
    verdicts = evaluator(single_row_matrix, level)

    # Pair every selected feature name with its verdict, position by position.
    feedback_by_feature = dict(zip(feature_names, verdicts))
    return feedback_by_feature, doc

def analyze(doc):
    """Print the intermediate results stored on a processed document.

    Prints four labelled sections: the collocations, the collocation errors,
    the spelling mistakes and the value of every feature keyed by its name
    from `all_feature_names`.

    Args:
        doc: A document returned by the pipeline, i.e. one that carries the
            `collocations`, `collocation_errors`, `spell_errors` and
            `features` extension attributes under `doc._`.
    """
    print("Collocations")
    pprint(list(map(str, doc._.collocations)))

    print("Collocations Errors")
    pprint(list(map(str, doc._.collocation_errors)))

    print("Spelling Mistakes")
    pprint(doc._.spell_errors)

    print("Feature Index Values")
    # Feature values are matched to their names by position.
    pprint(dict(zip(all_feature_names, doc._.features)))
    

## Usage
You can use `feedback, doc = get_feedback(text, level)` to retrieve a dictionary that maps each feature 
index to -1 for negative, 0 for neutral or 1 for positive feedback. The function also returns the 
spaCy document. The `analyze(doc)` function prints the collocations, collocation errors, 
spelling mistakes and feature index values so that the resulting feedback can be debugged.

In [8]:
# Minimal example: evaluate a two-word text for level "a1".
feedback, doc = get_feedback("Hello World!", "a1")
# Print the intermediate results behind the feedback.
analyze(doc)
# Bare last expression so the notebook displays the feedback dictionary.
feedback

Collocations
[]
Collocations Errors
[]
Spelling Mistakes
[]
Feature Index Values
{'CA_BIN1_R': 0,
 'CA_BIN2_R': 0,
 'CA_BIN3_R': 0,
 'LA_COL_ERR_R': 0,
 'LA_ER': 0.0,
 'LD_GRUR': 0.3333333333333333,
 'LD_LXUR': 0.0,
 'LS_FOMN_BS': 0,
 'LS_FOMN_CA': 298.0,
 'LS_FOMN_NA': 0,
 'LS_FOMN_NG': 0,
 'LS_FOMN_TC': 0,
 'LS_FPC_BS': 0.0,
 'LS_FPC_CA': 0.0,
 'LS_FPC_CGA1': 0.0,
 'LS_FPC_CGA2': 0.0,
 'LS_FPC_CGA3': 0.3333333333333333,
 'LS_FPC_CT': 0.0,
 'LS_FPC_NA': 0.0,
 'LS_FPC_NG': 0.6666666666666666,
 'LS_FPC_TC': 0.0,
 'LV_CTTR': 0,
 'LV_DUGA': 0,
 'LV_HDD': 0.0,
 'LV_MAAS': 0,
 'LV_MATTR': 0,
 'LV_MSTTR': 0,
 'LV_MTLD': 0.0,
 'LV_RTTR': 0,
 'LV_SUMM': 0.0,
 'LV_TTR': 0,
 'LV_W': 0,
 'LV_WT': 0,
 'LV_WT1': 0,
 'LV_YULEK': 0.0}


{'LV_HDD': -1,
 'LS_FPC_CA': 0,
 'LS_FPC_CT': -1,
 'LS_FPC_CGA1': -1,
 'LS_FPC_CGA2': 0,
 'LS_FPC_CGA3': 1,
 'LA_ER': 0,
 'LA_COL_ERR_R': 1,
 'CA_BIN1_R': 1,
 'CA_BIN2_R': -1,
 'CA_BIN3_R': -1,
 'LD_LXUR': -1}

In [9]:
# Longer example text, evaluated for the same level ("a1") as the first example.
text2 = "Wilberforce's speech on 30 June 1860 was good-humoured and witty, but was an unfair attack on Darwinism, ending in the now infamous question to Huxley of whether \"it was through his grandfather or grandmother that he claimed descent from a monkey.\" Some commentators suggested that this question was written by Owen, and others suggested that the bishop was taught by Owen."
feedback, doc = get_feedback(text2, "a1")
# Print the intermediate results behind the feedback.
analyze(doc)
# Bare last expression so the notebook displays the feedback dictionary.
feedback


Collocations
['speech_NOUN<--[ NOUN+VERB (14.262473883594401) ]--be_VERB',
 'commentator_NOUN<--[ NOUN+VERB (0.9998283007569059) ]--suggest_VERB',
 'other_NOUN<--[ NOUN+VERB (None) ]--suggest_VERB',
 'Wilberforces_NOUN<--[ NOUN+NOUN (None) ]--speech_NOUN',
 'grandmother_NOUN<--[ NOUN+NOUN (None) ]--grandfather_NOUN',
 'unfair_ADJ<--[ ADJ+NOUN (1.4265423231237808) ]--attack_NOUN',
 'infamous_ADJ<--[ ADJ+NOUN (None) ]--question_NOUN',
 'attack_NOUN<--[ VERB+NOUN (7.8036437881778316) ]--be_VERB',
 'descent_NOUN<--[ VERB+NOUN (1.9255911974011508) ]--claim_VERB',
 'question_NOUN<--[ VERB+NOUN (0.37029217508548135) ]--write_VERB',
 'bishop_NOUN<--[ VERB+NOUN (None) ]--teach_VERB',
 'humoured_ADJ<--[ VERB+ADJ (None) ]--be_VERB',
 'good_ADV<--[ ADV+ADJ (None) ]--humoured_ADJ',
 'now_ADV<--[ ADV+ADJ (None) ]--infamous_ADJ']
Collocations Errors
['other_NOUN<--[ NOUN+VERB (None) ]--suggest_VERB',
 'Wilberforces_NOUN<--[ NOUN+NOUN (None) ]--speech_NOUN',
 'grandmother_NOUN<--[ NOUN+NOUN (None) ]--

{'LV_HDD': -1,
 'LS_FPC_CA': 0,
 'LS_FPC_CT': 0,
 'LS_FPC_CGA1': 1,
 'LS_FPC_CGA2': 1,
 'LS_FPC_CGA3': 0,
 'LA_ER': 0,
 'LA_COL_ERR_R': 0,
 'CA_BIN1_R': -1,
 'CA_BIN2_R': 0,
 'CA_BIN3_R': -1,
 'LD_LXUR': -1}