In [1]:
import json
import os

import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.ml import Pipeline,PipelineModel

import pandas as pd

# Spark configuration for this notebook. These settings only take effect when
# they are handed to sparknlp.start(); building the dict alone does nothing.
params = {
    "spark.driver.memory": "4G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

# Start the Spark NLP session with the settings above applied.
spark = sparknlp.start(params=params)
spark

In [2]:
from sparknlp.base import LightPipeline

# Stages are wired by column name:
# "text" -> "document" -> "token" -> "embeddings" -> "ner".
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

embeddings = (
    WordEmbeddingsModel.pretrained('glove_100d')
    .setInputCols(["document", 'token'])
    .setOutputCol("embeddings")
)

ner = (
    NerDLModel.pretrained("ner_dl", 'en')
    .setInputCols(["document", "token", "embeddings"])
    .setOutputCol("ner")
)

pipeline_stages = [documentAssembler, tokenizer, embeddings, ner]
ner_pipeline = Pipeline().setStages(pipeline_stages)

# Fit on a one-row DataFrame whose "text" value is an empty string; the fitted
# result is what the Harness cells below receive as `model`.
empty_text_df = spark.createDataFrame([[""]]).toDF("text")
ner_model_pipeline = ner_pipeline.fit(empty_text_df)

glove_100d download started this may take some time.
Approximate size to download 145.3 MB
[OK!]
ner_dl download started this may take some time.
Approximate size to download 13.6 MB
[OK!]


### When a config.yml is provided by the user

In [3]:
from nlptest.nlptest import Harness

# NER harness around the fitted Spark NLP pipeline, with test data taken from
# test.conll and test settings taken from config.yml.
h = Harness(
    "ner",
    model=ner_model_pipeline,
    data="test.conll",
    config_path='config.yml',
)

In [4]:
# Generate the test cases. The displayed table pairs each original sentence
# with its generated test case and the test type that produced it.
h.generate()

Unnamed: 0,Orginal,Test_Case,Test_type
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",uppercase
1,Nadim Ladki,NADIM LADKI,uppercase
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , UNITED ARAB EMIRATES 1996-12-06",uppercase
3,Japan began the defence of their Asian Cup tit...,JAPAN BEGAN THE DEFENCE OF THEIR ASIAN CUP TIT...,uppercase
4,But China saw their luck desert them in the se...,BUT CHINA SAW THEIR LUCK DESERT THEM IN THE SE...,uppercase
...,...,...,...
25,on Friday for their friendly against Scotland ...,on Friday for their friendly against Scotland ...,add_context
26,"Cuttitta , who trainer George Coste said was c...","Cuttitta , who trainer George Coste said was c...",add_context
27,Stefano Bordon is out through illness and Cost...,Stefano Bordon is out through illness and Cost...,add_context
28,Cuttitta announced his retirement after the 19...,Cuttitta announced his retirement after the 19...,add_context


In [5]:
# Run the model on the generated test cases. The displayed table adds
# expected_result, actual_result and is_pass columns.
# NOTE(review): is_pass appears to mark whether the two tag lists match — confirm.
h.run()

Unnamed: 0,Orginal,Test_Case,Test_type,expected_result,actual_result,is_pass
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",uppercase,"[O, O, B-LOC, O, O, O, O, B-LOC, O, O, O, O]","[O, O, B-LOC, O, O, O, O, B-LOC, O, O, O, O]",True
1,Nadim Ladki,NADIM LADKI,uppercase,"[B-ORG, I-ORG]","[B-ORG, I-ORG]",True
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , UNITED ARAB EMIRATES 1996-12-06",uppercase,"[B-LOC, O, B-LOC, I-LOC, I-LOC, O]","[B-LOC, O, B-LOC, I-LOC, I-LOC, O]",True
3,Japan began the defence of their Asian Cup tit...,JAPAN BEGAN THE DEFENCE OF THEIR ASIAN CUP TIT...,uppercase,"[B-LOC, O, O, O, O, O, B-MISC, I-MISC, O, O, O...","[B-LOC, O, O, O, O, O, B-MISC, I-MISC, O, O, O...",True
4,But China saw their luck desert them in the se...,BUT CHINA SAW THEIR LUCK DESERT THEM IN THE SE...,uppercase,"[O, B-LOC, O, O, O, O, O, O, O, O, O, O, O, O,...","[O, B-LOC, O, O, B-MISC, I-MISC, O, O, O, O, O...",False
...,...,...,...,...,...,...
25,on Friday for their friendly against Scotland ...,on Friday for their friendly against Scotland ...,add_context,"[O, O, O, O, O, O, B-LOC, O, B-LOC, O, O, O, O...","[O, O, O, O, O, O, B-LOC, O, B-LOC, O, O, O, O...",True
26,"Cuttitta , who trainer George Coste said was c...","Cuttitta , who trainer George Coste said was c...",add_context,"[B-PER, O, O, O, B-PER, I-PER, O, O, O, O, O, ...","[B-PER, O, O, O, B-PER, I-PER, O, O, O, O, O, ...",True
27,Stefano Bordon is out through illness and Cost...,Stefano Bordon is out through illness and Cost...,add_context,"[B-PER, I-PER, O, O, O, O, O, B-PER, O, O, O, ...","[B-PER, I-PER, O, O, O, O, O, B-PER, O, O, O, ...",True
28,Cuttitta announced his retirement after the 19...,Cuttitta announced his retirement after the 19...,add_context,"[B-PER, O, O, O, O, O, O, B-MISC, I-MISC, O, O...","[B-PER, O, O, O, O, O, O, B-MISC, I-MISC, O, O...",True


In [6]:
# Write the config, the generated test cases and the results to these files.
h.save(
    config='trial_config.yml',
    testcases="demo_cases.csv",
    results="demo_results.csv",
)

In [7]:
# Summarise the run as pass/fail counts per test type.
h.report()

Test_type    is_pass
add_context  True       34
             False      26
lowercase    False      27
             True        3
uppercase    True       19
             False      11
Name: is_pass, dtype: int64

### When config.yml is not provided by the user

In [8]:
# No config_path is given here; the settings are supplied through configure().
h2 = Harness("ner", model=ner_model_pipeline, data="test.conll")

# Context strings offered to the add_context test.
h2_add_context = {
    'add_context': [
        {'starting_context': ['Hello', 'Dated: 21/02/2022']},
        {'ending_context': ['Bye', 'Reported']},
    ]
}

# NOTE(review): 'text_classifer' and 'du' look like typos (possibly
# 'text_classifier' and 'nl') — confirm what the library expects before changing.
h2_settings = {
    'tasks': ['ner', 'text_classifer'],
    'lang': ['en', 'du', 'it'],
    'tests_types': ['uppercase', 'lowercase', h2_add_context],
}

# Kept as the last expression so the cell still displays the returned config.
h2.configure(h2_settings)

{'tasks': ['ner', 'text_classifer'],
 'lang': ['en', 'du', 'it'],
 'tests_types': ['uppercase',
  'lowercase',
  {'add_context': [{'starting_context': ['Hello', 'Dated: 21/02/2022']},
    {'ending_context': ['Bye', 'Reported']}]}]}

In [9]:
# Generate test cases from the inline configuration (uppercase, lowercase and
# add_context test types).
h2.generate()

Unnamed: 0,Orginal,Test_Case,Test_type
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",uppercase
1,Nadim Ladki,NADIM LADKI,uppercase
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , UNITED ARAB EMIRATES 1996-12-06",uppercase
3,Japan began the defence of their Asian Cup tit...,JAPAN BEGAN THE DEFENCE OF THEIR ASIAN CUP TIT...,uppercase
4,But China saw their luck desert them in the se...,BUT CHINA SAW THEIR LUCK DESERT THEM IN THE SE...,uppercase
...,...,...,...
25,on Friday for their friendly against Scotland ...,on Friday for their friendly against Scotland ...,add_context
26,"Cuttitta , who trainer George Coste said was c...","Cuttitta , who trainer George Coste said was c...",add_context
27,Stefano Bordon is out through illness and Cost...,Stefano Bordon is out through illness and Cost...,add_context
28,Cuttitta announced his retirement after the 19...,Cuttitta announced his retirement after the 19...,add_context


In [10]:
# Run the model on h2's test cases; the table shows expected vs. actual tags
# and the is_pass flag for each case.
h2.run()

Unnamed: 0,Orginal,Test_Case,Test_type,expected_result,actual_result,is_pass
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",uppercase,"[O, O, B-LOC, O, O, O, O, B-LOC, O, O, O, O]","[O, O, B-LOC, O, O, O, O, B-LOC, O, O, O, O]",True
1,Nadim Ladki,NADIM LADKI,uppercase,"[B-ORG, I-ORG]","[B-ORG, I-ORG]",True
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , UNITED ARAB EMIRATES 1996-12-06",uppercase,"[B-LOC, O, B-LOC, I-LOC, I-LOC, O]","[B-LOC, O, B-LOC, I-LOC, I-LOC, O]",True
3,Japan began the defence of their Asian Cup tit...,JAPAN BEGAN THE DEFENCE OF THEIR ASIAN CUP TIT...,uppercase,"[B-LOC, O, O, O, O, O, B-MISC, I-MISC, O, O, O...","[B-LOC, O, O, O, O, O, B-MISC, I-MISC, O, O, O...",True
4,But China saw their luck desert them in the se...,BUT CHINA SAW THEIR LUCK DESERT THEM IN THE SE...,uppercase,"[O, B-LOC, O, O, O, O, O, O, O, O, O, O, O, O,...","[O, B-LOC, O, O, B-MISC, I-MISC, O, O, O, O, O...",False
...,...,...,...,...,...,...
25,on Friday for their friendly against Scotland ...,on Friday for their friendly against Scotland ...,add_context,"[O, O, O, O, O, O, B-LOC, O, B-LOC, O, O, O, O...","[O, O, O, O, O, O, B-LOC, O, B-LOC, O, O, O, O...",True
26,"Cuttitta , who trainer George Coste said was c...","Cuttitta , who trainer George Coste said was c...",add_context,"[B-PER, O, O, O, B-PER, I-PER, O, O, O, O, O, ...","[B-PER, O, O, O, B-PER, I-PER, O, O, O, O, O, ...",True
27,Stefano Bordon is out through illness and Cost...,Stefano Bordon is out through illness and Cost...,add_context,"[B-PER, I-PER, O, O, O, O, O, B-PER, O, O, O, ...","[B-PER, I-PER, O, O, O, O, O, B-PER, O, O, O, ...",True
28,Cuttitta announced his retirement after the 19...,Cuttitta announced his retirement after the 19...,add_context,"[B-PER, O, O, O, O, O, O, B-MISC, I-MISC, O, O...","[B-PER, O, O, O, O, O, O, B-MISC, I-MISC, O, O...",True


In [11]:
# Save with the method's default arguments (no file names are passed here).
h2.save()

In [12]:
# Pass/fail counts per test type for h2.
# NOTE(review): the add_context counts below (32/28) differ from the first
# harness's report (34/26); presumably the context is chosen at random and no
# seed is set — confirm.
h2.report()

Test_type    is_pass
add_context  True       32
             False      28
lowercase    False      27
             True        3
uppercase    True       19
             False      11
Name: is_pass, dtype: int64

### Different Test_Types 

In [13]:
# Third harness: configured with the add_context test type only.
h3 = Harness("ner", model=ner_model_pipeline, data="test.conll")

# Context strings offered to the add_context test.
h3_add_context = {
    'add_context': [
        {'starting_context': ['Hello', 'Dated: 21/02/2022']},
        {'ending_context': ['Bye', 'Reported']},
    ]
}

# NOTE(review): 'text_classifer' and 'du' look like typos (possibly
# 'text_classifier' and 'nl') — confirm what the library expects before changing.
h3_settings = {
    'tasks': ['ner', 'text_classifer'],
    'lang': ['en', 'du', 'it'],
    'tests_types': [h3_add_context],
}

# Kept as the last expression so the cell still displays the returned config.
h3.configure(h3_settings)

{'tasks': ['ner', 'text_classifer'],
 'lang': ['en', 'du', 'it'],
 'tests_types': [{'add_context': [{'starting_context': ['Hello',
      'Dated: 21/02/2022']},
    {'ending_context': ['Bye', 'Reported']}]}]}

In [14]:
# Generate add_context test cases only; some rows below show a configured
# ending context (e.g. "Reported") appended to the original sentence.
h3.generate()

Unnamed: 0,Orginal,Test_Case,Test_type
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",add_context
1,Nadim Ladki,Nadim Ladki Reported,add_context
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , United Arab Emirates 1996-12-06 Reported",add_context
3,Japan began the defence of their Asian Cup tit...,Japan began the defence of their Asian Cup tit...,add_context
4,But China saw their luck desert them in the se...,But China saw their luck desert them in the se...,add_context
5,China controlled most of the match and saw sev...,China controlled most of the match and saw sev...,add_context
6,Oleg Shatskiku made sure of the win in injury ...,Oleg Shatskiku made sure of the win in injury ...,add_context
7,The former Soviet republic was playing in an A...,The former Soviet republic was playing in an A...,add_context
8,Despite winning the Asian Games title two year...,Despite winning the Asian Games title two year...,add_context
9,Two goals from defensive errors in the last si...,Two goals from defensive errors in the last si...,add_context


In [15]:
# Run the model on the add_context test cases and show expected vs. actual tags.
h3.run()

Unnamed: 0,Orginal,Test_Case,Test_type,expected_result,actual_result,is_pass
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",add_context,"[O, O, B-LOC, O, O, O, O, B-LOC, O, O, O, O]","[O, O, B-LOC, O, O, O, O, B-LOC, O, O, O, O]",True
1,Nadim Ladki,Nadim Ladki Reported,add_context,"[B-ORG, I-ORG]","[B-ORG, I-ORG]",True
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , United Arab Emirates 1996-12-06 Reported",add_context,"[B-LOC, O, B-LOC, I-LOC, I-LOC, O]","[B-LOC, O, B-LOC, I-LOC, I-LOC, O]",True
3,Japan began the defence of their Asian Cup tit...,Japan began the defence of their Asian Cup tit...,add_context,"[B-LOC, O, O, O, O, O, B-MISC, I-MISC, O, O, O...","[B-LOC, O, O, O, O, O, B-MISC, I-MISC, O, O, O...",True
4,But China saw their luck desert them in the se...,But China saw their luck desert them in the se...,add_context,"[O, B-LOC, O, O, O, O, O, O, O, O, O, O, O, O,...","[O, B-LOC, O, O, O, O, O, O, O, O, O, O, O, O,...",True
5,China controlled most of the match and saw sev...,China controlled most of the match and saw sev...,add_context,"[B-LOC, O, O, O, O, O, O, O, O, O, O, O, O, O,...","[B-LOC, O, O, O, O, O, O, O, O, O, O, O, O, O,...",True
6,Oleg Shatskiku made sure of the win in injury ...,Oleg Shatskiku made sure of the win in injury ...,add_context,"[B-PER, I-PER, O, O, O, O, O, O, O, O, O, O, O...","[B-PER, I-PER, O, O, O, O, O, O, O, O, O, O, O...",True
7,The former Soviet republic was playing in an A...,The former Soviet republic was playing in an A...,add_context,"[O, O, B-MISC, O, O, O, O, O, B-MISC, I-MISC, ...","[O, O, B-MISC, O, O, O, O, O, B-MISC, I-MISC, ...",True
8,Despite winning the Asian Games title two year...,Despite winning the Asian Games title two year...,add_context,"[O, O, O, B-MISC, I-MISC, O, O, O, O, O, B-LOC...","[O, O, O, B-MISC, I-MISC, O, O, O, O, O, B-LOC...",True
9,Two goals from defensive errors in the last si...,Two goals from defensive errors in the last si...,add_context,"[O, O, O, O, O, O, O, O, O, O, O, B-LOC, O, O,...","[O, O, O, O, O, O, O, O, O, O, O, B-LOC, O, O,...",True


# spaCy Model

In [16]:
import spacy 
from nlptest.nlptest import Harness

# Load the 'en_core_web_sm' spaCy pipeline; it is passed to Harness as `model`
# in the next cell.
model = spacy.load('en_core_web_sm')

In [17]:
# Same harness API as above, this time wrapping the spaCy model.
spacy_h = Harness("ner", model=model, data="test.conll")

# NOTE(review): 'text_classifer' and 'du' look like typos (possibly
# 'text_classifier' and 'nl') — confirm what the library expects before changing.
spacy_settings = {
    'tasks': ['ner', 'text_classifer'],
    'lang': ['en', 'du', 'it'],
    'tests_types': ['uppercase', 'lowercase'],
}

# Kept as the last expression so the cell still displays the returned config.
spacy_h.configure(spacy_settings)

{'tasks': ['ner', 'text_classifer'],
 'lang': ['en', 'du', 'it'],
 'tests_types': ['uppercase', 'lowercase']}

In [18]:
# Generate uppercase and lowercase test cases for the spaCy harness.
spacy_h.generate()

Unnamed: 0,Orginal,Test_Case,Test_type
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",uppercase
1,Nadim Ladki,NADIM LADKI,uppercase
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , UNITED ARAB EMIRATES 1996-12-06",uppercase
3,Japan began the defence of their Asian Cup tit...,JAPAN BEGAN THE DEFENCE OF THEIR ASIAN CUP TIT...,uppercase
4,But China saw their luck desert them in the se...,BUT CHINA SAW THEIR LUCK DESERT THEM IN THE SE...,uppercase
5,China controlled most of the match and saw sev...,CHINA CONTROLLED MOST OF THE MATCH AND SAW SEV...,uppercase
6,Oleg Shatskiku made sure of the win in injury ...,OLEG SHATSKIKU MADE SURE OF THE WIN IN INJURY ...,uppercase
7,The former Soviet republic was playing in an A...,THE FORMER SOVIET REPUBLIC WAS PLAYING IN AN A...,uppercase
8,Despite winning the Asian Games title two year...,DESPITE WINNING THE ASIAN GAMES TITLE TWO YEAR...,uppercase
9,Two goals from defensive errors in the last si...,TWO GOALS FROM DEFENSIVE ERRORS IN THE LAST SI...,uppercase


In [19]:
# Run the spaCy model on the test cases. The tags shown below use spaCy's
# label set (e.g. GPE, ORG, PERSON) without B-/I- prefixes.
spacy_h.run()

Unnamed: 0,Orginal,Test_Case,Test_type,expected_result,actual_result,is_pass
0,"SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...","SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRI...",uppercase,"[O, O, O, O, O, ORG, O, O, O, O, ORG, O]","[O, O, O, O, O, ORG, O, O, O, O, ORG, O]",True
1,Nadim Ladki,NADIM LADKI,uppercase,"[GPE, O]","[ORG, ORG]",False
2,"AL-AIN , United Arab Emirates 1996-12-06","AL-AIN , UNITED ARAB EMIRATES 1996-12-06",uppercase,"[ORG, ORG, ORG, O, GPE, GPE, GPE, DATE, DATE, ...","[ORG, ORG, ORG, O, GPE, GPE, O, DATE, DATE, DA...",False
3,Japan began the defence of their Asian Cup tit...,JAPAN BEGAN THE DEFENCE OF THEIR ASIAN CUP TIT...,uppercase,"[GPE, O, O, O, O, O, EVENT, EVENT, O, O, O, O,...","[GPE, O, O, O, O, O, O, O, O, O, O, O, CARDINA...",False
4,But China saw their luck desert them in the se...,BUT CHINA SAW THEIR LUCK DESERT THEM IN THE SE...,uppercase,"[O, GPE, O, O, O, O, O, O, O, ORDINAL, O, O, O...","[ORG, ORG, ORG, O, O, O, O, O, O, ORDINAL, O, ...",False
5,China controlled most of the match and saw sev...,CHINA CONTROLLED MOST OF THE MATCH AND SAW SEV...,uppercase,"[GPE, O, O, O, O, O, O, O, O, O, O, O, TIME, T...","[O, O, O, O, O, O, O, O, O, O, O, O, O, CARDIN...",False
6,Oleg Shatskiku made sure of the win in injury ...,OLEG SHATSKIKU MADE SURE OF THE WIN IN INJURY ...,uppercase,"[PERSON, PERSON, O, O, O, O, O, O, O, O, O, O,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",False
7,The former Soviet republic was playing in an A...,THE FORMER SOVIET REPUBLIC WAS PLAYING IN AN A...,uppercase,"[O, O, NORP, O, O, O, O, O, EVENT, EVENT, O, O...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",False
8,Despite winning the Asian Games title two year...,DESPITE WINNING THE ASIAN GAMES TITLE TWO YEAR...,uppercase,"[O, O, O, NORP, O, O, DATE, DATE, DATE, O, GPE...","[O, O, EVENT, EVENT, EVENT, O, DATE, DATE, DAT...",False
9,Two goals from defensive errors in the last si...,TWO GOALS FROM DEFENSIVE ERRORS IN THE LAST SI...,uppercase,"[CARDINAL, O, O, O, O, O, TIME, TIME, TIME, TI...","[CARDINAL, O, O, O, O, O, TIME, TIME, TIME, TI...",False


In [20]:
# Pass/fail counts per test type for the spaCy model.
spacy_h.report()

Test_type  is_pass
lowercase  False      22
           True        8
uppercase  False      23
           True        7
Name: is_pass, dtype: int64

In [21]:
# Re-display the report of the first (Spark NLP) harness `h`, presumably to
# compare it with the spaCy report above.
h.report()

Test_type    is_pass
add_context  True       34
             False      26
lowercase    False      27
             True        3
uppercase    True       19
             False      11
Name: is_pass, dtype: int64