In [1]:
from nlptest import Harness

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *

# Start Spark NLP and keep the returned handle in `spark`; later cells use it
# to build DataFrames (spark.createDataFrame).
spark = sparknlp.start()
# Bare expression so the notebook displays the session object as the cell output.
spark

In [3]:
# Stage 1: read the raw "text" column and emit it as the "document" column.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: consume "document" and emit "token".
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained 'glove_100d' word embeddings over the document/token columns.
embeddings = (
    WordEmbeddingsModel.pretrained('glove_100d')
    .setInputCols(["document", 'token'])
    .setOutputCol("embeddings")
)

# Stage 4: pretrained "ner_dl" (English) NER model, fed by the three columns above.
ner = (
    NerDLModel.pretrained("ner_dl", 'en')
    .setInputCols(["document", "token", "embeddings"])
    .setOutputCol("ner")
)

# Chain the four stages in order of their column dependencies.
ner_pipeline = Pipeline().setStages(
    [documentAssembler, tokenizer, embeddings, ner]
)

# Fit against a one-row DataFrame whose "text" column holds an empty string;
# the resulting model is what the NER Harness cell receives as `model`.
ner_model = ner_pipeline.fit(
    spark.createDataFrame([[""]]).toDF("text")
)

glove_100d download started this may take some time.
Approximate size to download 145.3 MB
[OK!]
ner_dl download started this may take some time.
Approximate size to download 13.6 MB
[OK!]


In [4]:
# Wrap the fitted Spark NLP pipeline in an NER test harness that reads the
# demo CoNLL file.
h3 = Harness(
    task="ner",
    model=ner_model,
    hub="johnsnowlabs",
    data="demo/data/train.conll",
)

# Apply the YAML test configuration; kept as the last expression so the
# resulting config dict is echoed as the cell output.
h3.configure("demo/data/config.yml")

{'defaults': {'min_pass_rate': 0.65},
 'tests': {'accuracy': {'min_micro_f1_score': {'min_score': 0.5},
   'min_macro_f1_score': {'min_score': 0.5},
   'min_weighted_f1_score': {'min_score': 0.5},
   'min_precision_score': {'min_score': {'O': 0.5,
     'LOC': 0.9,
     'PER': 0.8,
     'MISC': 0.8,
     'ORG': 0.8}},
   'min_recall_score': {'min_score': 0.75},
   'min_f1_score': {'min_score': 0.8}},
  'fairness': {'min_gender_f1_score': {'min_score': 0.5},
   'max_gender_f1_score': {'max_score': 0.99}},
  'bias': {'replace_to_female_pronouns': {'min_pass_rate': 0.65},
   'replace_to_low_income_country': {'min_pass_rate': 0.7},
   'replace_to_white_lastnames': {'min_pass_rate': 0.7},
   'replace_to_hindu_names': {'min_pass_rate': 0.66}},
  'robustness': {'lowercase': {'min_pass_rate': 0.65},
   'swap_entities': {'min_pass_rate': 0.65},
   'uppercase': {'min_pass_rate': 0.65}},
  'representation': {'min_label_representation_count': {'min_count': {'O': 50,
     'PER': 10,
     'LOC': 40}}

In [5]:
# Generate the test cases for the configured tests. The call returns a Harness
# object, whose repr is what the notebook shows as this cell's output.
h3.generate()

<nlptest.nlptest.Harness at 0x1e7c4cf5a00>

In [6]:
# Display the generated test cases as a table with the columns
# category, test_type, original, test_case and expected_result.
h3.testcases()


Unnamed: 0,category,test_type,original,test_case,expected_result
0,Accuracy,min_micro_f1_score,-,micro,0.5
1,Accuracy,min_macro_f1_score,-,macro,0.5
2,Accuracy,min_weighted_f1_score,-,weighted,0.5
3,Accuracy,min_precision_score,-,LOC,0.9
4,Accuracy,min_precision_score,-,MISC,0.8
...,...,...,...,...,...
700,representation,min_label_representation_proportion,-,O,0.4
701,representation,min_label_representation_proportion,-,PER,0.2
702,representation,min_label_representation_proportion,-,LOC,0.1
703,representation,min_country_economic_representation_proportion,-,high_income,0.6


In [7]:
# Run the generated test cases, then show the summary report: per test type,
# the fail/pass counts, the pass rate, the minimum required pass rate and the
# resulting pass flag.
h3.run().report()

Unnamed: 0,category,test_type,fail_count,pass_count,pass_rate,minimum_pass_rate,pass
0,Accuracy,min_micro_f1_score,0,1,100%,100%,True
1,Accuracy,min_macro_f1_score,0,1,100%,100%,True
2,Accuracy,min_weighted_f1_score,0,1,100%,100%,True
3,Accuracy,min_precision_score,0,5,100%,100%,True
4,Accuracy,min_recall_score,0,5,100%,100%,True
5,Accuracy,min_f1_score,0,5,100%,100%,True
6,fairness,min_gender_f1_score,0,3,100%,65%,True
7,fairness,max_gender_f1_score,1,2,67%,65%,True
8,Bias,replace_to_female_pronouns,0,94,100%,65%,True
9,Bias,replace_to_low_income_country,6,88,94%,70%,True


## TEXT CLASSIFICATION

In [8]:
from nlptest import Harness

In [9]:
# Text-classification harness backed by a Hugging Face model and the demo CSV.
# Note that `h3` is rebound here, so the NER harness created earlier in the
# notebook is no longer reachable under this name.
h3 = Harness(
    task="text-classification",
    model="lvwerra/distilbert-imdb",
    hub="huggingface",
    data="demo/data/sample.csv",
)


In [10]:
# NOTE(review): this is the same config file the NER harness uses. Its echoed
# contents include entity-level settings (per-label scores for 'O', 'LOC',
# 'PER', 'MISC', 'ORG' and the 'swap_entities' test). The generate() call that
# follows is recorded as failing with
# "AttributeError: 'SequenceLabel' object has no attribute 'entity'" --
# presumably this task needs its own text-classification config; TODO confirm.
h3.configure("demo/data/config.yml")

{'defaults': {'min_pass_rate': 0.65},
 'tests': {'accuracy': {'min_micro_f1_score': {'min_score': 0.5},
   'min_macro_f1_score': {'min_score': 0.5},
   'min_weighted_f1_score': {'min_score': 0.5},
   'min_precision_score': {'min_score': {'O': 0.5,
     'LOC': 0.9,
     'PER': 0.8,
     'MISC': 0.8,
     'ORG': 0.8}},
   'min_recall_score': {'min_score': 0.75},
   'min_f1_score': {'min_score': 0.8}},
  'fairness': {'min_gender_f1_score': {'min_score': 0.5},
   'max_gender_f1_score': {'max_score': 0.99}},
  'bias': {'replace_to_female_pronouns': {'min_pass_rate': 0.65},
   'replace_to_low_income_country': {'min_pass_rate': 0.7},
   'replace_to_white_lastnames': {'min_pass_rate': 0.7},
   'replace_to_hindu_names': {'min_pass_rate': 0.66}},
  'robustness': {'lowercase': {'min_pass_rate': 0.65},
   'swap_entities': {'min_pass_rate': 0.65},
   'uppercase': {'min_pass_rate': 0.65}},
  'representation': {'min_label_representation_count': {'min_count': {'O': 50,
     'PER': 10,
     'LOC': 40}}

In [11]:
# NOTE(review): the saved output of this cell is
# "AttributeError: 'SequenceLabel' object has no attribute 'entity'", i.e.
# test-case generation did not complete for the text-classification harness
# in the recorded run. Re-check after reviewing the config passed to configure().
h3.generate()

AttributeError: 'SequenceLabel' object has no attribute 'entity'

In [None]:
# Run the test cases and show the summary report.
# NOTE(review): this cell has no execution count (In [None]) but still carries
# a report output, while the preceding generate() is recorded as raising an
# AttributeError -- the saved output may be stale; confirm by re-running from a
# fresh kernel.
h3.run().report()

Unnamed: 0,category,test_type,fail_count,pass_count,pass_rate,minimum_pass_rate,pass
0,Accuracy,min_micro_f1_score,0,1,100%,100%,True
1,Accuracy,min_recall_score,2,1,33%,100%,False
2,fairness,min_gender_f1_score,1,2,67%,65%,True
3,Bias,replace_to_female_pronouns,0,10,100%,65%,True
4,Robustness,lowercase,0,10,100%,65%,True
5,representation,min_gender_representation_count,2,0,0%,65%,False


In [None]:
h