### NER experiment with spaCy

In [1]:
import spacy
import random
import pandas as pd
from ner.train import custom_train
from ner.eval import ner_model_evalution
from utility.utils import json_2_dataframe
from utility.utils import train_test_spliter
from utility.utils import corpus_entity_info
from spacy.util import minibatch, compounding
from utility.utils import spacy_data_conversion

In [2]:
# Seed Python's `random` module before anything stochastic runs, so that code
# drawing from it later in the notebook repeats across "Restart & Run All".
# NOTE(review): confirm which RNG train_test_spliter / custom_train actually
# use; NumPy-based randomness would need its own seed.
SEED = 42
random.seed(SEED)

# Load the chatbot corpus and split it into train / test partitions.
data = json_2_dataframe('../data/ChatbotCorpus.json')
splited_data = train_test_spliter(data)
train_data, test_data = splited_data.train, splited_data.test

In [3]:
# Turn every row's `entities` value into its own DataFrame and stack them into
# one flat entity table per split.
train_entity_frames = [pd.DataFrame(entities) for entities in train_data.entities.values]
train_ner = pd.concat(train_entity_frames)

test_entity_frames = [pd.DataFrame(entities) for entities in test_data.entities.values]
test_ner = pd.concat(test_entity_frames)

# Display the train/test entity summary produced by corpus_entity_info, with
# its index moved into a regular column.
entity_summary = corpus_entity_info(train_ner, test_ner)
entity_summary.reset_index()

Unnamed: 0,entity,train,test
0,Criterion,50,34
1,Line,2,1
2,StationDest,57,71
3,StationStart,91,102
4,TimeEndTime,2,0
5,TimeStartTime,5,0
6,Vehicle,50,35


In [4]:
# Rebuild the flattened training-entity table and collect the distinct values
# of its `entity` column; these are the labels handed to custom_train.
train_ner = pd.concat(
    [pd.DataFrame(entities) for entities in train_data.entities.values]
)
labels = train_ner["entity"].unique()

In [5]:
# Convert the training split with spacy_data_conversion in 'train' mode; the
# result is the training input passed to custom_train.
formated_training_data = spacy_data_conversion(
    train_data,
    mode='train',
)

#### Transfer learning using the spaCy small NER model

In [6]:
# Train for 20 iterations without passing `model`, so custom_train falls back
# to its default base model. The returned object is kept for evaluation.
small_ner = custom_train(
    formated_training_data,
    labels,
    output_dir='model/ner_model/small_ner',
    n_iter=20,
    verbose=False,
)

Loaded model 'en'
model is trained
model is saved to model/ner_model/small_ner



#### Small NER model evaluation

In [7]:
# Score the small model on each split. The split is first converted with
# spacy_data_conversion; the mode string 'evalution' is passed verbatim.
train_eval_data = spacy_data_conversion(train_data, mode='evalution')
train_data_scorecard = ner_model_evalution(small_ner, train_eval_data)

test_eval_data = spacy_data_conversion(test_data, mode='evalution')
test_data_scorecard = ner_model_evalution(small_ner, test_eval_data)

### NER score for the training dataset

In [8]:
# Display the training-split scorecard computed for small_ner.
train_data_scorecard

index,f1_score,precision_score,recall_score
overall,77.155172,85.645933,70.196078
StationDest,75.247525,84.444444,67.857143
StationStart,70.731707,78.378378,64.444444
Criterion,89.361702,95.454545,84.0
Vehicle,81.72043,88.372093,76.0
TimeEndTime,66.666667,100.0,50.0
TimeStartTime,57.142857,100.0,40.0
Line,0.0,0.0,0.0


### NER score for test dataset

In [9]:
# Display the test-split scorecard computed for small_ner.
test_data_scorecard

index,f1_score,precision_score,recall_score
overall,74.004684,85.869565,65.020576
StationDest,86.153846,94.915254,78.873239
StationStart,59.171598,74.626866,49.019608
Criterion,86.666667,100.0,76.470588
Vehicle,77.61194,81.25,74.285714
Line,0.0,0.0,0.0


#### Observation
    - The small NER model is fine-tuned from spaCy's small English model.
    - As we can see, the model does not generalize well. We have a good amount of data for StationStart and StationDest, but the test set's
      overall score is still not good.

### Transfer learning using the spaCy large model

In [10]:
# Same training call as for the small model, but starting from the
# "en_core_web_lg" base model and saving to a separate output directory.
large_ner = custom_train(
    formated_training_data,
    labels,
    model="en_core_web_lg",
    output_dir='model/ner_model/large_ner',
    n_iter=20,
    verbose=False,
)

Loaded model 'en_core_web_lg'
model is trained
model is saved to model/ner_model/large_ner


In [11]:
# Score the large model on each split. This rebinds train_data_scorecard and
# test_data_scorecard, so the small-model results are no longer reachable
# under these names after this cell runs.
train_eval_data = spacy_data_conversion(train_data, mode='evalution')
train_data_scorecard = ner_model_evalution(large_ner, train_eval_data)

test_eval_data = spacy_data_conversion(test_data, mode='evalution')
test_data_scorecard = ner_model_evalution(large_ner, test_eval_data)

### NER score for train dataset

In [12]:
# Display the training-split scorecard computed for large_ner.
train_data_scorecard

index,f1_score,precision_score,recall_score
overall,93.957115,93.410853,94.509804
StationDest,92.592593,96.153846,89.285714
StationStart,93.478261,91.489362,95.555556
Criterion,98.039216,96.153846,100.0
Vehicle,94.117647,92.307692,96.0
TimeStartTime,90.909091,83.333333,100.0
TimeEndTime,66.666667,100.0,50.0
Line,66.666667,100.0,50.0


### NER score for test dataset

In [13]:
# Display the test-split scorecard computed for large_ner.
test_data_scorecard

index,f1_score,precision_score,recall_score
overall,91.139241,93.506494,88.888889
StationDest,91.044776,96.825397,85.915493
StationStart,91.919192,94.791667,89.215686
Criterion,100.0,100.0,100.0
Vehicle,82.191781,78.947368,85.714286
Line,0.0,0.0,0.0


### Observation
    - Notice that the scores for the train set and the test set do not differ much.
    - The model is not actually overfitted. However, in the current dataset, the train and test sets are quite similar.
    - So a test set with more varied samples would give a better way to evaluate the model.