# Create a model that extracts names and political parties using machine learning

## Library and data imports

In [4]:
import spacy
import json
import random
from spacy.training.example import Example

In [5]:
def load_data(file):
    """Return the object parsed from the JSON document stored at ``file``.

    The file is opened for reading as UTF-8 text.
    """
    with open(file, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()

    return json.loads(raw_text)

In [6]:
def save_data(file, data):
    """Write ``data`` to ``file`` as JSON indented by four spaces.

    The file is opened for writing as UTF-8 text, replacing any existing
    content.
    """
    encoder = json.JSONEncoder(indent=4)
    with open(file, mode="w", encoding="utf-8") as handle:
        # Stream the encoder's chunks straight to the file as they are produced.
        handle.writelines(encoder.iterencode(data))

In [7]:
# Parse the annotated examples from the JSON file once, at module level.
# Presumably a list of (text, {"entities": [...]}) pairs, the shape that
# train_spacy iterates over -- confirm against the data file.
TRAINING_DATA = load_data("data/political_training_data.json")

## Model Training

In [8]:
def train_spacy(data, iterations):
    """Train a blank English spaCy pipeline that contains only an NER component.

    Args:
        data: Iterable of ``(text, annotations)`` pairs. ``annotations`` is a
            dict passed to ``Example.from_dict``; its ``"entities"`` value is
            a list of entries whose element at index 2 is the entity label
            (presumably ``[start, end, label]`` -- confirm against the data).
            The caller's object is never reordered.
        iterations: Number of passes to make over the data.

    Returns:
        The trained ``nlp`` pipeline object.
    """
    # Shallow copy: the per-iteration shuffle below must not reorder the
    # caller's list as a side effect.
    train_data = list(data)
    nlp = spacy.blank("en")

    if "ner" not in nlp.pipe_names:
        nlp.add_pipe("ner", last=True)
    # Fetched outside the `if` so `ner` is bound whether or not the pipe
    # had to be added.
    ner = nlp.get_pipe("ner")

    # Register every label that occurs in the annotations with the NER pipe.
    # `or ()` tolerates examples whose "entities" key is missing or null.
    for _, annotations in train_data:
        for ent in annotations.get("entities") or ():
            ner.add_label(ent[2])

    # Disable every pipe other than NER for the duration of training.
    # `select_pipes` / `initialize` are the spaCy v3 names; `disable_pipes`
    # and `begin_training` are deprecated aliases for them.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.select_pipes(disable=other_pipes):
        optimizer = nlp.initialize()

        for itn in range(iterations):
            print(f"Starting iteration {itn}")
            random.shuffle(train_data)
            losses = {}  # accumulated by nlp.update over this iteration

            # One update per example (batch size 1) with 20% dropout.
            for text, annotations in train_data:
                doc = nlp.make_doc(text)
                example = Example.from_dict(doc, annotations)
                nlp.update(
                    [example],
                    drop=0.2,
                    sgd=optimizer,
                    losses=losses,
                )
            print(losses)

    return nlp

In [None]:
import time

# Time the whole run: training plus writing the model to disk.
start_time = time.time()
# The loaded dataset is bound to TRAINING_DATA at module level; TRAIN_DATA
# exists only as a local name inside train_spacy, so passing it here raised
# NameError.
nlp = train_spacy(TRAINING_DATA, 30)
nlp.to_disk("political_ner_model")
print("--- %s seconds ---" % (time.time() - start_time))