In [None]:
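# Requires spaCy 2.x: the cells below use the v2 training API
# (nlp.create_pipe, nlp.entity, nlp.update(texts, annotations)).
# %pip install "spacy<3"
# !python3 -m spacy download en_core_web_sm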

# Testing a sample spacy model 
## Data generated using NER-annotator
### Tags used in this set of training_data
- **ACT** — any kind of act (political, medical, physical, ...)
- **CITIZEN** — a nationality such as "Indian"; added to see whether it gets tagged on its own, because "India" is tagged as GEO and people usually apply lemmatization
- **GEO** — a geographic location
- **PERSON** — a person; the data contains mentions of heads of state

In [1]:
import json
import random
import warnings

import spacy
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below; consider narrowing it to the noisy categories.
warnings.filterwarnings("ignore")

In [2]:
# Load the annotation export used for training.
# Later cells read two keys from it: "classes" (the custom entity labels) and
# "annotations" (an iterable of (text, annotations) pairs).
TRAINING_DATA_PATH = '../input/ner-tagged-data/training_data.json'

# Decode explicitly as UTF-8 instead of relying on the platform default, so
# the texts are read identically on every machine.
with open(TRAINING_DATA_PATH, encoding='utf-8') as fp:
    training_data = json.load(fp)

In [3]:
# Prepare an empty model to train: a blank English pipeline with no
# pretrained components.
# NOTE(review): create_pipe(...) followed by add_pipe(component) looks like the
# spaCy 2.x way of building a pipeline — confirm the installed spaCy version.
nlp = spacy.blank('en')
# Give the (empty) vectors table an explicit name.
# NOTE(review): presumably done to avoid spaCy's unnamed-vectors warning — confirm.
nlp.vocab.vectors.name = 'demo'
# Create an untrained entity recognizer and append it as the last pipe.
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner, last=True)
# Alternative starting point (disabled): the pretrained small English pipeline.
# nlp = spacy.load('en_core_web_sm')

In [4]:
# Add the custom NER tags as entity labels on the model.
# The recognizer is looked up by its pipe name through the documented
# `get_pipe` API rather than the `nlp.entity` shortcut property, so this cell
# works for whichever `nlp` object is currently in use.
ner_component = nlp.get_pipe('ner')
for label in training_data["classes"]:
    ner_component.add_label(label)

In [5]:
# Prepare the pipeline for training and keep the returned optimizer; the
# training loop below passes it to `nlp.update` as `sgd=`.
# NOTE(review): no training happens in this cell — `begin_training` presumably
# only initialises the model weights; confirm against the spaCy docs.
optimizer = nlp.begin_training()

In [6]:
# Train the NER component for a fixed number of passes over the annotations.
N_EPOCHS = 50
SHUFFLE_SEED = 0  # fixed seed so the example order is repeatable across runs

# Drop empty texts once, up front (an empty sentence may have been saved while
# annotating). Building a new list also means shuffling never reorders
# training_data["annotations"] itself.
train_examples = [
    (text, annotations)
    for text, annotations in training_data["annotations"]
    if len(text) > 0
]

shuffler = random.Random(SHUFFLE_SEED)
for epoch in range(N_EPOCHS):
    # Present the examples in a different order each epoch so the updates do
    # not depend on the order in which the sentences were annotated.
    shuffler.shuffle(train_examples)
    # Loss accumulated over this epoch; after the loop it holds the values of
    # the final epoch.
    losses = {}
    for text, annotations in train_examples:
        # One example per update step.
        nlp.update([text], [annotations], sgd=optimizer, losses=losses)

In [7]:
# Sample news passages for trying out the trained model.
# Each passage is written as adjacent string literals inside parentheses so
# that no literal spans more than one physical line; the concatenated value
# contains no newline characters.

# Spare sample; not used by the visible inference cell.
text = (
    "They say a third German soldier and a sixth Afghan were wounded. The Associated Press quotes Afghan President Hamid Karzai as saying he was deeply saddened by the incident. The German soldiers were part of the NATO-led International Security Assistance Force in Afghanistan. Thousands of Icelanders marked the 90th anniversary of sovereignty from Denmark Monday by demanding the government resign over the country 's economic crisis. The German detachment of about two thousand troops provides security and collects weapons and ammunition from disarmed militias for disposal. ISAF has about 8,000 troops in Afghanistan. Pakistani police reported more deaths Saturday from poisoned bootleg liquor in the port city of Karachi , raising the death toll to at least 40. Several people are still being treated at a local hospital after consuming the toxic batch of liquor late Thursday. Liquor is banned for Muslims in Pakistan , although a few shops are allowed to sell alcohol to non-Muslims. But some Muslims drink alcohol , resorting to black-market supplies smuggled from abroad or homemade liquor that is sometimes tainted. Police say they have arrested several men in a series of raids for illegally preparing and selling the homemade alcohol. In October of last year , at least 12 people died after drinking contaminated liquor in the eastern city of Multan. A former U.S. Senate majority leader says he never agreed to let the Bush administration eavesdrop , without court approval , on phone calls that cross U.S. borders. Democrat Tom Daschle contradicts President Bush , who says Congress granted him the authority in legislation authorizing the use of force against al-Qaida after the September 11 , 2001 terror attacks. Hundreds of marchers tried to storm central bank headquarters in Reykjavik. In an opinion piece in the Washington Post Friday , Mr. "
    "Daschle says lawmakers granted the president extra powers to pursue al Qaida , but specifically turned down a White House request to use those powers inside the United States. President Bush last week confirmed he secretly authorized the National Security Agency to eavesdrop in the United States. He called it a vital tool for national security that was within his legal power. Iran 's elite security forces are warning opposition supporters not to hold anti-government demonstrations during a government-sponsored rally on Friday. Opposition activists have called for protests coinciding with the Quds Day rallies , the annual event that expresses support for Palestinians and condemns Israel. Opposition leader Mir Hossein Mousavi has said he plans to attend and activists have encouraged people to capitalize on the large gatherings to protest the disputed re-election of President Mahmoud Ahmadinejad. On Thursday , Iranian state media published a message from the elite Revolutionary Guards promising to crack down on any protests during the rallies. Following Iran's disputed June 12 elections , rights groups said hundreds of people were detained in clashes with security forces during post-election , anti-government demonstrations. Since then , authorities have held public trials of the accused and tried to marginalize moderate officials within the government. The United Nations is praising the use of military helicopters to drop food and rescue survivors in tsunami-ravaged Indonesia , saying the aircraft are worth their weight in gold."
)

# Passage fed to the model in the inference cell.
text1 = (
    "Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country. Iranian officials say they expect to get access to sealed sensitive parts of the plant Wednesday , after an IAEA surveillance system begins functioning. Helicopter gunships Saturday pounded militant hideouts in the Orakzai tribal region , where many Taliban militants are believed to have fled to avoid an earlier military offensive in nearby South Waziristan. They left after a tense hour-long standoff with riot police. U.N. relief coordinator Jan Egeland said Sunday , U.S. , Indonesian and Australian military helicopters are ferrying out food and supplies to remote areas of western Aceh province that ground crews can not reach. Mr. Egeland said the latest figures show 1.8 million people are in need of food assistance - with the need greatest in Indonesia , Sri Lanka , the Maldives and India. He said last week 's tsunami and the massive underwater earthquake that triggered it has affected millions in Asia and Africa. Some 1,27,000 people are known dead. Aid is being rushed to the region , but the U.N. official stressed that bottlenecks and a lack of infrastructure remain a challenge. Lebanese politicians are condemning Friday 's bomb blast in a Christian neighborhood of Beirut as an attempt to sow sectarian strife in the formerly war-torn country. In Beirut , a string of officials voiced their anger , while at the United Nations summit in New York , Prime Minister Fouad Siniora said the Lebanese people are resolute in preventing such attempts from destroying their spirit  One person was killed and more than 20 others injured in the bomb blast late Friday , which took place on a residential street. Lebanon has suffered a series of bombings since the massive explosion in February that killed former Prime Minister Rafik Hariri and 20 other people. "
    "Syria is widely accused of involvement in his killing , and Friday 's explosion comes days before U.N. investigator Detlev Mehlis is to return to Damascus to interview several Syrian officials about the assassination. The global financial crisis has left Iceland 's economy in shambles. Israeli officials say Prime Minister Ariel Sharon will undergo a medical procedure Thursday to close a tiny hole in his heart discovered during treatment for a minor stroke suffered last month. Doctors describe the tiny hole as a minor birth defect and say it is in the partition between the upper chambers of Mr. Sharon 's heart. The procedure , known as cardiac catheterization , involves inserting a catheter through a blood vessel into the heart , where an umbrella-like device will plug the hole . Doctors say they expect Mr. Sharon will make a full recovery. Mr. Sharon returned to work on December 25 , one week after his emergency hospitalization. Doctors say the stroke has not caused any permanent damage. The designers of the first private manned rocket to burst into space have received a $ 10 million prize created to promote space tourism. SpaceShipOne designer Burt Rutan accepted the Ansari X Prize money and a trophy on behalf of his team Saturday during an awards ceremony in the U.S. state of Missouri . To win the money , SpaceShipOne had to blast off into space twice in a two-week period and fly at least 100 kilometers above Earth. The spacecraft made its flights in late September and early October , lifting off from California 's Mojave desert."
)

In [8]:
# Run the trained pipeline over the second sample passage, then list every
# predicted entity span together with its label, one per line.
doc = nlp(text1)
predicted_entities = [(span.text, span.label_) for span in doc.ents]
for entity_text, entity_label in predicted_entities:
    print(entity_text, entity_label)

marched ACT
London GEO
protest ACT
war ACT
Iraq GEO
withdrawal ACT
British CITIZEN
Iranian officials CITIZEN
Orakzai tribal region GEO
Jan Egeland GEO
western ACT
Mr. Egeland GEO
Indonesia CITIZEN
Lebanese CITIZEN
condemning ACT
bomb blast PERSON
killed ACT
bomb blast ACT
massive explosion ACT
Prime Minister Rafik Hariri CITIZEN
Syria GEO
killing ACT
global financial crisis ACT
Iceland GEO
Israeli CITIZEN
Prime Minister Ariel Sharon PERSON
medical procedure ACT
Mr. Sharon PERSON
Mr. Sharon PERSON
Mr. Sharon PERSON
Burt Rutan PERSON
Ansari PERSON
awards ceremony ACT
U.S. state of Missouri GEO
