### Preparation


In [47]:
# Packages to installfor sentiment analysis
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
Finished.


In [48]:
# Install translator
pip install translate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [49]:
# Install google trans
!pip install googletrans==3.1.0a0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [51]:
# Import all the libraries that are necessary for the three activities
from textblob import TextBlob
import json
import spacy
from spacy import displacy
from sklearn.model_selection import train_test_split
import googletrans
from googletrans import Translator
import translate

### Warm up: Out of the Box Sentiment Analysis 

In [65]:
# Class of sentiment analysis
class sentiment_analysis:

  def __init__(self, file):
    self.file = file

  def analysis(file):
    with open(file, "r") as data_file:
      for line in data_file:
          data = TextBlob(line.strip().split('<br />')[0])
          polarity = data.polarity
          if polarity < 0: 
            print("Negative")
          elif polarity >= 0:
            print("Positive")

In [66]:
# Calling the class function
analysis = sentiment_analysis.analysis("tiny_movie_reviews_dataset.txt")

Positive
Positive
Positive
Positive
Positive
Positive
Negative
Positive
Negative
Positive
Positive
Positive
Positive
Negative
Positive
Positive
Positive
Positive
Positive
Negative


### NER: Take a basic, pretrained NER model, and train further on a task-specific dataset

In [63]:
class ner:

  def __init__(self, file):
    self.file = file
  
  def ner_train(file):
    file = open(file)
    data = json.load(file)
    NER = spacy.load("en_core_web_sm")
    train, test = train_test_split(data["examples"], test_size=0.25)
    val, test = train_test_split(test, test_size=0.5)

    i = 0
    for i in range(len(train)):
      raw_text = train[i]
      text = NER(raw_text["content"])
      for word in text.ents:
        print(word.text,word.label_) # Print the words and the label assigned
      i += 1
    
    displacy.render(text,style="ent",jupyter=True) # Render the words with the tags assigned

In [64]:
ner.ner_train('Corona2.json')

Gabapentin GPE
neuralgia GPE
disorder1 ORG
NSAIDs ORG
symptoms.[1 ORG
disease.[1] Biological DMARDs PRODUCT
One CARDINAL
about 2 million CARDINAL
Streptococcus and Pseudomonas ORG
Shigella ORG
Salmonella ORG
two CARDINAL
Mouyassué PERSON
banana pipistrelle ORG
Mouyassué GPE
Cote d'Ivoire GPE
West Africa GPE
Magboi PERSON
hairy slit-faced PERSON
the Magboi River LOC
Sierra Leone ORG
2011 DATE
RNA ORG
Bunyaviridae GPE
TB ORG
13% PERCENT
TB ORG
Africa LOC
about 5–10% PERCENT
30% PERCENT
1982–1995 CARDINAL
TB ORG
five CARDINAL
FDA ORG
M2 CARDINAL
M2 drug target.[160] Measured PRODUCT
American NORP
91% PERCENT
China GPE
CDC ORG
M2 CARDINAL
2005–06 ORDINAL
two CARDINAL
M2 CARDINAL
first ORDINAL
first ORDINAL
1967 DATE
HIV.[53 ORG
TB ORG
first ORDINAL
FDA ORG
first ORDINAL
One CARDINAL
SNRI ORG
social phobia ORG
five CARDINAL
European NORP
TCA ORG
CR GPE
Lactobacilli GPE
Trichomonas GPE
Bacterial ORG
Escherichia PERSON
Urine PERSON
Bacteria PERSON
Escherichia NORP
Staphylococcus ORG
Streptoco

### Set up and compare model performance of two different translation models

In [54]:
class traductor:

  def __init__(self, fileTest, fileRef):
    fileTest = self.fileTest
    fileRef = self.fileRef

  def traduction_library(fileTest, fileRef):
    translator2 = translate.Translator(from_lang="english",to_lang= "spanish")

    with open(fileTest, 'r') as fp:
      x = fp.readlines()[0:100]
      i= 0
      for i in range(100):
        translated = translator2.translate(x[i])
        i += 1

    with open(fileRef, 'r') as fp:
        y = fp.readlines()[0:100]

    return x, y

  def traduction_with_google(fileTest, fileRef):

    translator = Translator()

    with open(fileTest, 'r') as fp:
      x = fp.readlines()[0:100]
      i= 0
      for i in range(100):
        translated = translator.translate(x[i], src='en', dest='es')
        i += 1
  
    with open(fileRef, 'r') as fp:
        y = fp.readlines()[0:100]
        i = 0

    return x, y

  def score(trad, x, y):
    i = 0
    for i in range(100): 
      ref = y[i].split() 
      test = x[i].split()

    from nltk.translate.bleu_score import sentence_bleu
    i = 0
    for i in range(100): 
      ref = y[i].split() 
      test = x[i].split()

      i += 1
      print(f'{trad} TRANSLATOR: {sentence_bleu(ref, test)}')

In [55]:
x1, y1 = traductor.traduction_with_google("europarl-v7.es-en.en", "europarl-v7.es-en.es")

In [56]:
traductor.score("GOOGLE", x1, y1)

GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 8.726094729337945e-232
GOOGLE TRANSLATOR: 9.181748633447778e-232
GOOGLE TRANSLATOR: 8.726094729337945e-232
GOOGLE TRANSLATOR: 9.533966891110756e-232
GOOGLE TRANSLATOR: 1.0832677820940877e-231
GOOGLE TRANSLATOR: 1.2508498911928379e-231
GOOGLE TRANSLATOR: 1.1200407237786664e-231
GOOGLE TRANSLATOR: 8.319100378795605e-232
GOOGLE TRANSLATOR: 8.147480343967206e-232
GOOGLE TRANSLATOR: 8.18873472774142e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 1.1200407237786664e-231
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 9.594503055152632e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 9.418382295637229e-232
GOOGLE TRANSLATOR: 8.972141065609098e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 9.157231061812019e-232
GOOGLE TRANSLATOR: 7.784451369270533e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 8.510469113101058e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 7.919883909890055e-232
GOOGLE TRA

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 8.231055179516831e-232
GOOGLE TRANSLATOR: 9.788429383461836e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 8.048704752395483e-232
GOOGLE TRANSLATOR: 7.720899511627474e-232
GOOGLE TRANSLATOR: 7.640041254455826e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 8.477028509227734e-232
GOOGLE TRANSLATOR: 8.06798322521923e-232
GOOGLE TRANSLATOR: 6.752107625974243e-232
GOOGLE TRANSLATOR: 6.492476721861418e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 8.726094729337945e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 9.109159947227211e-232
GOOGLE TRANSLATOR: 9.788429383461836e-232
GOOGLE TRANSLATOR: 0
GOOGLE TRANSLATOR: 7.601159375410181e-232
GOOGLE TRANSLATOR: 1.0003688322288243e-231


In [57]:
x2, y2 = traductor.traduction_library("europarl-v7.es-en.en", "europarl-v7.es-en.es")

In [58]:
traductor.score("LIBRARY", x1, y1)

LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 8.726094729337945e-232
LIBRARY TRANSLATOR: 9.181748633447778e-232
LIBRARY TRANSLATOR: 8.726094729337945e-232
LIBRARY TRANSLATOR: 9.533966891110756e-232
LIBRARY TRANSLATOR: 1.0832677820940877e-231
LIBRARY TRANSLATOR: 1.2508498911928379e-231
LIBRARY TRANSLATOR: 1.1200407237786664e-231
LIBRARY TRANSLATOR: 8.319100378795605e-232
LIBRARY TRANSLATOR: 8.147480343967206e-232
LIBRARY TRANSLATOR: 8.18873472774142e-232
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 1.1200407237786664e-231
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 9.594503055152632e-232
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 9.418382295637229e-232
LIBRARY TRANSLATOR: 8.972141065609098e-232
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 9.157231061812019e-232
LIBRARY TRANSLATOR: 7.784451369270533e-232
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 8.510469113101058e-232
LIBRARY TRANSLATOR: 0
LIBRARY TRANSLATOR: 7.91