<a href="https://colab.research.google.com/github/NazarioR9/BNBR_Challenge/blob/master/notebooks/TranslateWithTransformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installs

In [None]:
!pip install transformers --quiet

# Imports

In [None]:
import os, sys, gc
import random
import pandas as pd
import numpy as np

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

In [None]:
from transformers import MarianTokenizer, MarianMTModel
from typing import List

In [None]:
from tqdm.auto import tqdm

# Envs

In [None]:
# Seed value used below for the Python, NumPy and torch random generators.
seed = 2020

In [None]:
# Seed every random number generator this notebook touches with the same value.
# NOTE(review): PYTHONHASHSEED is set after the interpreter has started, so it
# presumably only reaches child processes launched from here - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# GPU-side settings only apply when CUDA is actually present.
if torch.cuda.is_available():
  # Ask cuDNN for deterministic kernels and turn its auto-tuner off.
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  # Seed the current device, then every visible device.
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)

# Read Data

In [None]:
# Data directory, relative to the notebook's working directory. The trailing
# slash matters: file names are appended to it by plain string concatenation.
path = './../data/'

In [None]:
# Load the train and test splits from the data directory, in that order.
train, test = (
  pd.read_csv(path + fname)
  for fname in ('final_train.csv', 'final_test.csv')
)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Preview the first rows of the test frame.
test.head()

Unnamed: 0,ID,text
0,02V56KMO,how to overcome bad feelings and emotions
1,03BMGTOK,i feel like giving up in life
2,03LZVFM6,i was so depressed feel like got no strength ...
3,0EPULUM5,i feel so low especially since i had no one t...
4,0GM4C5GD,can i be successful when i am a drug addict ?


# Utilities

In [None]:
class TranslateDataset(Dataset):
  """Map-style dataset exposing the ``text`` column of a DataFrame.

  Items are addressed by row *position* (0 .. len-1), so the dataset behaves
  the same whatever index the frame carries (default, filtered, shuffled...).
  """

  def __init__(self, df):
    """Wrap ``df``.

    Args:
      df: DataFrame with a ``text`` column holding the sentences to serve.
    """
    super(TranslateDataset, self).__init__()
    self.df = df

  def __len__(self):
    """Return the number of rows in the wrapped frame."""
    return len(self.df)

  def __getitem__(self, idx):
    """Return the ``text`` value of the row at position ``idx``.

    Raises:
      IndexError: if ``idx`` is outside the frame.
    """
    # Positional lookup on purpose: ``.loc[idx, 'text']`` is label-based and
    # raises KeyError (or returns a different row) as soon as the frame's
    # index is not exactly 0..n-1.
    return self.df['text'].iloc[idx]

In [None]:
class TranslateLoader:
  """Minimal sequential batch loader.

  Yields plain Python lists ``[ds[i], ds[i+1], ...]`` over consecutive index
  ranges of size ``bs``; the last batch is shorter when ``len(ds)`` is not a
  multiple of ``bs``. No shuffling and no collation are performed.
  """

  def __init__(self, ds, bs):
    """Set up the loader.

    Args:
      ds: indexable object supporting ``len()`` and integer ``ds[i]``.
      bs: batch size, must be strictly positive.

    Raises:
      ValueError: if ``bs`` is not strictly positive.
    """
    if bs <= 0:
      raise ValueError(f'bs must be a positive integer, got {bs!r}')

    self.ds = ds
    self.bs = bs
    self.count = 0                  # index of the next batch to emit
    self.szs = len(ds)              # number of items in the dataset
    self.maxs = -(-self.szs // bs)  # ceil(szs / bs): total number of batches

  def __len__(self):
    """Return the number of batches in one full pass."""
    return self.maxs

  def __iter__(self):
    """Start a fresh pass and return the loader itself as the iterator."""
    # Rewind, otherwise a second ``for`` loop over the same loader would
    # silently produce no batches at all.
    self.count = 0
    return self

  def __next__(self):
    """Return the next batch as a list, or raise ``StopIteration`` when done."""
    if self.count >= self.maxs:
      raise StopIteration

    start = self.count * self.bs
    stop = min(start + self.bs, self.szs)  # clamp the final, possibly short, batch
    batch = [self.ds[i] for i in range(start, stop)]
    self.count += 1

    return batch

# Translation

In [None]:
# Language pair: source language first, target language second.
src, trg = 'en', 'fr'
# Checkpoint name handed to `from_pretrained` for both the model and the tokenizer.
mname = f'Helsinki-NLP/opus-mt-{src}-{trg}'

In [None]:
# Tokenizer and model are loaded from the same checkpoint name.
tok = MarianTokenizer.from_pretrained(mname)
model = MarianMTModel.from_pretrained(mname)



In [None]:
# Wrap each frame in a dataset, then batch both with the same batch size (32).
ds_train, ds_test = TranslateDataset(train), TranslateDataset(test)
dl_train, dl_test = (TranslateLoader(ds, bs=32) for ds in (ds_train, ds_test))

In [None]:
def translate(dl):
  """Translate every batch produced by ``dl`` and return the outputs as one list.

  Relies on the notebook-level ``tok`` (tokenizer) and ``model`` (translation
  model) defined in earlier cells.

  Args:
    dl: iterable of batches, each batch being a list of source strings.

  Returns:
    A flat list with the decoded translations, in iteration order.
  """
  translated = []

  # ``prepare_translation_batch`` only exists in old transformers releases.
  # When it is missing, calling the tokenizer directly performs the same
  # tokenize-and-pad step.
  legacy_prepare = getattr(tok, 'prepare_translation_batch', None)

  # Iterate the loader itself rather than calling ``next(dl)`` a fixed number
  # of times: an exhausted loader then ends the loop instead of leaking a raw
  # StopIteration out of this function.
  for data in tqdm(dl, desc='Translation'):
    if legacy_prepare is not None:
      batch = legacy_prepare(src_texts=data)
    else:
      batch = tok(data, return_tensors='pt', padding=True, truncation=True)
    gen = model.generate(**batch)
    translated.extend(tok.batch_decode(gen, skip_special_tokens=True))

  return translated

In [None]:
# Translate every training sentence; the result is a list of strings in batch order.
translated_train = translate(dl_train)

In [None]:
# Translate every test sentence; the result is a list of strings in batch order.
translated_test = translate(dl_test)

In [None]:
# Sanity check: there should be exactly one translation per training row.
len(translated_train), len(train)

(597, 597)

In [None]:
# Attach the translations as a new column named after the language pair
# (`translation_en_to_fr` for src='en', trg='fr'). Both frames are modified in place.
train[f'translation_{src}_to_{trg}'] = translated_train
test[f'translation_{src}_to_{trg}'] = translated_test

In [None]:
# Preview the training frame with its new translation column.
train.head()

Unnamed: 0,ID,text,label,Depression,Alcohol,Suicide,Drugs,translation_en_to_fr
0,SUAVK39Z,i feel that it was better i die am happy,0,1,0,0,0,J'ai l'impression que c'était mieux que je meu...
1,9JDAGUV3,why do i get hallucinations ?,3,0,0,0,1,Pourquoi ai-je des hallucinations?
2,419WR1LQ,i am stressed due to lack of financial suppor...,0,1,0,0,0,Je suis stressé en raison du manque de soutien...
3,6UY7DX6Q,why is life important ?,2,0,0,1,0,Pourquoi la vie est-elle importante?
4,FYC0FTFB,how could i be helped to go through the depre...,0,1,0,0,0,Comment pourrais-je être aidé à traverser la d...


In [None]:
# Preview the test frame with its new translation column.
test.head()

Unnamed: 0,ID,text,translation_en_to_fr
0,02V56KMO,how to overcome bad feelings and emotions,comment surmonter les mauvais sentiments et le...
1,03BMGTOK,i feel like giving up in life,J'ai envie d'abandonner dans la vie.
2,03LZVFM6,i was so depressed feel like got no strength ...,J'étais tellement déprimé que j'avais l'impres...
3,0EPULUM5,i feel so low especially since i had no one t...,Je me sens si faible d'autant plus que je n'av...
4,0GM4C5GD,can i be successful when i am a drug addict ?,Puis-je réussir quand je suis toxicomane?


# Save everything

In [None]:
# Write the augmented training frame into the data directory; index=False
# keeps the DataFrame's row index out of the CSV.
train.to_csv(path + 'train_translated_to_french.csv', index=False)

In [None]:
# Write the augmented test frame into the data directory; index=False
# keeps the DataFrame's row index out of the CSV.
test.to_csv(path + 'test_translated_to_french.csv', index=False)