<a href="https://colab.research.google.com/github/Berenger12/Berenger12/blob/main/translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#importation des bibliotheque necessaire
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import re #Ce module fournit des opérations de correspondance d'expressions régulières similaires à celles trouvées dans Perl.
import torch
import torch.nn as nn

import torchtext
torchtext.disable_torchtext_deprecation_warning()



In [4]:
#verification des fichier existants dans mon directory
os.listdir("/content/sample_data/")

['anscombe.json',
 'README.md',
 'archive.zip',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv']

In [None]:
# Lecture des données et affichage des premieres ligne du dataset
df=pd.read_csv("/content/sample_data/archive.zip")
df.head(20)

Unnamed: 0.1,Unnamed: 0,EWE,ENGLISH
0,0,Ne nyɔnu aɖe le evi dzim eye wo le kukum nɛ la...,﻿If a woman often loss his baby after he is bo...
1,1,Ŋkɔ sia nye na ŋkɔ si ke ame bubu tsɔna na ɖev...,"This name comes from another person, which mea..."
2,2,Ame si hɔ ɖevi la ƒlela tsona ƒome bubu me alo...,This person must not be part of the whole fami...
3,3,Kɔnua wo yina ale: evinɔ si ga dzi ɖevi bubu a...,The ceremony is done as follow: the family of ...
4,4,Ne ame aɖe vayina to afimagodzi he kɔ ɖevia la...,When somebody passes through the road and find...
5,5,"Emegbe la ɖevila ƒe pomea ɖona to, he tɔna tek...","After then, the family simply waiting for info..."
6,6,Esiao kãtã vayina le ga ƒoƒo aɖeo me.\n,All this happen in some hours.
7,7,Ɖevi yeye la ƒe ƒomea nana mɔnu kpɔkpɔ amesi f...,The family gives an opportunity to the person ...
8,8,"Le ɣemayi mela ame si fɔ ɖevila, nana ŋkɔ bubu...",At this time this person gives a name he wants...
9,9,"Evi fɔla si trozu ɖevila ƒe tɔ evelia la, ateŋ...",This person who becomes the second father of t...


In [None]:
# comptage du nombre de ligne et de colonne dans le dataset
df.shape

(28614, 3)

In [None]:
#information sur les contenu du dataset
df.info

In [None]:
# la description de mes donnees
df.describe()

Unnamed: 0.1,Unnamed: 0
count,28614.0
mean,14306.5
std,8260.294638
min,0.0
25%,7153.25
50%,14306.5
75%,21459.75
max,28613.0


In [None]:
#verification pour voir si le dataset comporte des valeurs manquantes
df.isnull().sum().any()

False

In [None]:
# Nettoyage des données
def clean_text(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = text.lower()
    return text

df['EWE'] = df['EWE'].apply(clean_text)
df['ENGLISH'] = df['ENGLISH'].apply(clean_text)


In [None]:
df.head(20)

Unnamed: 0.1,Unnamed: 0,EWE,ENGLISH
0,0,ne nynu ae le evi dzim eye wo le kukum n la o ...,if a woman often loss his baby after he is bor...
1,1,k sia nye na k si ke ame bubu tsna na evia si ...,this name comes from another person which mean...
2,2,ame si h evi la lela tsona ome bubu me alo duk...,this person must not be part of the whole fami...
3,3,knua wo yina ale evin si ga dzi evi bubu alo e...,the ceremony is done as follow the family of t...
4,4,ne ame ae vayina to afimagodzi he k evia la le...,when somebody passes through the road and find...
5,5,emegbe la evila e pomea ona to he tna tekp ekp...,after then the family simply waiting for infor...
6,6,esiao kt vayina le ga oo aeo me\n,all this happen in some hours
7,7,evi yeye la e omea nana mnu kpkp amesi f evi l...,the family gives an opportunity to the person ...
8,8,le emayi mela ame si f evila nana k bubu si dz...,at this time this person gives a name he wants...
9,9,evi fla si trozu evila e t evelia la ate ne el...,this person who becomes the second father of t...


In [None]:
# Division des données
train_data, temp_data = train_test_split(df, test_size=0.2, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

In [None]:
class TransformerModel(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers):
        super(TransformerModel, self).__init__()
        self.transformer = nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_encoder_layers, num_decoder_layers=num_decoder_layers)
        self.src_embedding = nn.Embedding(src_vocab_size, d_model)
        self.tgt_embedding = nn.Embedding(tgt_vocab_size, d_model)
        self.fc_out = nn.Linear(d_model, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embedding(src)
        tgt_emb = self.tgt_embedding(tgt)
        output = self.transformer(src_emb, tgt_emb)
        output = self.fc_out(output)
        return output

In [None]:
# entrainement du model
def train_model(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0

    for i, batch in enumerate(iterator):
        src = batch.src
        trg = batch.trg

        optimizer.zero_grad()
        output = model(src, trg[:,:-1])

        output_dim = output.shape[-1]
        output = output.contiguous().view(-1, output_dim)
        trg = trg[:,1:].contiguous().view(-1)

        loss = criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)


In [None]:
# evaluation du model

from torchtext.data.metrics import bleu_score
def evaluate_model(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    translations = []
    references = []

    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch.src
            trg = batch.trg

            output = model(src, trg[:,:-1])
            output_dim = output.shape[-1]
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:,1:].contiguous().view(-1)

            loss = criterion(output, trg)
            epoch_loss += loss.item()

            # Collecte des traductions et des références pour calculer le BLEU score
            translations.append(output.argmax(1))
            references.append(trg)

    bleu = bleu_score(translations, references)

    return epoch_loss / len(iterator), bleu



TypeError: Module.eval() missing 1 required positional argument: 'self'