In [None]:
import csv

import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Load the tab-separated parallel corpus. The file has no header row, so the
# columns keep their integer labels: 0 = English sentence, 1 = Tamil
# translation, 2 = attribution text.
# QUOTE_NONE stops a stray double quote inside a sentence from being parsed as
# the start of a quoted field, which would silently merge the following rows.
# NOTE(review): assumes the file is raw TSV with no CSV-style quoting — confirm.
df = pd.read_table("tam.txt", header=None, quoting=csv.QUOTE_NONE, encoding="utf-8")

In [None]:
# Preview column 0 of the loaded frame (the English sentences).
df[0]

0                                               I slept.
1                                             Calm down.
2                                             I'll walk.
3                                             Who is he?
4                                             Who knows?
                             ...                        
202    Tom says he doesn't think he can do that by hi...
203    People who live in glass houses shouldn't thro...
204    It's been a long time since I've heard anyone ...
205    If you want your workers to be happy, you need...
206    It's my fault that the cake was burned. I was ...
Name: 0, Length: 207, dtype: object

In [None]:
# Display the whole frame to inspect all three columns side by side.
df

Unnamed: 0,0,1,2
0,I slept.,நான் தூங்கினேன்.,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
1,Calm down.,அமைதியாக இருங்கள்,CC-BY 2.0 (France) Attribution: tatoeba.org #4...
2,I'll walk.,நான் நடப்பேன்.,CC-BY 2.0 (France) Attribution: tatoeba.org #2...
3,Who is he?,அவன் யார்?,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
4,Who knows?,யாருக்குத் தெரியும்?,CC-BY 2.0 (France) Attribution: tatoeba.org #2...
...,...,...,...
202,Tom says he doesn't think he can do that by hi...,என் ஒருவனால் மட்டுமே அதை செய்ய முடியாது என்று ...,CC-BY 2.0 (France) Attribution: tatoeba.org #6...
203,People who live in glass houses shouldn't thro...,கண்ணாடி வீட்டில் வசிப்பவகள் கல்லை எறியக் கூடாது,CC-BY 2.0 (France) Attribution: tatoeba.org #5...
204,It's been a long time since I've heard anyone ...,ஒருவர் அந்த வார்த்தையைப் பயன் படுத்துவதைக் கேட...,CC-BY 2.0 (France) Attribution: tatoeba.org #4...
205,"If you want your workers to be happy, you need...",உங்களுடைய வேலையாட்கள் மகிழ்ச்சியாக இருக்க வேண்...,CC-BY 2.0 (France) Attribution: tatoeba.org #1...


In [None]:
# Split the frame into the two text columns used for training:
# column 0 is the source side, column 1 the target side.
source_texts, target_texts = df[0], df[1]

In [None]:
# Display the source-side sentences to check the column split.
source_texts

0                                               I slept.
1                                             Calm down.
2                                             I'll walk.
3                                             Who is he?
4                                             Who knows?
                             ...                        
202    Tom says he doesn't think he can do that by hi...
203    People who live in glass houses shouldn't thro...
204    It's been a long time since I've heard anyone ...
205    If you want your workers to be happy, you need...
206    It's my fault that the cake was burned. I was ...
Name: 0, Length: 207, dtype: object

In [None]:
# Display the target-side sentences to check the column split.
target_texts

0                                       நான் தூங்கினேன்.
1                                      அமைதியாக இருங்கள்
2                                         நான் நடப்பேன்.
3                                             அவன் யார்?
4                                   யாருக்குத் தெரியும்?
                             ...                        
202    என் ஒருவனால் மட்டுமே அதை செய்ய முடியாது என்று ...
203      கண்ணாடி வீட்டில் வசிப்பவகள் கல்லை எறியக் கூடாது
204    ஒருவர் அந்த வார்த்தையைப் பயன் படுத்துவதைக் கேட...
205    உங்களுடைய வேலையாட்கள் மகிழ்ச்சியாக இருக்க வேண்...
206    என்னுடையத் தவறினால் கேக்கானதுக் கருகிப் போனது....
Name: 1, Length: 207, dtype: object

In [None]:
# Character-level vocabularies, one per language.
#
# Index 0 is reserved for a dedicated padding symbol. pad_sequences fills with
# 0 by default, so without a reserved slot the padding id aliases whichever
# real character happens to be enumerated first, and padded positions decode
# back to that character. The empty string is used as the padding symbol
# because iterating over a text never yields it and it disappears when the
# decoded characters are joined.
#
# The real characters are sorted so that ids are the same on every run; plain
# set iteration order can change between interpreter sessions.
# The vocabularies are now lists (pad symbol first) rather than sets.
PAD_CHAR = ''
source_vocab = [PAD_CHAR] + sorted(set(' '.join(source_texts)))
target_vocab = [PAD_CHAR] + sorted(set(' '.join(target_texts)))
# Sizes include the padding entry, so every id is < vocab size.
source_vocab_size = len(source_vocab)
target_vocab_size = len(target_vocab)

In [None]:
# Forward tables: character -> integer id, where the id is the character's
# position in the vocabulary's iteration order.
source_char_to_int = dict(zip(source_vocab, range(len(source_vocab))))
target_char_to_int = dict(zip(target_vocab, range(len(target_vocab))))
# Inverse tables: integer id -> character, used when decoding predictions.
source_int_to_char = dict(zip(source_char_to_int.values(), source_char_to_int.keys()))
target_int_to_char = dict(zip(target_char_to_int.values(), target_char_to_int.keys()))

In [None]:
# Encode each sentence as the list of its characters' integer ids.
# A character missing from the lookup table raises KeyError.
source_sequences = [list(map(source_char_to_int.__getitem__, text)) for text in source_texts]
target_sequences = [list(map(target_char_to_int.__getitem__, text)) for text in target_texts]

In [None]:
# Pad sequences to the same length
# The common length is the longest sequence on EITHER side. Sizing it from the
# source alone makes pad_sequences truncate every longer target (from the
# front, by default), silently dropping the start of that translation.
max_sequence_length = max(
    max(len(seq) for seq in source_sequences),
    max(len(seq) for seq in target_sequences),
)
# padding='post' appends the fill value (0) after the real ids.
source_sequences = tf.keras.preprocessing.sequence.pad_sequences(source_sequences, maxlen=max_sequence_length, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences, maxlen=max_sequence_length, padding='post')

In [None]:
# Build the model
# Seed Python, NumPy and TensorFlow so weight initialisation (and therefore
# training) is repeatable across kernel restarts.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Not consumed by the model below; kept because they are notebook-level names.
input_shape = (max_sequence_length, source_vocab_size)
output_shape = (max_sequence_length, target_vocab_size)

model = tf.keras.models.Sequential([
    # Character id -> 64-dimensional learned vector.
    tf.keras.layers.Embedding(source_vocab_size, 64),
    # return_sequences=True emits an output at every time step, so the model
    # predicts one target character per input position.
    tf.keras.layers.SimpleRNN(128, return_sequences=True),
    # Per-step probability distribution over the target vocabulary.
    tf.keras.layers.Dense(target_vocab_size, activation='softmax')
])

# categorical_crossentropy expects one-hot encoded targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# One-hot encode the target sequences
# Each sentence's id sequence is expanded on its own, then the per-sentence
# results are combined into a single array.
target_sequences_one_hot = np.array(
    list(
        map(
            lambda seq: tf.keras.utils.to_categorical(seq, num_classes=target_vocab_size),
            target_sequences,
        )
    )
)

In [None]:
# Train the model
EPOCHS = 2000
# verbose=0: thousands of per-epoch progress bars would bury the notebook in
# log output. Binding the result keeps the History repr out of the cell output
# and keeps the per-epoch metrics available for later inspection or plotting.
history = model.fit(source_sequences, target_sequences_one_hot, epochs=EPOCHS, verbose=0)

# Show only the metrics of the final epoch as the cell's output.
{name: values[-1] for name, values in history.history.items()}

Epoch 1/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9030 - loss: 0.3214
Epoch 2/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8927 - loss: 0.3585
Epoch 3/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8803 - loss: 0.3861
Epoch 4/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8984 - loss: 0.3334
Epoch 5/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8891 - loss: 0.3556
Epoch 6/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9071 - loss: 0.3122
Epoch 7/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9161 - loss: 0.2821
Epoch 8/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9142 - loss: 0.2834
Epoch 9/2000
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x1c1f48aba10>

In [None]:
# Translate a new input sequence
# The raw sentence keeps its own name so it is still available after encoding.
input_text = "She danced with him."

# Fail with a readable message instead of a bare KeyError when the sentence
# contains a character that is not in the source lookup table.
unknown_chars = sorted(set(input_text) - set(source_char_to_int))
if unknown_chars:
    raise ValueError(f"Characters not in the source vocabulary: {unknown_chars}")

input_ids = [source_char_to_int[char] for char in input_text]
# truncating='post' matches padding='post': an over-long sentence keeps its
# beginning instead of losing its first characters (the default is 'pre').
input_sequence = tf.keras.preprocessing.sequence.pad_sequences(
    [input_ids], maxlen=max_sequence_length, padding='post', truncating='post'
)
# The batch holds a single sentence; take its per-step class probabilities.
output_sequence = model.predict(input_sequence)[0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


In [None]:
# Decode the output sequence
# Take the most probable class id at every time step (argmax over the last
# axis) and map each id back to its target character.
output_sequence = [
    target_int_to_char[class_id]
    for class_id in np.argmax(output_sequence, axis=-1)
]

In [None]:
# Report the input sentence and the decoded characters joined into one string.
translated_text = ''.join(output_sequence)
print("Input Sequence: 'She danced with him.'")
print("Translated Sequence:", translated_text)

Input Sequence: 'She danced with him.'
Translated Sequence: அவள் அவனோடு நடனம் ஆடினாள்ஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊஊ
