<a href="https://colab.research.google.com/github/03axdov/EnglishLanguageLearning/blob/main/EnglishLanguageLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd

In [None]:
# Fix TensorFlow's global random seed so that randomly initialised weights
# (and any other TF random ops) are repeatable across fresh-kernel runs.
tf.random.set_seed(47)

In [None]:
# Metric list containing a single root-mean-squared-error metric.
# NOTE(review): not used in the cells visible here; presumably handed to
# model.compile(...) further down — confirm.
metrics = [tf.keras.metrics.RootMeanSquaredError()]

In [None]:
# Load the raw train/test tables as pandas DataFrames. The paths are relative,
# so the EnglishLanguageLearning/ directory must exist in the working directory.
# Note: despite the `_ds` suffix these are DataFrames at this point; `train_ds`
# is rebound to a tf.data.Dataset in a later cell.
train_ds = pd.read_csv("EnglishLanguageLearning/train.csv")
test_ds = pd.read_csv("EnglishLanguageLearning/test.csv")

In [None]:
# Print the (rows, columns) shape of the training table, then display its
# first rows as the cell output.
print(train_ds.shape)
train_ds.head()

(3911, 8)


Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5


In [None]:
# Print the (rows, columns) shape of the test table, then display its first
# rows as the cell output.
print(test_ds.shape)
test_ds.head()

(3, 2)


Unnamed: 0,text_id,full_text
0,0000C359D63E,when a person has no experience on a job their...
1,000BAD50D026,Do you think students would benefit from being...
2,00367BB2546B,"Thomas Jefferson once states that ""it is wonde..."


In [None]:
# Pull the essay text out as the model input. DataFrame.pop removes the column
# from train_ds IN PLACE and returns it as a Series, so this cell is not
# idempotent: running it a second time without reloading the CSV raises KeyError.
train_features = train_ds.pop("full_text")

In [None]:
# Remove the text_id column from train_ds in place; the popped Series is not
# stored and only appears as this cell's displayed output. Like the previous
# pop, re-running this cell without reloading the CSV raises KeyError.
train_ds.pop("text_id")

0       0016926B079C
1       0022683E9EA5
2       00299B378633
3       003885A45F42
4       0049B1DF5CCC
            ...     
3906    FFD29828A873
3907    FFD9A83B0849
3908    FFDC4011AC9C
3909    FFE16D704B16
3910    FFED00D6E0BD
Name: text_id, Length: 3911, dtype: object

In [None]:
# The six per-essay score columns used as regression targets.
LABEL_COLUMNS = [
    "cohesion",
    "syntax",
    "vocabulary",
    "phraseology",
    "grammar",
    "conventions",
]

# Select the targets by name instead of aliasing "whatever is left" in the
# mutated frame. This yields a new DataFrame whose contents do not depend on
# which of the earlier in-place pop cells have (or have not) been executed.
train_labels = train_ds[LABEL_COLUMNS]

In [None]:
# Width of the model's output layer: one unit per label column.
# (The name says "classes", but the label columns hold numeric scores.)
num_classes = len(train_labels.columns)

In [None]:
# Display the first few essay texts to sanity-check the extracted input column.
train_features.head()

0    I think that students would benefit from learn...
1    When a problem is a change you have to let it ...
2    Dear, Principal\n\nIf u change the school poli...
3    The best time in life is when you become yours...
4    Small act of kindness can impact in other peop...
Name: full_text, dtype: object

In [None]:
# Display the first few label rows to sanity-check the target columns.
train_labels.head()

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,3.5,3.5,3.0,3.0,4.0,3.0
1,2.5,2.5,3.0,2.0,2.0,2.5
2,3.0,3.5,3.0,3.0,3.0,2.5
3,4.5,4.5,4.5,4.5,4.0,5.0
4,2.5,3.0,3.0,3.0,2.5,2.5


In [None]:
# Pair each essay with its label row, one dataset element per essay.
# This rebinds `train_ds` from a pandas DataFrame to a tf.data.Dataset, so
# cells above this one that treat train_ds as a DataFrame cannot be re-run
# afterwards without reloading the CSV.
train_ds = tf.data.Dataset.from_tensor_slices((train_features, train_labels))

In [None]:
# Number of elements (essays) in the un-batched dataset. Uses the stable
# Dataset.cardinality() method rather than the tf.data.experimental function;
# both return the same scalar int64 tensor.
train_ds.cardinality()

<tf.Tensor: shape=(), dtype=int64, numpy=3911>

In [None]:
# Group elements into batches of 32. drop_remainder is left at its default,
# so the final batch may hold fewer than 32 essays.
train_ds = train_ds.batch(32)

In [None]:
# Number of batches after batching. Uses the stable Dataset.cardinality()
# method rather than the tf.data.experimental function; both return the same
# scalar int64 tensor.
train_ds.cardinality()

<tf.Tensor: shape=(), dtype=int64, numpy=123>

In [None]:
# Sanity check: show the first essay of the first batch alongside its scores.
for text_batch, label_batch in train_ds.take(1):
  first_text, first_labels = text_batch[0], label_batch[0]
  print(f"Text: {first_text}")
  print(f"Labels: {first_labels}")

Text: b"I think that students would benefit from learning at home,because they wont have to change and get up early in the morning to shower and do there hair. taking only classes helps them because at there house they'll be pay more attention. they will be comfortable at home.\n\nThe hardest part of school is getting ready. you wake up go brush your teeth and go to your closet and look at your cloths. after you think you picked a outfit u go look in the mirror and youll either not like it or you look and see a stain. Then you'll have to change. with the online classes you can wear anything and stay home and you wont need to stress about what to wear.\n\nmost students usually take showers before school. they either take it before they sleep or when they wake up. some students do both to smell good. that causes them do miss the bus and effects on there lesson time cause they come late to school. when u have online classes u wont need to miss lessons cause you can get everything set up a

In [None]:
# Cache the batched elements after the first pass (no filename is given, so
# the cache is held in memory) and let tf.data choose the prefetch buffer size
# so that input preparation can overlap with model execution.
train_ds = train_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
import re
import string

def custom_standardization(input_data):
  """Normalise raw essay text before tokenisation.

  Lower-cases the input, turns newline characters into single spaces and then
  deletes every character listed in `string.punctuation`. Punctuation is
  removed without inserting a space, so words joined only by punctuation
  (e.g. "home,because") end up merged into one token.

  Args:
    input_data: String tensor of raw text.

  Returns:
    String tensor with the same structure as the input, standardised.
  """
  punctuation_pattern = f"[{re.escape(string.punctuation)}]"
  text = tf.strings.lower(input_data)
  text = tf.strings.regex_replace(text, '\n', ' ')
  text = tf.strings.regex_replace(text, punctuation_pattern, '')
  return text

In [None]:
# Vocabulary cap. Should be relatively high as there are misspellings.
vocab_size = 20000
# Every essay is truncated or zero-padded to exactly this many tokens.
sequence_length = 200

# Maps raw strings to fixed-length integer token sequences. The output length
# is taken from `sequence_length` (previously a duplicated literal 200), so the
# constant above is the single place to change it.
vectorize_layer = tf.keras.layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=vocab_size,
    output_sequence_length=sequence_length,
)

In [None]:
# Text-only view of the training batches (labels dropped), used to fit the
# vectorizer's vocabulary.
text_ds = train_ds.map(lambda text, _labels: text)

In [None]:
# Build the vocabulary from the training text, then display its first 20
# entries as the cell output for inspection.
vectorize_layer.adapt(text_ds)
vectorize_layer.get_vocabulary()[:20]

['',
 '[UNK]',
 'to',
 'the',
 'and',
 'you',
 'a',
 'that',
 'in',
 'they',
 'is',
 'i',
 'of',
 'have',
 'for',
 'be',
 'it',
 'can',
 'because',
 'people']

In [None]:
# Size of each learned token embedding vector.
embedding_dim = 32

def get_model():
  """Assemble the text-to-scores model.

  The layer stack is: the shared `vectorize_layer` (raw strings to token ids),
  a trainable embedding, an average over the sequence axis, a 32-unit ReLU
  layer, and a linear output layer with `num_classes` units.

  Reads the notebook globals `vectorize_layer`, `vocab_size`, `embedding_dim`
  and `num_classes`. The returned model is not compiled.

  Returns:
    A `tf.keras.Sequential` model.
  """
  pipeline = [
      vectorize_layer,
      layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, name="Embedding"),
      layers.GlobalAveragePooling1D(),
      layers.Dense(units=32, activation="relu"),
      layers.Dense(units=num_classes),
  ]
  return tf.keras.Sequential(pipeline)

In [None]:
# Instantiate a fresh model from the architecture defined in get_model().
model = get_model()