In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import string
import pathlib
from tensorflow import keras
from google.colab import drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder of the Stack Overflow dataset on the mounted Drive.
path = '/content/drive/MyDrive/stack_overflow'
# Parent folder of `path` joined with the dataset folder name, kept as a plain string.
data_dir = os.path.join(os.path.dirname(pathlib.Path(path)), 'stack_overflow')

In [None]:
# List the entries of the dataset directory.
os.listdir(data_dir)

['test', 'train', 'README.md']

In [None]:
# Each split lives in its own sub-folder of the dataset directory.
train_dir, test_dir = (
    os.path.join(data_dir, split_name) for split_name in ('train', 'test')
)

In [None]:
# Seed and batch size passed to the dataset loader.
seed, batch_size = 42, 32

# Training subset of the files under train_dir; 10% of them are held out
# for validation.
train_ds = keras.utils.text_dataset_from_directory(
    train_dir,
    subset='training',
    validation_split=0.1,
    seed=seed,
    batch_size=batch_size,
)

Found 8000 files belonging to 4 classes.
Using 7200 files for training.


In [None]:
# Class names as reported by the training dataset object.
class_names = train_ds.class_names

In [None]:
# Validation subset: the held-out 10% of the files under train_dir.
validation_ds = keras.utils.text_dataset_from_directory(
    train_dir,
    subset='validation',
    validation_split=0.1,
    seed=seed,
    batch_size=batch_size,
)

Found 8000 files belonging to 4 classes.
Using 800 files for validation.


In [None]:
# Test data is read from its own folder, so no validation split is requested.
test_ds = keras.utils.text_dataset_from_directory(test_dir, batch_size=batch_size)

Found 8002 files belonging to 4 classes.


In [None]:
# Peek at the raw text of the first two examples in one training batch.
for text_batch, _labels in train_ds.take(1):
  for idx in (0, 1):
    print(text_batch[idx].numpy())

b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can be easily fixed, please forgive me. my program has a tester class with a main. when i send that to my regularpolygon class, it sends it to the wrong constructor. i have two constructors. 1 without perameters..public regularpolygon().    {.       mynumsides = 5;.       mysidelength = 30;.    }//end default constructor...and my second, with perameters. ..public regularpolygon(int numsides, double sidelength).    {.        mynumsides = numsides;.        mysidelength = sidelength;.    }// end constructor...in my tester class i have these two lines:..regularpolygon shape = new regularpolygon(numsides, sidelength);.        shape.menu();...numsides and sidelength were declared and initialized earlier in the testing class...so what i want to happen, is the tester class sends numsides and sidelength to the second constructor and use it in that class. but it only uses the default constructor, w

In [None]:
def custom_standardization(input_data):
  """Lowercase the text, turn line breaks into spaces and strip punctuation.

  Args:
    input_data: string tensor holding the raw text.

  Returns:
    String tensor with the cleaned text.
  """
  lowercase = tf.strings.lower(input_data)
  # Replace newlines with a space rather than an empty string: deleting them
  # would glue the last word of one line to the first word of the next and
  # produce bogus tokens once the text is split on whitespace.
  without_newlines = tf.strings.regex_replace(lowercase, '\n', ' ')
  return tf.strings.regex_replace(without_newlines,
                                  '[%s]' % re.escape(string.punctuation), '')

In [None]:
# Vocabulary size cap and fixed length of every encoded sequence.
max_features, sequence_length = 10000, 250

# Layer that standardizes raw strings and encodes them as integer sequences.
vectorize_layer = keras.layers.TextVectorization(
    max_tokens=max_features,
    standardize=custom_standardization,
    output_sequence_length=sequence_length,
    output_mode='int',
)

In [None]:
# adapt() only needs the text, so drop the labels before calling it.
train_text = train_ds.map(lambda text, _label: text)
vectorize_layer.adapt(train_text)

In [None]:
def vectorize(text, label):
  """Encode `text` with `vectorize_layer`; `label` is passed through unchanged."""
  return vectorize_layer(text), label

In [None]:
# Apply the text vectorization to every split.
train, validation, test = (
    dataset.map(vectorize) for dataset in (train_ds, validation_ds, test_ds)
)

In [None]:
# Cache every split and let tf.data choose the prefetch buffer size;
# only the training batches are shuffled.
Autotune = tf.data.AUTOTUNE

train_batches = (
    train
    .cache()
    .shuffle(1000)
    .prefetch(Autotune)
)
val_batches = validation.cache().prefetch(Autotune)
test_batches = test.cache().prefetch(Autotune)

In [None]:
# Show the first encoded example of one training batch together with its label.
for text, label in train_batches.take(1):
  print(text[0], label[0])

tf.Tensor(
[ 682  140   31  534   18    3   17    2  123  534   18   54    6    2
  393   84    4  682    2  140   31   11   47   16 3109   50    1    1
    1    1 3174 4634    1    1    1    1    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0 

In [None]:
# Size of each learned token vector.
embedding_dim = 16

# Classifier: embed the tokens, average over the sequence axis, then project
# to 4 output units (no activation, so the outputs are logits).
model = keras.Sequential()
model.add(keras.layers.Embedding(max_features + 1, embedding_dim))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(4))

# The last layer has no activation, hence from_logits=True on the loss.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 16)          160016    
                                                                 
 dropout (Dropout)           (None, None, 16)          0         
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense (Dense)               (None, 4)                 68        
                                                                 
Total params: 160,084
Trainable params: 160,084
Non-trainable params: 0
__________________________________________________

In [None]:
# Fit the classifier for a fixed number of epochs, with the validation batches
# supplied as validation data.
num_epochs = 15
history = model.fit(train_batches, validation_data=val_batches, epochs=num_epochs)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# Run the trained classifier over the vectorized test batches (verbose=0 silences the progress output).
# The final Dense layer has no activation, so these are unnormalized logits.
predictions = model.predict(test_batches, verbose=0)

In [None]:
# Bundle the text vectorization, the trained classifier and a softmax into one
# model that accepts raw strings.
export_model = keras.Sequential()
export_model.add(vectorize_layer)
export_model.add(model)
export_model.add(keras.layers.Activation('softmax'))

# The softmax is applied inside the model, so the loss is given by its plain string name.
export_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Classify raw question text with the exported end-to-end model.
example = ['how to install practnlp tools how to install practnlptools in windows as there is no installation guidance in pypi? it would be very helpful also to know how to use srl tagger in there.']
# Wrap the Python list in a string tensor so predict() receives a well-defined input.
predicted_example = export_model.predict(tf.constant(example))
# predict() returns one row of class scores per input. Take the argmax per row
# (axis=-1) so the lookup stays correct when `example` holds several questions;
# a bare np.argmax would index into the flattened array.
for class_index in np.argmax(predicted_example, axis=-1):
  print(class_names[class_index])

python
