In [36]:
import tensorflow as tf
import shutil
import os
import re
import matplotlib.pyplot as plt
import string

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [2]:
!mkdir stackoverflow

mkdir: cannot create directory ‘stackoverflow’: File exists


In [3]:
url = "http://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz"

dataset = tf.keras.utils.get_file('stack_overflow_16k.tar.gz', url, cache_dir = ".", cache_subdir = "", untar = True)

In [4]:
dataset_url = '/content/stackoverflow'

In [5]:
os.listdir(dataset_url)

['test', 'train']

In [6]:
train_dir = os.path.join(dataset_url, 'train')
os.listdir(train_dir)

['java', 'csharp', 'python', 'javascript']

In [7]:
sample_file = os.path.join(train_dir, 'csharp/0.txt')
with open(sample_file) as f:
  print(f.read())

"how to pause loop while time is ticking i have made a timer where i can set time i want to wait and then do something..so this is my short timer func:..private void sleep(int interval, action action).{.    system.windows.forms.timer mytimer = new system.windows.forms.timer();.    mytimer.interval = interval; //interval is in ms   .    mytimer.start();.    mytimer.tick += (s, e) =&gt;.    {.        action();.        mytimer.stop();.    };.}...and im using this timer in loop:..foreach (string word in words).{.   sleep(5000, ()=&gt; myaction());                                           .}...without loop timer is great but in loop it wont work because loop wont stop and wait for those 5secs. it do all stuff imediately and starts timer again and again too fast...so what im trying to find out is how to make my loop wait until time runs out and myaction() is executed. im working on forms application so all threadin sleeps wont work here. also i tried all other timers but they used too much 

In [9]:
batch_size = 32
seed = 52

raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir, batch_size = batch_size, seed = seed, validation_split=0.2, subset='training'
    )

Found 8000 files belonging to 4 classes.
Using 6400 files for training.


In [11]:
for text_batch, label_batch in raw_train_ds.take(1):
  for i in range(3):
    print("Review: ", text_batch.numpy()[i])
    print("Label: ", label_batch.numpy()[i])

Review:  b"write out xmlwriter to file i have an xmlwriter object used in a method. i'd like to dump this out to a file to read it. is there a straightforward way to do this?..thanks\n"
Label:  0
Review:  b'"ignore user input after certain point is there a way i can kill / break out of user input on my tic tac toe board after a winner has been declared? i tried using break in the isfull() function after the alert was sent of who won but it still would accept user input in the table afterwords...here is a link to show you it running: .https://jsfiddle.net/n1kn1vlh/2/..function tictactoe() {.  this.board = [.    [0, 0, 0],.    [0, 0, 0],.    [0, 0, 0].  ];.  this.showhtml = tohtml;.  this.player2 = ""o"";.  this.player1 = ""x"";.  this.turn = """";.}..function tohtml() {.  var player = \'\';.  var displayplayer = document.getelementbyid(""displaymessage"");.  var htmlstr = \'\';.  var gametable = document.getelementbyid(""tictable"");.  var cell = \'\';.  for (var i = 0; i &lt; this.boar

In [13]:
print("Label 0 corresponds to ", raw_train_ds.class_names[0])
print("Label 1 corresponds to ", raw_train_ds.class_names[1])
print("Label 2 corresponds to ", raw_train_ds.class_names[2])
print("Label 3 corresponds to ", raw_train_ds.class_names[3])

Label 0 corresponds to  csharp
Label 1 corresponds to  java
Label 2 corresponds to  javascript
Label 3 corresponds to  python


In [21]:
test_url = os.path.join(dataset_url, '/test')
print(dataset_url)
print(test_url)

/content/stackoverflow
/test


In [23]:
raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'stackoverflow/test', batch_size = batch_size
)

Found 8000 files belonging to 5 classes.


In [46]:
raw_val_ds = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir, batch_size = batch_size, seed = seed, validation_split=0.2, subset='validation'
    )

Found 8000 files belonging to 4 classes.
Using 1600 files for validation.


In [37]:
def customized_standardization(input_data):
  lowercase = tf.strings.lower(input_data)
  stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
  return tf.strings.regex_replace(stripped_html, '[%s]' % re.escape(string.punctuation),
                                  '')

In [30]:
max_features = 10000
sequence_length = 250
vec_layer = TextVectorization(
    max_tokens=max_features, standardize = customized_standardization, output_mode = 'int', output_sequence_length=sequence_length
    )

In [38]:
train_text = raw_train_ds.map(lambda x, y: x)
vec_layer.adapt(train_text)

In [43]:
def vectorize_text(text, label):
  text = tf.expand_dims(text, axis = -1)
  return vec_layer(text), label

In [44]:
text_batch, label_batch = next(iter(raw_train_ds))
first_review, first_label = text_batch[0], label_batch[0]
print("First Review: ", first_review)
print("Label: ", raw_train_ds.class_names[first_label])
print("Vectorized Review: ", vectorize_text(first_review, first_label))

First Review:  tf.Tensor(b'"how to sort list<map<string, object>> in blank8 with .stream()? i have a list like this ..list&lt;map&lt;string, object&gt;&gt; list = new arraylist&lt;&gt;();..    for(int i = 0; i &lt; 20; i++) {.        map&lt;string, object&gt; map = new hashmap&lt;&gt;();.        map.put(""quantity"", math.random());.        map.put(""price"", math.random());.        list.add(map);.    }...how can i sort by price?..i hope it is use blank8 stream"\n', shape=(), dtype=string)
Label:  java
Vectorized Review:  (<tf.Tensor: shape=(1, 250), dtype=int64, numpy=
array([[  25,    4,  459,    1,   59,    7,    1,   20,  737,    3,   17,
           5,   55,   48,   13,    1,    1,   55,   15, 2156,  406,    3,
          19,    3,   62,  332,    3, 5123, 2932,  538,   15,    1,    1,
        1551,    1, 1551,    1,   25,   34,    3,  459,   77, 9262,  921,
          10,    6,   71,    1,  737,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    

In [47]:
train_ds = raw_train_ds.map(vectorize_text)
test_ds = raw_test_ds.map(vectorize_text)
val_ds = raw_val_ds.map(vectorize_text)

In [48]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size = AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size = AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size = AUTOTUNE)

In [49]:
embedding_dim = 16
model = tf.keras.Sequential([
  layers.Embedding(input_dm = embedding_dim)
  layers.DropOut(0.2)
  layers.GlobalAveragePooling1D()
  layers.Dropout(0.2)
  
])

SyntaxError: ignored