# Exercise: multiclass text classification

# Check for GPU

In [11]:
# List the GPUs visible to this runtime (one line per device).
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-010717b5-f668-9ef2-7bc7-6e807c0c6b1e)


# Imports

In [12]:
import os

import tensorflow as tf

# Get the dataset

In [13]:
# Download the Stack Overflow questions archive and unpack it into the Keras cache.
path = tf.keras.utils.get_file(
    fname='stack-overflow-questions',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz',
    untar=True,
)

# Walk through the dataset folders


In [14]:
# The extracted README/train/test entries sit next to the downloaded archive,
# so work from the parent directory of the path returned by get_file.
dataset_dir = os.path.dirname(path)
os.listdir(dataset_dir)



['README.md', 'test', 'train', 'stack-overflow-questions.tar.gz']

In [15]:
# Training split: one subfolder per class (programming language).
train_dir = os.path.join(dataset_dir, 'train')

os.listdir(train_dir)

['csharp', 'python', 'javascript', 'java']

In [16]:
# Held-out test split, laid out under the same dataset directory as train/.
test_dir = os.path.join(dataset_dir, 'test')

# Load the data with `text_dataset_from_directory`

In [17]:
# Fixed seed so the train/validation split is reproducible; the validation
# cell reuses it together with the same split fraction.
seed = 42

# Training subset: 80% of the files under train/.
raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    train_dir,
    validation_split=0.2,
    subset='training',
    seed=seed,
)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.


In [18]:
# Validation subset: the remaining 20% of train/, selected with the same seed
# and split fraction as the training subset.
val_ds = tf.keras.utils.text_dataset_from_directory(
    train_dir,
    validation_split=0.2,
    subset='validation',
    seed=seed,
)

Found 8000 files belonging to 4 classes.
Using 1600 files for validation.


In [19]:
# Test set: every file under test/, with no train/validation split applied.
test_ds = tf.keras.utils.text_dataset_from_directory(directory=test_dir)

Found 8000 files belonging to 4 classes.


# Look through the dataset

In [20]:
# Peek at the first three (question, label) pairs of the first training batch.
for text, label in raw_train_ds.take(1):
    sample_texts = text.numpy()
    sample_labels = label.numpy()
    for i in range(3):
        print(sample_texts[i])
        print(sample_labels[i])

b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can be easily fixed, please forgive me. my program has a tester class with a main. when i send that to my regularpolygon class, it sends it to the wrong constructor. i have two constructors. 1 without perameters..public regularpolygon().    {.       mynumsides = 5;.       mysidelength = 30;.    }//end default constructor...and my second, with perameters. ..public regularpolygon(int numsides, double sidelength).    {.        mynumsides = numsides;.        mysidelength = sidelength;.    }// end constructor...in my tester class i have these two lines:..regularpolygon shape = new regularpolygon(numsides, sidelength);.        shape.menu();...numsides and sidelength were declared and initialized earlier in the testing class...so what i want to happen, is the tester class sends numsides and sidelength to the second constructor and use it in that class. but it only uses the default constructor, w

# Text vectorization

In [21]:
# Cap the vocabulary at 10000 tokens and emit 250 positions per example.
text_vectorizer = tf.keras.layers.TextVectorization(max_tokens=10000, output_sequence_length=250)

# Build the vocabulary from the training text only (labels are dropped).
train_text = raw_train_ds.map(lambda question, target: question)
text_vectorizer.adapt(train_text)

# Configure the datasets for performance

In [22]:
AUTOTUNE = tf.data.AUTOTUNE

# Apply the same cache -> prefetch pipeline to all three splits.
train_ds, val_ds, test_ds = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (raw_train_ds, val_ds, test_ds)
)


# Create the model

# Create the embedding layer

In [23]:
# 16-dimensional embedding table; input_dim is the vectorizer's max_tokens (10000) plus one.
embeddings = tf.keras.layers.Embedding(
    input_dim=10000 + 1,
    output_dim=16,
)

In [24]:
from tensorflow.keras import layers

# One output unit per class folder: csharp, python, javascript, java.
num_classes = 4

# Raw strings -> token ids -> embeddings -> average over the sequence
# -> softmax over the classes, with dropout around the pooling step.
model = tf.keras.Sequential([
    text_vectorizer,
    embeddings,
    layers.Dropout(0.2),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax'),
])

# The loss must be an *instance*. Passing the bare class makes Keras treat the
# constructor as the loss function and call it with (y_true, y_pred).
# from_logits=False because the final Dense layer already applies softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs, reporting metrics on the validation split after each one.
# The returned History object is kept so the loss curves can be plotted afterwards.
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

# Evaluate the model

# Plot the loss curves
