In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install tensorflow-datasets

In [None]:
import tensorflow_datasets as tfds
import tensorflow.keras as keras
import tensorflow as tf

# Load Fashion-MNIST as (train, test) pairs and hold out the first 5,000
# training examples for validation.
fashion_train, fashion_test = keras.datasets.fashion_mnist.load_data()
X_train_full, y_train_full = fashion_train
X_test, y_test = fashion_test

n_valid = 5000
X_valid, y_valid = X_train_full[:n_valid], y_train_full[:n_valid]
X_train, y_train = X_train_full[n_valid:], y_train_full[n_valid:]

# Wrap each split in a tf.data pipeline. Only the training split is shuffled,
# with a buffer as large as the split itself.
mnist_valid = tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
mnist_test = tf.data.Dataset.from_tensor_slices((X_test, y_test))
mnist_train = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(X_train))

In [None]:
# Adds a second shuffle stage (10,000-element buffer) on top of the full-buffer
# shuffle applied when mnist_train was created.
# NOTE(review): re-running this cell stacks one more shuffle stage each time.
mnist_train = mnist_train.shuffle(buffer_size=10000)

In [None]:
from tensorflow.train import BytesList, FloatList, Int64List, Feature, Features, Example

# Shard the training set into TFRecord files: each batch of `records_per_file`
# examples is written to its own file, one serialized Example per image.
records_per_file = 32
mnist_train_batched = mnist_train.batch(records_per_file)
output_dir = 'train_dataset/'
tf.io.gfile.makedirs(output_dir)
total_records = 0
n_files = 0  # stays 0 if the dataset yields no batches

for i, (image_batch, label_batch) in enumerate(mnist_train_batched):
    # os.path.join avoids the doubled slash that plain string formatting
    # produces with the trailing '/' in output_dir.
    output_tfrecord_path = os.path.join(output_dir, f'batch_{i}.tfrecord')

    with tf.io.TFRecordWriter(output_tfrecord_path) as f:
        # Iterate the batch tensors directly; no per-batch Dataset is needed.
        for image, label in zip(image_batch, label_batch):
            example = Example(features=Features(feature={
                # The image is stored as a serialized tensor so that
                # tf.io.parse_tensor can restore it when reading.
                'image': Feature(bytes_list=BytesList(value=[tf.io.serialize_tensor(image).numpy()])),
                # Int64List expects plain integers, so convert the scalar tensor explicitly.
                'label': Feature(int64_list=Int64List(value=[int(label.numpy())])),
            }))
            f.write(example.SerializeToString())
            total_records += 1
    n_files = i + 1

# The last file can hold fewer examples than the others, hence "up to".
print(f'Saved {n_files} TFRecord files with up to {records_per_file} examples per file to {output_dir}, total_records: {total_records}')

In [None]:
# Resolve the shard directory against the current working directory, which is
# where the relative 'train_dataset/' output path was created, instead of
# assuming the notebook runs from /kaggle/working.
tfrecord_dir = os.path.abspath('train_dataset')
# Sort the matches so the file order is the same on every run.
tfrecord_files = sorted(tf.io.gfile.glob(os.path.join(tfrecord_dir, '*.tfrecord')))

In [None]:
# Show how many shard files were found and a small sample, instead of echoing
# the whole list (one path per written batch) into the cell output.
len(tfrecord_files), tfrecord_files[:5]

In [None]:
def parse_tfrecord_fn(example):
    """Decode one serialized Example into an ``(image, label)`` pair.

    Args:
        example: A scalar string tensor holding a serialized Example with a
            bytes feature ``'image'`` and an int64 feature ``'label'``.

    Returns:
        A tuple ``(image, label)``: ``image`` is the uint8 tensor parsed from
        the ``'image'`` bytes and reshaped to 28x28; ``label`` is the int64
        scalar stored under ``'label'``.
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, schema)
    # The image bytes are a serialized tensor, so they are decoded with
    # parse_tensor and then given an explicit static shape.
    pixels = tf.io.parse_tensor(parsed['image'], out_type=tf.uint8)
    return tf.reshape(pixels, shape=[28, 28]), parsed['label']

def mnist_dataset(filepaths, n_read_threads=5, shuffle_buffer_size=None,
                  n_parse_threads=5, batch_size=32, cache=True):
    """Build a batched input pipeline over TFRecord files.

    Args:
        filepaths: TFRecord file path(s) passed to ``tf.data.TFRecordDataset``.
        n_read_threads: Value for ``num_parallel_reads`` when reading files.
        shuffle_buffer_size: Shuffle buffer size; shuffling is skipped when
            this is ``None`` or 0.
        n_parse_threads: Value for ``num_parallel_calls`` when parsing records.
        batch_size: Number of parsed examples per batch.
        cache: When true, the raw (still serialized) records are cached.

    Returns:
        A dataset of parsed batches, as produced by ``parse_tfrecord_fn``,
        with a prefetch of one batch.
    """
    records = tf.data.TFRecordDataset(filepaths, num_parallel_reads=n_read_threads)
    records = records.cache() if cache else records
    if shuffle_buffer_size:
        records = records.shuffle(shuffle_buffer_size)
    return (
        records
        .map(parse_tfrecord_fn, num_parallel_calls=n_parse_threads)
        .batch(batch_size)
        .prefetch(1)
    )

In [None]:
import matplotlib.pyplot as plt

# Read the shards back through the input pipeline and preview the first five
# images of one batch, each titled with its label.
train_set = mnist_dataset(tfrecord_files, shuffle_buffer_size=60000)
n_preview = 5
for X, y in train_set.take(1):
    for col in range(n_preview):
        picture = X[col].numpy()
        caption = str(y[col].numpy())
        plt.subplot(1, n_preview, col + 1)
        plt.imshow(picture, cmap="binary")
        plt.axis("off")
        plt.title(caption)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Small fully connected classifier for 28x28 images with 10 output classes.
model = keras.Sequential([
    # Declare the input with an explicit Input object rather than passing
    # `input_shape` to the first layer.
    keras.Input(shape=(28, 28)),
    keras.layers.Rescaling(1.0 / 255),  # multiply the uint8 pixel values by 1/255
    keras.layers.Flatten(),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),  # Output shape is (None, 10)
])

# Sparse categorical cross-entropy: the labels are integer class ids, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
#model.fit(train_set, epochs=5)

In [None]:
import pandas as pd

# Load the IMDB reviews CSV into a DataFrame and show its first rows.
imdb_csv_path = '/kaggle/input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv'
dataset = pd.read_csv(imdb_csv_path)

dataset.head()

In [None]:
# Split by row position: first 25,000 rows for training, the next 15,000 for
# validation, the remainder for testing.
# Each split is an explicit copy because its 'sentiment' column is overwritten
# later; assigning into a plain slice of `dataset` triggers pandas'
# chained-assignment warning and leaves the link to `dataset` ambiguous.
imbd_train = dataset[:25000].copy()
imbd_valid = dataset[25000:40000].copy()
imbd_test = dataset[40000:].copy()

In [None]:
# (rows, columns) of the training split.
imbd_train.shape

In [None]:
# Encode the sentiment labels as integers (negative -> 0, positive -> 1) in
# every split.
# The loop variable is deliberately not named `dataset`: reusing that name
# would rebind the DataFrame loaded from the CSV to one of the splits, so
# re-running the split cell afterwards would slice the wrong frame.
sentiment_to_id = {'negative': 0, 'positive': 1}
for split in (imbd_train, imbd_test, imbd_valid):
    # The explicit cast pins the column to int64 instead of relying on
    # replace() to infer a numeric dtype; it also fails loudly if a value
    # other than the two known labels is present.
    split['sentiment'] = split['sentiment'].replace(sentiment_to_id).astype('int64')

In [None]:
# Wrap the training frame as a dataset of (review, sentiment) pairs, shuffled
# with a buffer covering every row.
# NOTE(review): this rebinds imbd_train from a DataFrame to a tf.data.Dataset,
# so the cell cannot be re-run without re-creating the DataFrame first.
train_columns = (imbd_train['review'], imbd_train['sentiment'])
imbd_train = tf.data.Dataset.from_tensor_slices(train_columns).shuffle(len(imbd_train))

In [None]:
# Display the dataset object itself (its repr), not its elements.
imbd_train

In [None]:
# Wrap the validation and test frames as datasets of (review, sentiment)
# pairs; unlike the training split, these are not shuffled.
# NOTE(review): both names are rebound from DataFrames to tf.data.Datasets, so
# this cell cannot be re-run without re-creating the DataFrames first.
valid_columns = (imbd_valid['review'], imbd_valid['sentiment'])
test_columns = (imbd_test['review'], imbd_test['sentiment'])
imbd_valid = tf.data.Dataset.from_tensor_slices(valid_columns)
imbd_test = tf.data.Dataset.from_tensor_slices(test_columns)

In [None]:
# Peek at two (review, sentiment) elements of the training dataset.
preview = imbd_train.take(2)
for item in preview:
    print(item)

In [None]:
# Placeholder for the IMDB model: an empty Sequential with no layers added yet.
imdb = keras.Sequential([])