In [None]:
!pip install keras_preprocessing

Collecting keras_preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m568.8 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: keras_preprocessing
Successfully installed keras_preprocessing-1.1.2


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
import numpy as np

In [None]:
# Load the IMDB reviews dataset; `info` receives the dataset metadata because
# with_info=True is requested.
imdb, info = tfds.load(
    "imdb_reviews",
    as_supervised=True,
    with_info=True,
)

In [None]:
# Pull the two splits out of the loaded dataset mapping.
train = imdb['train']
test = imdb['test']

In [None]:
# Slicing a 3x3 nested list along its first axis yields one row per element.
rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
dataset = tf.data.Dataset.from_tensor_slices(rows)
for x in dataset:
    print(x)

tf.Tensor([1 2 3], shape=(3,), dtype=int32)
tf.Tensor([4 5 6], shape=(3,), dtype=int32)
tf.Tensor([7 8 9], shape=(3,), dtype=int32)


In [None]:
# Slicing a 3x3x2 nested list along its first axis yields one 3x2 block per element.
pair_block = [[1, 2], [1, 2], [1, 2]]
dataset = tf.data.Dataset.from_tensor_slices([pair_block, pair_block, pair_block])
for x in dataset:
    print(x)

tf.Tensor(
[[1 2]
 [1 2]
 [1 2]], shape=(3, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [1 2]
 [1 2]], shape=(3, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [1 2]
 [1 2]], shape=(3, 2), dtype=int32)


In [None]:
# Slicing a flat list yields one scalar per element; print the dataset object
# first to show its element spec.
values = [1, 2, 3]
dataset = tf.data.Dataset.from_tensor_slices(values)
print(dataset)
for x in dataset:
    print(x)

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)


In [None]:
class MyCallback(tf.keras.callbacks.Callback):
    """Stop training once the reported "loss" drops below 1e-4."""

    def on_epoch_end(self, epoch, logs=None):
        """Request early stopping when the epoch's loss is small enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for the epoch; may be
                None or may lack a "loss" entry.
        """
        # Avoid a shared mutable default, and skip the comparison when no loss
        # was reported: `None < 1e-4` would raise TypeError.
        loss = (logs or {}).get("loss")
        if loss is not None and loss < 1e-4:
            self.model.stop_training = True

In [None]:
# ============================================================================================
# PROBLEM C5
#
# Build and train a neural network to predict time-indexed variables of
# the multivariate household electric power consumption time series dataset.
# Using a window of past 24 observations of the 7 variables, the model
# should be trained to predict the next 24 observations of the 7 variables.
# Use MAE as the metrics of your neural network model.
# We provided code for normalizing the data. Please do not change the code.
# Do not use lambda layers in your model.
#
# The dataset used in this problem is downloaded from https://archive.ics.uci.edu/dataset/235/individual+household+electric+power+consumption
#
# Desired MAE < 0.1 on the normalized dataset.
# ============================================================================================

import urllib
import os
import zipfile
import pandas as pd
import tensorflow as tf

# This function downloads and extracts the dataset to the directory that contains this file.
# DO NOT CHANGE THIS CODE
# (unless you need to change the URL)
def download_and_extract_data():
    """Download the household power zip archive and extract all of its members.

    The archive is saved as 'household_power.zip' (a relative path) and then
    extracted with `extractall()` using its default destination.
    """
    url = 'https://raw.githubusercontent.com/dicodingacademy/dicoding_dataset/main/household_power.zip'
    # NOTE(review): only `import urllib` is visible in this file; `urllib.request`
    # presumably resolves because another import has already loaded the
    # submodule — confirm on a fresh interpreter.
    urllib.request.urlretrieve(url, 'household_power.zip')
    with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
        # No path is given, so zipfile's default extraction directory is used.
        zip_ref.extractall()

class MyCallback(tf.keras.callbacks.Callback):
    """Stop training once the reported "mae" metric drops below 0.04."""

    def on_epoch_end(self, epoch, logs=None):
        """Request early stopping when the epoch's MAE is small enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for the epoch; may be
                None or may lack a "mae" entry.
        """
        # Avoid a shared mutable default, and skip the comparison when the
        # metric is absent: `None < 0.04` would raise TypeError.
        mae = (logs or {}).get("mae")
        if mae is not None and mae < 0.04:
            self.model.stop_training = True

# This function normalizes the dataset using min max scaling.
# DO NOT CHANGE THIS CODE
def normalize_series(data, min, max):
    """Shift `data` by `min`, then divide the result by `max`.

    Args:
        data: Values to scale.
        min: Offset subtracted from `data`.
        max: Divisor applied after the subtraction.

    Returns:
        (data - min) / max.
    """
    # NOTE(review): the divisor is `max`, not `max - min`, so this matches
    # textbook min-max scaling only when `min` is 0. Left as-is because the
    # block is marked DO NOT CHANGE — confirm against the expected normalization.
    data = data - min
    data = data / max
    return data

# COMPLETE THE CODE IN THE FOLLOWING FUNCTION.
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    """Build batched (past window, future window) training pairs from a series.

    Args:
        series: Array-like sliced along its first axis, one row per time step.
        batch_size: Number of (features, labels) pairs per batch.
        n_past: Number of leading time steps of each window used as input.
        n_future: Number of trailing time steps of each window used as target.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A tf.data.Dataset of (features, labels) batches. Features are the first
        `n_past` rows of each window; labels are the remaining `n_future` rows
        with every column kept.
    """
    window_len = n_past + n_future
    ds = tf.data.Dataset.from_tensor_slices(series)
    # drop_remainder=True discards trailing windows shorter than window_len.
    ds = ds.window(window_len, shift=shift, drop_remainder=True)
    # Each window is itself a dataset; batch it into a single tensor.
    ds = ds.flat_map(lambda w: w.batch(window_len))
    ds = ds.shuffle(1000)
    # Split at n_past counted from the window start so unequal n_past/n_future
    # also work, and keep every column in the labels because the model is
    # asked to predict all variables, not only the first one.
    ds = ds.map(lambda w: (w[:n_past], w[n_past:]))
    return ds.batch(batch_size).prefetch(1)

# COMPLETE THE CODE IN THE FOLLOWING FUNCTION.
def solution_C5():
    """Download the data, build the windowed datasets, then train and return the model.

    Returns:
        The trained tf.keras Sequential model, which maps a window of N_PAST
        time steps of all features to N_FUTURE time steps of all features.
    """
    # Downloads and extracts the dataset to the directory that contains this file.
    download_and_extract_data()
    # Reads the dataset from the csv.
    # `infer_datetime_format` is omitted: it is deprecated in recent pandas and
    # has no effect here because `parse_dates` is not enabled.
    df = pd.read_csv('household_power_consumption.csv', sep=',',
                     index_col='datetime', header=0)

    # Number of features in the dataset. We use all features as predictors to
    # predict all features at future time steps.
    N_FEATURES = len(df.columns)

    # Normalizes the data
    # DO NOT CHANGE THIS
    data = df.values
    split_time = int(len(data) * 0.5)
    data = normalize_series(data, data.min(axis=0), data.max(axis=0))

    # Splits the data into training and validation sets.
    x_train = data[:split_time]
    x_valid = data[split_time:]

    # DO NOT CHANGE THIS
    BATCH_SIZE = 32
    N_PAST = 24 # Number of past time steps based on which future observations should be predicted
    N_FUTURE = 24  # Number of future time steps which are to be predicted.
    SHIFT = 1  # By how many positions the window slides to create a new window of observations.

    # Code to create windowed train and validation datasets.
    # Pass the window constants explicitly so the datasets always agree with
    # the model's input/output shapes instead of relying on matching defaults.
    train_set = windowed_dataset(x_train, BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE, shift=SHIFT)
    valid_set = windowed_dataset(x_valid, BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE, shift=SHIFT)

    # Code to define your model.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv1D(filters=32, kernel_size=5, strides=1, padding="causal", activation="relu", input_shape=[N_PAST, N_FEATURES]),
        tf.keras.layers.LSTM(32, return_sequences=True),
        tf.keras.layers.LSTM(32),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(64, activation="relu"),
        # Emit one value per (future step, feature), then reshape to that grid.
        tf.keras.layers.Dense(N_FUTURE * N_FEATURES),
        tf.keras.layers.Reshape((N_FUTURE, N_FEATURES))
    ])

    # Code to train and compile the model
    callback = MyCallback()
    model.compile(
        loss='mae',
        optimizer="adam",
        metrics=['mae']
    )

    # Training ends early if the callback sets model.stop_training.
    model.fit(train_set, validation_data=valid_set, epochs=500, callbacks=[callback])

    return model

# The code below is to save your model as a .h5 file.
# It will be saved automatically in your Submission folder.
if __name__ == '__main__':
    # DO NOT CHANGE THIS CODE
    # Train the model via solution_C5() and save it to "model_C5.h5"
    # (a relative path).
    model = solution_C5()
    model.save("model_C5.h5")

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500


  saving_api.save_model(


In [None]:
# NOTE(review): `training_labels` is not defined in any cell visible here —
# presumably it comes from a removed or out-of-order cell; confirm this runs
# after Restart Kernel -> Run All.
training_labels.head(10)

In [None]:
# Sample article text used as the input to the prediction cell below.
text='''
gamers snap up new sony psp gamers have bought almost all of the first batch of sony s new playstation portable (psp) games console  which went on sale in japan on sunday.  thousands of people queued for hours to get hold of one of the 200 000 psps which were shipped to retailers. the handheld console can play games  music and movies and goes on sale in europe and north america next year. despite the demand sony said it would not increase the 500 000-strong stock of psps it plans to ship by year s end.  sony says it intends to ship three million of the consoles by march 2005. the company is hoping to challenge the dominance of nintendo in the handheld market. nintendo released its new ds console earlier this year and has already raised shipment targets for the device by 40%. the psp is selling in japan for 19 800 yen ($188; £98) while nintendo s ds console sells in the us and japan for $150 (£78). nintendo s goal is to ship 5 million of its new nintendo ds handheld consoles by march 2005.
'''

In [None]:
# NOTE(review): this fits a brand-new Tokenizer on the sample text, so its word
# indices will not match the vocabulary the model was trained with. Replace it
# with the tokenizer fitted on the training data — TODO confirm where that
# object lives.
tokenizer = Tokenizer()
# The Tokenizer methods expect a collection of texts. Passing the bare string
# treats every character as its own text, which produces one prediction row
# per character instead of one row for the article.
tokenizer.fit_on_texts([text])

# Tokenize and pad the input text (a batch containing a single article)
sequence = tokenizer.texts_to_sequences([text])
padded_sequence = pad_sequences(sequence, maxlen=120, padding='post', truncating='post')

# Make predictions
# NOTE(review): `model` must be the text classifier here; no visible cell
# defines one, so confirm which model is in scope on a fresh kernel.
predictions = model.predict(padded_sequence)

# Print the predicted class probabilities
print("Predicted Probabilities:", predictions)

# Get the predicted class (index with maximum probability) for the single input
predicted_class = int(tf.argmax(predictions, axis=1).numpy()[0])
print(predicted_class)

# Assuming you have a mapping from index to class label
# NOTE(review): the index order must match the label encoding used in training — verify.
class_mapping = {0: 'business', 1: 'entertainment', 2: 'politics', 3: 'sport', 4: 'tech'}

# Print the predicted class label
print("Predicted Class:", class_mapping[predicted_class])

Predicted Probabilities: [[2.0964621e-06 8.3767780e-04 9.9879324e-01 2.0241036e-05 3.4673919e-04]
 [2.3654216e-06 8.2174223e-04 9.9876404e-01 2.1848624e-05 3.8992715e-04]
 [2.1903588e-06 8.2334212e-04 9.9878317e-01 2.1547898e-05 3.6970360e-04]
 ...
 [2.4722492e-06 8.9983398e-04 9.9870205e-01 2.1365899e-05 3.7435879e-04]
 [2.0964601e-06 8.3767739e-04 9.9879324e-01 2.0240997e-05 3.4673905e-04]
 [2.0964601e-06 8.3767739e-04 9.9879324e-01 2.0240997e-05 3.4673905e-04]]
2
Predicted Class: politics


In [None]:
# Show the input shape reported by the model's first layer.
first_layer = model.layers[0]
print(first_layer.input_shape)

(None, 1)
