# Session 06 

## Time series & Natural Language Processing  <table class="tfo-notebook-buttons" align="right" style="margin-top:-55px">
  <td>
      <a target="_blank" href="https://keras.io/search.html?query=recurrent%20neural%20network"><CENTER><img src="https://external-content.duckduckgo.com/iu/?u=http%3A%2F%2Fadventuresinmachinelearning.com%2Fwp-content%2Fuploads%2F2017%2F05%2Fkeras-logo-small-wb-1.png&f=1&nofb=1"  width="50" height="50" /><p style='margin-left:12px'>KERAS</p></CENTER></a>
  </td>
 </table>

In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from tensorflow.python.keras.layers.core import Activation

skip_plot = 5  # stride used when plotting: only every 5th sample is drawn

# Daily Germany dataset (opsd_germany_daily.csv) hosted on GitHub.
url = 'https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv'

# The first CSV column is parsed as dates and used as the frame's index.
openpower_germany_df = pd.read_csv(url, sep=',', index_col=0,
                                   parse_dates=[0])
openpower_germany_df.tail()

In [None]:
# Draw every `skip_plot`-th consumption value against the frame's index.
consumption_series = openpower_germany_df['Consumption']
ax = consumption_series[::skip_plot].plot(marker='*')
ax.set_xlabel('time')
ax.set_ylabel('electricity consumption')
plt.show()

# Keep the raw values as a NumPy array for the windowing step.
consumption_energy = consumption_series.to_numpy()
print(consumption_energy.shape)

In [None]:
### process the data for training and testing
def make_data(time_series, step_x, step_y):
    """Cut a sequence into sliding (input, target) windows.

    For each start position ``i`` the input window is
    ``time_series[i:i + step_x]`` and the target window is the ``step_y``
    items that immediately follow it. Start positions whose target window
    would run past the end of the sequence are left out.

    Args:
        time_series: Any sliceable sequence that supports ``len()``.
        step_x: Number of items in each input window.
        step_y: Number of items in each target window.

    Returns:
        A tuple ``(x, Y)`` of two lists: the input windows and the matching
        target windows, in order of start position.
    """
    total = len(time_series)
    span = step_x + step_y
    # The window end grows with the start index, so filtering on the bound
    # keeps exactly the leading run of valid start positions.
    starts = [start for start in range(total) if start + span <= total]
    inputs = [time_series[start:start + step_x] for start in starts]
    targets = [time_series[start + step_x:start + span] for start in starts]
    return inputs, targets

# Window sizes: 25 past values are used to predict the next single value.
step_x, step_y = 25, 1
# Number of features per time step (appended as the last axis of x).
feature_in = 1

windows, targets = make_data(consumption_energy, step_x, step_y)
Y = np.array(targets)
x = np.array(windows)
# Add the trailing feature axis: (n_windows, step_x) -> (n_windows, step_x, feature_in).
x = x.reshape(x.shape[0], x.shape[1], feature_in)
print((x.shape, Y.shape))

In [None]:
### now we can apply different algorithms
#Average
def avg_baseline(x):
    """Return the mean of ``x`` taken along axis 1.

    Args:
        x: Array-like with at least two dimensions.

    Returns:
        The result of ``np.mean`` over axis 1, i.e. ``x`` with that axis
        averaged away.
    """
    window_mean = np.mean(x, axis=1)
    return window_mean

Y_pred_avg = avg_baseline(x)

# Overlay the strided targets and the baseline predictions on one set of axes.
fig, ax = plt.subplots()
ax.plot(Y[::skip_plot], color='r', alpha=0.5)
ax.plot(Y_pred_avg[::skip_plot], 'b.')
ax.legend(['True', 'Avg-pred'])
plt.show()

# R^2 of the baseline scaled by 100; shown as the cell's output value.
r2_score(Y, Y_pred_avg) * 100.

## Simple RNN

In [None]:
from tensorflow import keras
import numpy as np
import tensorflow as tf

# Fix the NumPy and TensorFlow seeds before the model is built.
np.random.seed(42)
tf.random.set_seed(42)

# A single SimpleRNN layer over windows of shape (step_x, feature_in),
# followed by a Dense head that outputs step_y values.
model = keras.models.Sequential([
    keras.layers.SimpleRNN(100, activation='relu', input_shape=(step_x, feature_in)),
    keras.layers.Dense(step_y)
])

optimizer = keras.optimizers.Adam(learning_rate=0.005)
model.compile(loss="mse", optimizer=optimizer)
history = model.fit(x, Y, epochs=10, verbose=0)
loss = history.history["loss"]

# Per-epoch training loss.
plt.plot(loss, "b.-", label='Training Loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()  # without this call the label given to plot() is never drawn
plt.show()

In [None]:
# Predict on the same windows the model was fitted on (there is no held-out split).
y_pred = model.predict(x)
# Show the first target next to the first prediction.
print(f'Actual: {Y[0]} Prediction: {y_pred[0]}')

## GRU

In [None]:
# Fix the NumPy and TensorFlow seeds before the model is built.
np.random.seed(42)
tf.random.set_seed(42)

# Two stacked GRU layers: the first returns the full sequence so the second
# can consume it; the second returns only its final output for the Dense head.
model = keras.models.Sequential([
    keras.layers.GRU(100, activation='relu', input_shape=(step_x, feature_in), return_sequences=True),
    keras.layers.GRU(100, activation='relu', return_sequences=False),
    keras.layers.Dense(step_y)
])

model.compile(loss="mse", optimizer="adam", metrics=['mean_squared_error'])
history = model.fit(x, Y, epochs=10, verbose=0)
loss = history.history["loss"]

# Per-epoch training loss.
plt.plot(loss, "b.-", label='Training Loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()  # without this call the label given to plot() is never drawn
plt.show()


In [None]:
# Predict on the same windows the model was fitted on (there is no held-out split).
y_pred = model.predict(x)
# NOTE(review): the disabled line below scores Y_pred_avg (the averaging
# baseline), not y_pred from this model.
# print(r2_score(Y, Y_pred_avg)*100.)
# Show the first target next to the first prediction.
print(f'Actual: {Y[0]} Prediction: {y_pred[0]}')

## LSTM

In [None]:
# Fix the NumPy and TensorFlow seeds before the model is built.
np.random.seed(42)
tf.random.set_seed(42)

# Two stacked LSTM layers: the first returns the full sequence so the second
# can consume it; the second returns only its final output for the Dense head.
model = keras.models.Sequential([
    keras.layers.LSTM(100, activation='relu', input_shape=(step_x, feature_in), return_sequences=True),
    keras.layers.LSTM(100, activation='relu', return_sequences=False),
    keras.layers.Dense(step_y)
])

model.compile(loss="mse", optimizer="adam", metrics=['mean_squared_error'])
history = model.fit(x, Y, epochs=10, verbose=0)
loss = history.history["loss"]

# Per-epoch training loss.
plt.plot(loss, "b.-", label='Training Loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()  # without this call the label given to plot() is never drawn
plt.show()

In [None]:
# Predict on the same windows the model was fitted on (there is no held-out split).
y_pred = model.predict(x)
# Show the first target next to the first prediction.
print(f'Actual: {Y[0]} Prediction: {y_pred[0]}')

## Transformers / Hugging Face *library*

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from scipy.spatial.distance import cdist
!pip install transformers >/dev/null

### Transformers pipeline
In the Transformers package, `pipeline` is a wrapper that preprocesses the input, runs the model, and post-processes the output for tasks such as Named Entity Recognition, Masked Language Modeling, Sentiment Analysis, Feature Extraction, Question Answering, etc.

<code>  pipeline(
                    'task_name',
                    model ='model_name',
                    tokenizer 
                )
</code>
<br>Some of the available tasks:

    feature-extraction (get the vector representation of a text)
    fill-mask
    ner (named entity recognition)
    question-answering
    sentiment-analysis
    summarization
    text-generation
    translation
    zero-shot-classification

In [None]:
from transformers import pipeline

### Sentiment Analysis

In [None]:
# Build a sentiment-analysis pipeline. No `model` argument is passed, so the
# checkpoint is left to the library's default for this task.
classifier = pipeline('sentiment-analysis')

In [None]:
# Classify one sentence; the input text is deliberately garbled for the exercise below.
results = classifier("We are were happy wiht course content")
print(results)
#######################################
# exercise 1 Change above text to get #
# a normal response                   #
#######################################


In [None]:
# Pull the label and score out of the first result entry.
top_result = results[0]
sentiment_output = top_result['label']
sentiment_score = top_result['score']
print(f'Sentiment is: {sentiment_output} and its score: {sentiment_score}')

### Question-answer NLP example

In [None]:
# Build a question-answering pipeline. No `model` argument is passed, so the
# checkpoint is left to the library's default for this task.
question_answer = pipeline('question-answering')

In [None]:
# Ask a question against a short context passage; both are passed in one dict.
q_a = question_answer({
    'question': 'Who developed this library ?',
    'context':'bot is created in the transformer library'
})
print(q_a)

#######################################
# exercise 1 Change above text to get #
# answer for your question            #
#######################################

In [None]:
# Print only the 'answer' entry of the question-answering result.
print('The answer is', q_a['answer'])

### Text Summarization

In [None]:
# Build a summarization pipeline with an explicitly chosen checkpoint.
summary_ext = pipeline("summarization", model="facebook/bart-large-cnn")

In [None]:
# Sample passage to summarize.
text = """This is a text summary test. We are going to see in this course if the text 
can be summarize efficiently. This section of IST course is about the NLP (natural language processing). In this course of AI which brings together 
computer science and statistics to harness that predictive power. It’s a must-have skill for all aspiring data analysts and data scientists, or anyone else who wants to wrestle all that raw data into refined trends and predictions."""

# Run the summarization pipeline on the passage and print its raw output.
result = summary_ext(text)
print(result)

### Fill in the blank document processing

In [None]:
# Build a fill-mask pipeline with an explicitly chosen checkpoint.
mask_complete = pipeline('fill-mask',model='bert-base-uncased')

In [None]:
# Ask the pipeline for candidates for the [MASK] token; the returned value is
# displayed as the cell output.
mask_complete("Aoa, i like to develop [MASK] model.")
#######################################
# exercise 1 try multiple Mask        #
#######################################

# Tips and Advanced concepts 👇



## Custom Auto text completion.

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku 
import numpy as np 

%load_ext tensorboard
import tensorflow as tf
import datetime, os
!mkdir logs

In [None]:
tokenizer = Tokenizer()
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \
    -O /tmp/sonnets.txt
data = open('/tmp/sonnets.txt').read()

corpus = data.lower().split("\n")


tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# create input sequences using list of tokens
input_sequences = []
for line in corpus:
	token_list = tokenizer.texts_to_sequences([line])[0]
	for i in range(1, len(token_list)):
		n_gram_sequence = token_list[:i+1]
		input_sequences.append(n_gram_sequence)


# pad sequences 
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# create predictors and label
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

label = ku.to_categorical(label, num_classes=total_words)

In [None]:
# Next-word model: embedding -> bidirectional LSTM -> LSTM -> dense softmax
# over the vocabulary.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
# A layer width must be an integer; `/` yields a float in Python 3, so use `//`.
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()


In [None]:
# Give each run its own timestamped sub-directory under logs/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("logs", run_stamp)

# TensorBoard callback writing to that directory, with histograms every epoch.
callbacks = [tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)]

model.fit(predictors, label, epochs=25, verbose=2, callbacks=callbacks)

In [None]:
# Open the TensorBoard UI in the notebook, reading runs from the logs directory.
%tensorboard --logdir logs

In [None]:
def auto_complete(seed_text, next_words):
    """Extend ``seed_text`` with ``next_words`` greedily predicted words.

    Each step tokenises the current text, left-pads it to
    ``max_sequence_len - 1`` tokens, asks ``model`` for a prediction and
    appends the word whose index has the highest score. Relies on the
    module-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Starting text.
        next_words: Number of words to append; zero or a negative value
            returns ``seed_text`` unchanged.

    Returns:
        ``seed_text`` followed by the predicted words, each preceded by a
        space. A predicted index with no vocabulary entry contributes an
        empty word (so only the space is appended).
    """
    if next_words <= 0:
        return seed_text

    # Invert the vocabulary once instead of scanning it for every predicted word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        scores = model.predict(token_list, verbose=0)
        # One row is predicted per call; take the scalar class index of row 0
        # rather than comparing against a length-1 array.
        predicted = int(np.argmax(scores, axis=-1)[0])
        output_word = index_to_word.get(predicted, "")
        seed_text += " " + output_word
    return seed_text

# Append 5 predicted words to the seed phrase and print the result.
print(auto_complete("towrad the end of era", 5))

In [None]:
# Append 10 predicted words to the seed phrase and print the result.
print(auto_complete("i will be back", 10))