In [None]:
import pandas as pd
import os

# map each corpus name to the tab-separated file holding its labelled sentences
filepath_dict = {'imdb':   'sentiment labelled sentences/imdb_labelled.txt'}

# read every corpus into its own frame and tag each row with the corpus name
# NOTE(review): read_csv uses its default quote handling here; if the sentences
# contain unbalanced double quotes, rows could be merged -- confirm the row count
document_list = []
for source, filepath in filepath_dict.items():
    labelled = pd.read_csv(filepath, sep='\t', names=['sentence', 'label'])
    document_list.append(labelled.assign(source=source))

# stack the per-corpus frames and print the first row as a sanity check
document = pd.concat(document_list)
print(document.iloc[0])

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# min_df is the minimum proportion of documents that must contain the word (excludes
# words that are rarer than this proportion); it is written as the float 0.0 because an
# integer min_df is read as a document count, and recent scikit-learn releases reject 0
# max_df is the maximum proportion of documents that may contain the word (excludes
# words that are more common than this proportion)
# max_features is the maximum number of words that will be considered
# the documents will be lowercased
vectorizer = CountVectorizer(min_df = 0.0, max_df = 1.0, max_features = 1000, lowercase = True)


In [None]:
from sklearn.model_selection import train_test_split

# keep only the rows that came from the imdb corpus and report how many there are
is_imdb = document['source'] == 'imdb'
document_imdb = document[is_imdb]
num_documents = len(document_imdb)
print(num_documents)

# the raw sentences are the inputs; the label column holds the targets
reviews = document_imdb['sentence'].values
y = document_imdb['label'].values

# split the data into training and test portions
# since this is just an example, no separate dev set is carved out
# 'reviews' holds the movie review sentences
# 'y' holds the label assigned to each review
# 'test_size = 0.20' is the proportion of the data reserved for testing
# 'random_state = 42' fixes the shuffling so that the split is reproducible
reviews_train, reviews_test, y_train, y_test = train_test_split(
    reviews,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# learn the vocabulary from the training reviews only; fitting again on the test
# reviews would replace this vocabulary with one built from the held-out data
vectorizer.fit(reviews_train)

# encode both splits as word-count matrices over the training vocabulary
X_train = vectorizer.transform(reviews_train)
X_test  = vectorizer.transform(reviews_test)

In [None]:
from keras.models import Sequential
from keras import layers
from keras import models

# number of features (words) in the vectorized training matrix
# it follows from the data and from the min_df, max_df and max_features
# settings that were given to the vectorizer
input_dimension = X_train.shape[1]
print(input_dimension)

# a Sequential model is a stack of layers where each layer has one input and
# one output tensor; this is a binary classification problem with a single
# output (0 or 1, depending on whether the review is positive or negative),
# so Sequential is appropriate
model = Sequential([
    # three hidden layers of 16 relu units; the first one also declares the input width
    layers.Dense(16, input_dim=input_dimension, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    # output layer
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# configure training: adam optimizer, binary cross-entropy loss, accuracy reported as a metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# print the layer summary to check that the model is structured as intended
model.summary()

In [None]:
# train for 20 epochs in batches of 10, scoring the held-out split after every epoch;
# the returned object carries the per-epoch metrics
history = model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=20,
    validation_data=(X_test, y_test),
    verbose=True,
)


In [None]:
from keras.backend import clear_session
# clear states generated by Keras to reduce memory usage
# NOTE(review): this cell sits after training and before the evaluation cell, which
# still uses the trained `model` -- confirm the model is meant to stay usable after
# this call; otherwise move it ahead of the cell that builds the model
clear_session()

In [None]:
# score the model on the training split and report its accuracy
train_scores = model.evaluate(X_train, y_train, verbose=False)
loss, accuracy = train_scores
print("Training Accuracy: {:.4f}".format(accuracy))

# same measurement on the held-out split
test_scores = model.evaluate(X_test, y_test, verbose=False)
loss, accuracy = test_scores
print("Testing Accuracy:  {:.4f}".format(accuracy))

In [None]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line
model.summary()
# plot_model is imported from keras.utils; the keras.utils.vis_utils module path
# is not available in newer Keras releases
from keras.utils import plot_model
#visualize the model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# the inline-backend magic is left disabled
#%matplotlib inline
import matplotlib.pyplot as plt
# apply the 'ggplot' style sheet to the figures drawn from here on
plt.style.use('ggplot')

def plot_history(history):
    """Plot training/validation accuracy and loss curves side by side.

    history is an object whose `history` attribute is a dict holding the
    'accuracy', 'val_accuracy', 'loss' and 'val_loss' series (one value per
    epoch). The two-panel figure is saved to 'loss.svg' and then shown.
    """
    metrics = history.history
    # (training series, validation series, training label, validation label, panel title)
    panels = [
        (metrics['accuracy'], metrics['val_accuracy'],
         'Training accuracy', 'Validation accuracy',
         'Training and validation accuracy'),
        (metrics['loss'], metrics['val_loss'],
         'Training loss', 'Validation loss',
         'Training and validation loss'),
    ]
    # epochs are numbered from 1 on the x axis
    epochs = range(1, len(panels[0][0]) + 1)

    plt.figure(figsize=(12, 5))
    for column, (train, val, train_label, val_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, column)
        plt.plot(epochs, train, 'b', label=train_label)
        plt.plot(epochs, val, 'r', label=val_label)
        plt.title(title)
        plt.legend(fontsize=14)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)

    # the whole figure (both panels) is written to this single file
    plt.savefig('loss.svg')
    plt.show()
   

In [None]:
# draw the accuracy and loss curves recorded in `history` during training
plot_history(history)

In [None]:
# plot inputs and outputs
from matplotlib import pyplot
 
# rectified linear function
def rectified(x):
    """Return x when it is greater than zero, otherwise 0.0."""
    return x if x > 0.0 else 0.0
 
# define a series of integer inputs from -15 to 14
series_in = list(range(-15, 15))
# calculate outputs for our inputs
series_out = [rectified(value) for value in series_in]

# every plotting call goes through `pyplot`, the name this cell imports, so the
# cell does not depend on the `plt` alias imported by a different cell
pyplot.grid(color='white')

# Set the background color to gray
ax = pyplot.gca()
ax.set_facecolor('lightgray')
# line plot of raw inputs to rectified outputs
pyplot.plot(series_in, series_out, color = "red")
pyplot.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Define the ReLU function
def relu(x):
    """Return the element-wise maximum of 0 and x (the rectified linear unit)."""
    floor = 0
    return np.maximum(floor, x)

# Generate x values between -15 and 15
# (cell-specific names are used so that the label array `y` created in the
# train/test split cell is not overwritten when this cell runs)
relu_x = np.linspace(-15, 15, 500)

# Generate y values using the ReLU function
relu_y = relu(relu_x)

# Set the plot parameters
plt.figure(figsize=(8, 6))
plt.title("ReLU Function")
plt.xlabel("X")
plt.ylabel("Y")
plt.xlim(-15, 15)
plt.ylim(0, 15)
plt.grid(color='white')

# Set the background color to gray
ax = plt.gca()
ax.set_facecolor('lightgray')

# Plot the ReLU function
plt.plot(relu_x, relu_y)

# Show the plot
plt.show()


In [None]:
import numpy as np 
def sig(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of x, element-wise.

    x may be a scalar or an array-like of numbers; a scalar input yields a
    NumPy scalar and an array input yields an array of the same shape.

    The exponential is only ever taken of -|x|, so inputs of large magnitude
    saturate to 0 or 1 instead of overflowing inside np.exp.
    """
    x = np.asarray(x)
    # decay lies in [0, 1], so neither expression below can overflow
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: the algebraically equal e^x / (1 + e^x)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # indexing with () turns a 0-d result back into a scalar and leaves arrays as they are
    return result[()]

In [None]:
import matplotlib.pyplot as plt
# sample the sigmoid at 50 evenly spaced points over [-10, 10]
x = np.linspace(-10, 10, 50)
p = sig(x)

# draw the curve, then label both axes
plt.plot(x, p)
plt.xlabel("x")
plt.ylabel("Sigmoid(x)")
plt.show()