In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import math
import warnings
warnings.filterwarnings('ignore')
from scipy.special import softmax
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

#### Load Dataset

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the dataset path used below resolves.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Read the ratings table and discard the stray 'Unnamed: 0' column
# (presumably a saved index column — verify against the CSV).
df = (
    pd.read_csv("/content/drive/MyDrive/Datasets/recommendation/rating_dataset_all.csv")
    .drop(columns=['Unnamed: 0'])
)

# Preview the first ten rows.
df.head(10)

#### Check whether the dataset is balanced or imbalanced

In [None]:
# (rows, columns) of the raw ratings table.
df.shape

In [None]:
# Number of rows per rating value in the 'target' column.
df['target'].value_counts()

In [None]:
# Bar chart of how many rows carry each rating value (before balancing).
ser=df['target'].value_counts()
# seaborn >= 0.12 accepts only `data` positionally, so x and y must be passed by keyword.
sns.barplot(x=ser.index, y=ser.values)
plt.xlabel('Ratings')
plt.title('Rating Distribution')
plt.ylabel('Number of rows in dataset')
plt.show()

#### Balancing the imbalanced dataset

In [None]:
# Balance the classes by down-sampling ratings 2-5 to 11000 rows each.
# Rating 1 is kept in full (presumably it is the minority class with roughly
# 11000 rows — verify against the value_counts output above).
# A fixed random_state makes the sampled subset repeatable across kernel restarts.
rat1 = df.loc[df['target']==1]
rat2 = df.loc[df['target']==2].sample(n=11000, random_state=42)
rat3 = df.loc[df['target']==3].sample(n=11000, random_state=42)
rat4 = df.loc[df['target']==4].sample(n=11000, random_state=42)
rat5 = df.loc[df['target']==5].sample(n=11000, random_state=42)

In [None]:
# Stack the per-rating subsets into one frame with a fresh 0..n-1 index.
df = pd.concat([rat1,rat2,rat3,rat4,rat5],axis=0, ignore_index=True)
# Shuffle the rows so the ratings are interleaved; the fixed seed keeps the order repeatable.
df = df.sample(frac=1, ignore_index=True, random_state=42)
df.head()

In [None]:
# Per-rating row counts after balancing.
df['target'].value_counts()

In [None]:
# Bar chart of the per-rating row counts after balancing.
ser=df['target'].value_counts()
# seaborn >= 0.12 accepts only `data` positionally, so x and y must be passed by keyword.
sns.barplot(x=ser.index, y=ser.values)
plt.xlabel('Ratings')
plt.title('Rating Distribution')
plt.ylabel('Number of rows in dataset')
plt.show()

In [None]:
# (rows, columns) of the balanced table.
df.shape

In [None]:
# Alias only: `data` and `df` refer to the same DataFrame object (no copy is made).
data=df

In [None]:
# Same shape as `df`, since `data` is just another name for it.
data.shape

#### Splitting the target feature from the independent features

In [None]:
# Every column except the last is a feature; the last column is the label.
# NOTE(review): relies on 'target' being the final column of the CSV — confirm.
X = data.iloc[:,:-1].to_numpy()
y = data.iloc[:,-1].to_numpy()

#### Train Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# (train_test_split is imported with the other dependencies in the notebook's first cell.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Keep the integer test labels under a separate name: y_test is one-hot encoded further down,
# while y_true is used later for the accuracy comparison.
y_true = y_test

In [None]:
# NOTE(review): this builds a synthetic "prediction" vector straight from the ground truth:
# the first 5200 labels are copied unchanged and the remaining ones are shifted by +1.
# `pred_ratings` is reassigned by the training cells further down, so this looks like
# leftover scaffolding — confirm whether the cell is still needed.
# If y_true has 5200 entries or fewer, `p` is empty and pred_ratings equals y_true.
p = y_true[5200:]+1
pred_ratings = np.concatenate([y_true[:5200], p], axis=0)

#### One hot encoding the ratings

In [None]:
# One-hot encode the rating labels: one column per distinct rating found in each split.
# NOTE(review): the two splits are encoded independently, so their columns only line up
# when both splits contain the same set of ratings — verify.
y_train = pd.get_dummies(y_train).to_numpy()
y_test = pd.get_dummies(y_test).to_numpy()

In [None]:
# Sanity check: feature and label arrays of the same split should agree on their first dimension.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Turn each training sample into its own (1, n_features) row vector:
# (n_samples, n_features) -> (n_samples, 1, n_features).
# Using -1 for the last axis keeps the cell idempotent: re-running it on the
# already reshaped array leaves it unchanged instead of raising.
X_train = X_train.reshape(len(X_train), 1, -1)
X_train.shape

In [None]:
# Turn each one-hot training label into its own (1, n_classes) row vector:
# (n_samples, n_classes) -> (n_samples, 1, n_classes).
# Using -1 for the last axis keeps the cell idempotent: re-running it on the
# already reshaped array leaves it unchanged instead of raising.
y_train = y_train.reshape(len(y_train), 1, -1)
y_train.shape

# ANN

In [None]:
class Layer:
    """Interface shared by every layer of the network.

    Subclasses must implement both propagation methods; the base versions
    only raise NotImplementedError.
    """

    def __init__(self):
        # Last value fed into / produced by the layer; both start unset.
        self.input = self.output = None

    def forward_propagation(self, input):
        """Return the layer's output for `input` (must be overridden)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Return the error w.r.t. the layer's input (must be overridden)."""
        raise NotImplementedError()

In [None]:
class FCLayer(Layer):
    """Fully connected layer: ``output = input @ weights + bias``.

    Weights have shape (input_size, output_size) and the bias has shape
    (1, output_size); both are initialised uniformly in [-0.5, 0.5).
    """

    def __init__(self, input_size, output_size):
        # Initialise self.input / self.output so they exist before the first forward pass.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Store the input and return the affine transform of it.

        A 1-D sample is promoted to a (1, input_size) row vector. The returned
        values are the same as before (the bias broadcast already produced a
        2-D result), but the stored input now has the 2-D shape that
        backward_propagation's transpose relies on.
        """
        self.input = np.atleast_2d(input_data)
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the error w.r.t. the input.

        output_error is the loss gradient w.r.t. this layer's output.
        """
        # Both products use the weights as they were during the forward pass,
        # so they are computed before the update below.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)

        self.weights -= learning_rate * weights_error
        # The bias gradient equals the output error itself.
        self.bias -= learning_rate * output_error
        return input_error

In [None]:
class ActivationLayer(Layer):
    """Layer that applies an activation function and back-propagates through its Jacobian."""

    def __init__(self, activation, activation_prime):
        # activation_prime must return a 2-D Jacobian matrix, since
        # backward_propagation multiplies the incoming error by it with np.dot.
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Store the input, apply the activation and return (and store) the result."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return output_error multiplied by the activation's Jacobian.

        learning_rate is unused: this layer has no trainable parameters.
        """
        # softmax_prime is written in terms of the softmax *output*; every other
        # derivative here is evaluated at the layer's input.
        evaluate_at_output = self.activation_prime == softmax_prime
        point = self.output if evaluate_at_output else self.input
        jacobian = self.activation_prime(point)
        return np.dot(output_error, jacobian)

#### Hyperbolic Tangent Activation Function

In [None]:
def tanh(x):
    """Element-wise hyperbolic tangent activation."""
    return np.tanh(x)


def tanh_prime(x):
    """Jacobian of tanh evaluated at `x`, as a diagonal matrix.

    The element-wise derivative ``1 - tanh(x)**2`` is flattened and placed on
    the diagonal of an (n, n) matrix, where n is the number of elements in x.
    """
    derivative = 1 - np.tanh(x) ** 2
    return np.diag(derivative.ravel())

#### Sigmoid Activation Function

In [None]:
def sigmoid(x):
  """Element-wise logistic function 1 / (1 + e^-x)."""
  denominator = 1 + np.exp(-x)
  return 1 / denominator

def sigmoid_prime(x):
  """Jacobian of the sigmoid evaluated at `x`, as a diagonal matrix.

  The element-wise derivative s * (1 - s), with s = sigmoid(x), is flattened
  and placed on the diagonal of an (n, n) matrix.
  """
  s = sigmoid(x)
  return np.diag((s * (1 - s)).flatten())

#### Softmax Activation Function

In [None]:
def softmax_(x):
  """Thin wrapper around scipy.special.softmax, called without an axis argument."""
  return softmax(x)

def softmax_prime(x):
  """Jacobian of softmax, expressed in terms of the softmax *output*.

  Parameters
  ----------
  x : 2-D array of shape (1, n)
      Softmax probabilities s (not the pre-activation logits). Only row 0 is used.

  Returns
  -------
  (n, n) array J with J[i, i] = s_i * (1 - s_i) and J[i, j] = -s_i * s_j for i != j.
  """
  probs = x[0, :]
  # Off-diagonal terms come from the outer product; the diagonal is then
  # overwritten with s_i * (1 - s_i).
  jacobian = -np.outer(probs, probs)
  np.fill_diagonal(jacobian, probs * (1 - probs))
  return jacobian

#### Cross Entropy Loss Function

In [None]:
def cross_entropy(y_true, y_pred, eps=1e-12):
  """Summed (not averaged) cross-entropy between one-hot targets and predicted probabilities.

  Predictions are clipped from below at `eps` so that a probability of exactly 0
  yields a large finite loss instead of NaN (0 * log(0)) or inf.
  """
  safe_pred = np.clip(y_pred, eps, None)
  return ( -1 * np.sum( y_true * np.log(safe_pred) ) )

def cross_entropy_prime(y_true, y_pred, eps=1e-12):
  """Gradient of the cross-entropy w.r.t. y_pred: -y_true / y_pred.

  The same lower clip as in cross_entropy avoids division by zero.
  """
  safe_pred = np.clip(y_pred, eps, None)
  return -1 * ( y_true / safe_pred )

#### Defining the ANN Network

In [None]:
class Network:
  """Sequential stack of layers trained one sample at a time."""

  def __init__(self):
    self.layers = []
    self.loss = None
    self.loss_prime = None

  def add(self, layer):
    """Append `layer` to the end of the stack."""
    self.layers.append(layer)

  def use(self, loss, loss_prime):
    """Register the loss function and its derivative used by fit()."""
    self.loss, self.loss_prime = loss, loss_prime

  def _forward(self, sample):
    """Push one sample through every layer in order and return the final output."""
    signal = sample
    for layer in self.layers:
      signal = layer.forward_propagation(signal)
    return signal

  def predict(self, input_data):
    """Return a list holding the network output for each element of input_data."""
    return [self._forward(input_data[k]) for k in range(len(input_data))]

  def fit(self, x_train, y_train, epochs, learning_rate):
    """Train for `epochs` passes over the data, updating the layers after every sample.

    Prints the mean per-sample loss after each epoch.
    """
    samples = len(x_train)

    for epoch in range(1, epochs + 1):
      err = 0
      for idx in range(samples):
        output = self._forward(x_train[idx])

        # accumulate the loss for reporting only
        err += self.loss(y_train[idx], output)

        # walk the layers back to front; each layer updates itself and
        # hands the error w.r.t. its input to the previous one
        gradient = self.loss_prime(y_train[idx], output)
        for layer in reversed(self.layers):
          gradient = layer.backward_propagation(gradient, learning_rate)

      err /= samples
      print('epoch %d/%d   error=%f' % (epoch, epochs, err))

#### Creating the ANN Network

In [None]:
def create_ann_network(X_train, y_train):
  """Build the fixed baseline network: four dense layers (10, 8, 8, 10 units) plus a softmax output.

  The input width is X_train.shape[2] and the output width is y_train.shape[2],
  so both arrays are expected to be 3-D. Only their shapes are read.

  NOTE: a later cell defines another `create_ann_network` with a longer
  signature; whichever cell ran last is the definition in effect.
  """
  # (units, activation, derivative) for each dense layer ahead of the output layer
  layer_plan = [
      (10, tanh, tanh_prime),        # input layer
      (8, tanh, tanh_prime),         # hidden layer 1
      (8, sigmoid, sigmoid_prime),   # hidden layer 2
      (10, tanh, tanh_prime),        # hidden layer 3
  ]

  net = Network()

  fan_in = X_train.shape[2]
  for units, activation, activation_prime in layer_plan:
    net.add(FCLayer(fan_in, units))
    net.add(ActivationLayer(activation, activation_prime))
    fan_in = units

  # output layer
  net.add(FCLayer(fan_in, y_train.shape[2]))
  net.add(ActivationLayer(softmax, softmax_prime))

  # loss
  net.use(cross_entropy, cross_entropy_prime)

  return net

#### Training and Testing the ANN Model

In [None]:
# Build and train the fixed baseline topology.
# NOTE: this call matches the two-argument create_ann_network defined just above; a later
# cell redefines that name with more parameters, so this cell must run before that one.
net = create_ann_network(X_train, y_train)

net.fit(X_train, y_train, epochs=50, learning_rate=0.5)

# net.predict returns one output per test sample; the reshape below assumes the stacked
# array is (n_samples, 1, n_classes) and flattens it to (n_samples, n_classes).
out = net.predict(X_test)
pred = np.array(out)
pred = pred.reshape(pred.shape[0], pred.shape[2])
# argmax is 0-based, hence the +1 to map the winning column to a rating.
pred_ratings = np.argmax(pred, axis=1)+1

# Cross-entropy summed (not averaged) over the whole test set.
loss = cross_entropy(y_test, pred)
print('loss:  ', loss)

# Genetic Algorithm

### Generate Chromosome

In [None]:
def generate_chromosome(length):
  """Return a random bit string ('0'/'1' characters) of the given length."""
  bits = np.random.choice([0, 1], length)
  return ''.join(str(bit) for bit in bits)

In [None]:
# Quick demo: one random 15-bit chromosome.
generate_chromosome(15)

### Generate Population

In [None]:
def generate_population(pop_size, chromosome_length):
  """Return a list of `pop_size` random chromosomes, each `chromosome_length` bits long."""
  population = []
  for _ in range(pop_size):
    population.append(generate_chromosome(chromosome_length))
  return population

In [None]:
# generate_population(12, 5)

### Crossover

In [None]:
def single_point_crossover(chromosome1, chromosome2):
  """Swap the tails of two chromosomes at one random cut point.

  The cut index is drawn uniformly from 1 .. len(chromosome1) - 1, so each
  child keeps at least one leading gene of its own parent. Returns the two
  children as a tuple.
  """
  cut = np.random.randint(1, len(chromosome1))

  head1, tail1 = chromosome1[:cut], chromosome1[cut:]
  head2, tail2 = chromosome2[:cut], chromosome2[cut:]
  return (head1 + tail2, head2 + tail1)

In [None]:
# a, b = generate_population(10, 2)
# print(a, b)
# single_point_crossover(a, b)

### Mutation

In [None]:
def mutation(population, probability = 0.5):
  """Flip one random bit in randomly chosen chromosomes, in place.

  int(len(population) * probability) chromosome indices are drawn with
  replacement, so the same chromosome can be hit more than once. The
  population container is modified and also returned.
  """
  pop_len = len(population)
  mutation_count = int(pop_len * probability)

  # indices of the chromosomes that receive a bit flip
  targets = np.random.randint(0, pop_len, mutation_count)
  for idx in targets:

    # position of the bit to invert
    pos = np.random.randint(0, len(population[0]))

    chromosome = population[idx]
    inverted = str(1 - int(chromosome[pos]))
    population[idx] = chromosome[:pos] + inverted + chromosome[pos+1:]

  return population

In [None]:
# s = generate_population(4,10)
# print(s)
# p = mutation(s)
# print('\n',p)

### Create ANN Network

In [None]:
def define_af(af):
  """Map an activation gene to an (activation, derivative) pair.

  1 selects tanh; any other value selects sigmoid.
  """
  if af != 1:
    return sigmoid, sigmoid_prime
  return tanh, tanh_prime

In [None]:
def create_ann_network(X_train, y_train, neuron_0, neuron_1, neuron_2, neuron_3, af1, af2, af3, af4):
  """Build a network whose four dense layers are sized and activated per the arguments.

  neuron_0..neuron_3 are the layer widths and af1..af4 the matching activation
  codes understood by define_af (1 = tanh, otherwise sigmoid). The input width
  is X_train.shape[2], and the softmax output width is y_train.shape[2].

  NOTE: this definition replaces the earlier two-argument create_ann_network.
  """
  widths = (neuron_0, neuron_1, neuron_2, neuron_3)
  activation_pairs = [define_af(code) for code in (af1, af2, af3, af4)]

  net = Network()

  # the four configurable dense layers, each followed by its activation
  fan_in = X_train.shape[2]
  for units, (activation, activation_prime) in zip(widths, activation_pairs):
    net.add(FCLayer(fan_in, units))
    net.add(ActivationLayer(activation, activation_prime))
    fan_in = units

  # output layer
  net.add(FCLayer(fan_in, y_train.shape[2]))
  net.add(ActivationLayer(softmax, softmax_prime))

  # loss
  net.use(cross_entropy, cross_entropy_prime)

  return net

### Make Predictions

In [None]:
# from sklearn.metrics import accuracy_score

def predict(X_test, net):
  """Return the network outputs for X_test as a 2-D array.

  net.predict yields one output per sample; the stacked 3-D array is reshaped
  to (n_samples, size of its last axis). The values are raw network outputs,
  not argmax-ed ratings.
  """
  stacked = np.array(net.predict(X_test))
  n_samples, n_outputs = stacked.shape[0], stacked.shape[2]
  return stacked.reshape(n_samples, n_outputs)

#### Define Range for Learning rate and Number of Epochs

In [None]:
# Lookup tables addressed by the two 3-bit fields at the end of a chromosome (index 0-7).
LR = [0.1, 0.15, 0.2, 0.3, 0.5, 0.8, 0.9, 1]  # candidate learning rates
EPOCHS = [5, 6, 8, 9, 10, 13, 15, 20]  # candidate numbers of training epochs

### Calculate Error for each topology

In [None]:
def calculate_error(X_train, y_train, X_test, y_test, topology_structure):
  """Train the network encoded by a 30-bit chromosome and return its cross-entropy on the test data.

  Bit layout of topology_structure:
    0-19   four 5-bit layer widths (each 0-31)
    20-23  one activation bit per layer (see define_af)
    24-26  index into LR
    27-29  index into EPOCHS

  NOTE(review): a width field of '00000' decodes to a layer with 0 neurons —
  confirm whether that is intended.
  """
  bits = topology_structure

  layer_widths = [int(bits[start:start + 5], 2) for start in range(0, 20, 5)]
  activation_codes = [int(bits[pos]) for pos in range(20, 24)]

  lr = LR[int(bits[24:27], 2)]
  epochs = EPOCHS[int(bits[27:30], 2)]

  net = create_ann_network(X_train, y_train, *layer_widths, *activation_codes)

  # train
  net.fit(X_train, y_train, epochs, lr)

  # predict, then score with the (summed) cross-entropy loss
  return cross_entropy(y_test, predict(X_test, net))

### Calculate Fitness values for all chromosomes/topologies

In [None]:
def calculate_fitness(X_train, y_train, X_test, y_test, population):
  """Return one fitness value per chromosome: the reciprocal of its calculate_error loss.

  A lower loss therefore means a higher fitness. Every chromosome triggers a
  full train-and-evaluate run.
  """
  return [
      1 / calculate_error(X_train, y_train, X_test, y_test, chromosome)
      for chromosome in population
  ]

### Selection

In [None]:
def selection(population, fitness_values):
  """Roulette-wheel selection.

  Draws len(population) chromosomes with replacement, each with probability
  proportional to its share of the total fitness. Returns them as a NumPy array.
  """
  total_fitness = sum(fitness_values)

  # normalise so the fitness shares form a probability distribution
  probabilities = [value / total_fitness for value in fitness_values]

  # fitter chromosomes are more likely to enter the mating pool
  mating_pool = np.random.choice(a = population, size = len(population), p = probabilities)
  return mating_pool

In [None]:
# p=generate_population(10,4)
# print(p)
# f=calculate_fitness(p)
# p2=selection(p,f)
# print(p2)

# Output

# ['0100', '0110', '0111', '0111', '0001', '1110', '0111', '0111', '0000', '0001']
# [0.03740263543025862, 0.1444143799475979, 0.27212793943351327, 0.10787501429705755, 0.13738433714272053, 0.03855198616610927, 0.0952309097131657, 0.08759249823259811, 0.04064053578467348, 0.038779763852305756]
# ['0111' '0111' '0111' '0111' '0110' '0111' '0111' '0001' '0111' '0000']

### Genetic Algorithm

In [None]:
def genetic_algorithm(X_train, y_train, X_test, y_test, pop_size=10, ch_length=30, generations=5, mutation_prob=0.2):
  """Evolve network topologies and return the final population, best chromosome first.

  Parameters
  ----------
  X_train, y_train, X_test, y_test : arrays handed unchanged to calculate_fitness.
  pop_size : number of chromosomes per generation.
  ch_length : bits per chromosome (the decoding in calculate_error reads 30 bits).
  generations : number of selection / crossover / mutation rounds.
  mutation_prob : passed to mutation() as its `probability`.

  Returns
  -------
  NumPy array of chromosomes sorted by fitness in descending order.

  NOTE(review): fitness is measured on (X_test, y_test), so the chosen topology
  is tuned on the same data that is later used to report accuracy — consider a
  separate validation split.
  """
  # Step 1: Create population
  population = generate_population(pop_size, ch_length)

  for _generation in range(generations):

      # Step 2: Calculate fitness values for each chromosome
      fitness_values = calculate_fitness(X_train, y_train, X_test, y_test, population)

      # Step 3: Selection
      population = selection(population, fitness_values)

      # Step 4: Crossover between neighbouring pairs. Stopping at len - 1 means
      # that with an odd population size the last chromosome is carried over
      # unchanged instead of raising IndexError.
      for k in range(0, len(population) - 1, 2):
        population[k], population[k+1] = single_point_crossover(population[k], population[k+1])

      # Step 5: Mutation
      population = mutation(population, mutation_prob)

  fitness_values = calculate_fitness(X_train, y_train, X_test, y_test, population)

  # Indices ordered from highest to lowest fitness.
  ranking = np.argsort(fitness_values)[::-1]

  # selection() returns an ndarray, but with generations == 0 the population is
  # still a plain list, which cannot be fancy-indexed; normalise first.
  return np.asarray(population)[ranking]

### Chromosome Binary Encoding

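##### 5 bits each (20 bits total) encode the number of neurons in each of the 4 hidden layers

##### 1 bit each (4 bits total) selects the activation function of each hidden layer (1 = tanh, 0 = sigmoid)

##### 3 bits index the learning-rate list `LR`

##### 3 bits index the epoch-count list `EPOCHS`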

In [None]:
# genetic_algorithm(10, 4, 1, 0.2)

#### Defining Genetic Algorithm Parameters

In [None]:
# Genetic-algorithm settings passed to genetic_algorithm() below.
population_size = 10  # chromosomes per generation
chromosome_length = 30  # 20 layer-width bits + 4 activation bits + 3 learning-rate bits + 3 epoch bits
generations = 5  # selection / crossover / mutation rounds
mutation_prob = 0.2  # int(population_size * mutation_prob) single-bit flips are drawn per round

#### Running the Genetic Algorithm

In [None]:
# Run the search. Every fitness evaluation trains a full network, so this cell is slow.
final_population = genetic_algorithm(X_train, y_train, X_test, y_test, population_size, chromosome_length, generations, mutation_prob)

#### Best Topology

In [None]:
# genetic_algorithm returns the population ordered best-first, so index 0 is the fittest chromosome.
optimized_topology = final_population[0]

#### Display Optimized Topology Structure

In [None]:
def display_topology(optimized_topology):
  """Print the network settings encoded by a 30-bit chromosome.

  Bit layout: four 5-bit hidden-layer widths, four activation bits (decoded by
  define_af), a 3-bit index into LR and a 3-bit index into EPOCHS.
  The input-layer width is read from the global X_train.
  """
  n_0 = int(optimized_topology[:5], 2)
  n_1 = int(optimized_topology[5:10], 2)
  n_2 = int(optimized_topology[10:15], 2)
  n_3 = int(optimized_topology[15:20], 2)

  af1 = int(optimized_topology[20])
  af2 = int(optimized_topology[21])
  af3 = int(optimized_topology[22])
  af4 = int(optimized_topology[23])

  lr = LR[int(optimized_topology[24:27], 2)]
  epochs = EPOCHS[int(optimized_topology[27:30], 2)]

  print('Number of neurons in input layer: ', X_train.shape[2])
  print('Number of neurons in first hidden layer: ', n_0)
  print('Number of neurons in second hidden layer: ', n_1)
  print('Number of neurons in third hidden layer: ', n_2)
  # n_3 is the width of the fourth hidden layer; the output layer's width comes
  # from the label array, not from the chromosome.
  print('Number of neurons in fourth hidden layer: ', n_3)
  print('\n')
  # define_af returns (activation, derivative); print the activation's name
  # rather than the repr of a tuple of function objects.
  print('Activation Function in first hidden layer: ', define_af(af1)[0].__name__)
  print('Activation Function in second hidden layer: ', define_af(af2)[0].__name__)
  print('Activation Function in third hidden layer: ', define_af(af3)[0].__name__)
  print('Activation Function in fourth hidden layer: ', define_af(af4)[0].__name__)
  print('Activation Function in output layer: Softmax')
  print('\n')
  print('Learning Rate:  ', lr)
  print('Number of Epochs:  ', epochs)

In [None]:
# Print a human-readable summary of the best chromosome.
display_topology(optimized_topology)

#### Training the dataset on topology obtained from Genetic Algorithm

In [None]:
def model(X_train, y_train, X_test, y_test, topology_structure):
  """Train the network encoded by `topology_structure` and return predicted ratings for X_test.

  The chromosome is decoded exactly as in calculate_error: four 5-bit layer
  widths, four activation bits, a 3-bit LR index and a 3-bit EPOCHS index.

  y_test is accepted for signature compatibility but is not used: the loss and
  accuracy that used to be computed here were discarded, and the accuracy
  silently depended on a global `y_true`.

  Returns a 1-D integer array of ratings (argmax of the network output, plus 1).
  """
  n_0 = int(topology_structure[:5], 2)
  n_1 = int(topology_structure[5:10], 2)
  n_2 = int(topology_structure[10:15], 2)
  n_3 = int(topology_structure[15:20], 2)

  af1 = int(topology_structure[20])
  af2 = int(topology_structure[21])
  af3 = int(topology_structure[22])
  af4 = int(topology_structure[23])

  lr = LR[int(topology_structure[24:27], 2)]
  epochs = EPOCHS[int(topology_structure[27:30], 2)]

  net = create_ann_network(X_train, y_train, n_0, n_1, n_2, n_3, af1, af2, af3, af4)

  # train
  net.fit(X_train, y_train, epochs, lr)

  # predict() already returns a 2-D (n_samples, n_outputs) array
  pred = predict(X_test, net)

  # rating = index of the largest output, shifted because argmax is 0-based
  pred_ratings = np.argmax(pred, axis=1)+1
  return pred_ratings

In [None]:
# Retrain with the best topology found and predict a rating for every test row.
pred_ratings = model(X_train, y_train, X_test, y_test, optimized_topology)
pred_ratings

In [None]:
# Wrap the true and predicted ratings as named Series so they can be compared side by side.
Y_true = pd.Series(y_true, name='y_true')
Y_pred = pd.Series(pred_ratings, name='y_pred')

In [None]:
# Side-by-side view of true vs. predicted ratings for 30 randomly sampled test rows.
cmp = pd.concat([Y_true, Y_pred], axis=1)
# Unseeded sample: the displayed rows change on every run.
cmp = cmp.sample(n=30, ignore_index=True)
# The columns already carry these names from the Series; the assignment just makes them explicit.
cmp.columns=['y_true', 'y_pred']
cmp.head(30)

In [None]:
# Fraction of test rows whose predicted rating equals the true rating.
# accuracy_score is already imported in the notebook's first cell, so it is not re-imported here.
accuracy = accuracy_score(Y_true, Y_pred)
print("Accuracy for the model is: ", accuracy)

In [None]:
# Positional indices 0..n-1 of the test rows.
idx = np.arange(y_true.shape[0])

In [None]:
# Count matching predictions with one vectorised comparison instead of a Python-level index loop.
# Comparing the underlying arrays keeps the check positional, as the loop was.
correct_predictions = int(np.sum(Y_pred.to_numpy() == Y_true.to_numpy()))
print(correct_predictions)
incorrect_predictions = len(y_true)-correct_predictions
print(incorrect_predictions)

In [None]:
# Bar chart comparing the number of correct and incorrect predictions.
plt.figure(figsize=(8,6))
# seaborn >= 0.12 accepts only `data` positionally, so x and y must be passed by keyword.
sns.barplot(x=['Correct Predictions', 'Incorrect Predictions'], y=[correct_predictions, incorrect_predictions])
plt.title('Count of Correct and Incorrect Predictions', fontsize=15)
plt.xlabel('Predictions', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.show()