This code is implementing an Artificial Neural Network (ANN) for regression using TensorFlow & Keras. 



In [7]:
import numpy as np
import tensorflow as tf
import pandas as pd

In [8]:
dataset = pd.read_csv('CSV_Data_Sets/all_sites_scores.csv')

In [9]:
dataset


Unnamed: 0,FILM,RottenTomatoes,RottenTomatoes_User,Metacritic,Metacritic_User,IMDB,Fandango_Stars,Fandango_Ratingvalue,RT_norm,RT_user_norm,...,IMDB_norm,RT_norm_round,RT_user_norm_round,Metacritic_norm_round,Metacritic_user_norm_round,IMDB_norm_round,Metacritic_user_vote_count,IMDB_user_vote_count,Fandango_votes,Fandango_Difference
0,Avengers: Age of Ultron (2015),74,86,66,7.1,7.8,5.0,4.5,3.70,4.30,...,3.90,3.5,4.5,3.5,3.5,4.0,1330,271107,14846,0.5
1,Cinderella (2015),85,80,67,7.5,7.1,5.0,4.5,4.25,4.00,...,3.55,4.5,4.0,3.5,4.0,3.5,249,65709,12640,0.5
2,Ant-Man (2015),80,90,64,8.1,7.8,5.0,4.5,4.00,4.50,...,3.90,4.0,4.5,3.0,4.0,4.0,627,103660,12055,0.5
3,Do You Believe? (2015),18,84,22,4.7,5.4,5.0,4.5,0.90,4.20,...,2.70,1.0,4.0,1.0,2.5,2.5,31,3136,1793,0.5
4,Hot Tub Time Machine 2 (2015),14,28,29,3.4,5.1,3.5,3.0,0.70,1.40,...,2.55,0.5,1.5,1.5,1.5,2.5,88,19560,1021,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,Mr. Holmes (2015),87,78,67,7.9,7.4,4.0,4.0,4.35,3.90,...,3.70,4.5,4.0,3.5,4.0,3.5,33,7367,1348,0.0
142,'71 (2015),97,82,83,7.5,7.2,3.5,3.5,4.85,4.10,...,3.60,5.0,4.0,4.0,4.0,3.5,60,24116,192,0.0
143,"Two Days, One Night (2014)",97,78,89,8.8,7.4,3.5,3.5,4.85,3.90,...,3.70,5.0,4.0,4.5,4.5,3.5,123,24345,118,0.0
144,Gett: The Trial of Viviane Amsalem (2015),100,81,90,7.3,7.8,3.5,3.5,5.00,4.05,...,3.90,5.0,4.0,4.5,3.5,4.0,19,1955,59,0.0


In [10]:
x = dataset.iloc[:,1:-1].values #start from 2 column and convert it to numpy array.
y = dataset.iloc[:, -1].values #last column only

The function train_test_split() from sklearn.model_selection is used to split a dataset into training and testing sets.
This is essential for machine learning to evaluate model performance.



In [12]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.2, random_state = 0) #20% test set 80% training set, random_state = 0: Ensures consistent/random shuffling every time you run the code.

In [13]:
ann = tf.keras.models.Sequential() #Initializes a sequential ANN model, where layers are stacked one after another,  Sets up a blank ANN to which we add layers.

In [14]:
ann.add(tf.keras.layers.Dense(units = 6, activation ='relu')) #First hidden layer 

units=6 → This layer has 6 neurons.
activation='relu' → Uses ReLU (Rectified Linear Unit) as the activation function.
Why? → ReLU helps the network learn complex patterns while avoiding vanishing gradients.

In [16]:
ann.add(tf.keras.layers.Dense(units = 6, activation ='relu')) #Second Hidden Layer 

1. units=1 → Only 1 neuron in the output layer, as this is a regression problem.
2. No activation function because the model should output a continuous numerical value.
3. Purpose → Builds a 3-layer ANN (2 hidden layers + 1 output layer) for regression.



In [17]:
ann.add(tf.keras.layers.Dense(units = 1)) #output Layer 

1. optimizer='adam' → Uses Adam optimizer, an advanced gradient descent algorithm.
2. loss='mean_squared_error' → Uses MSE (Mean Squared Error) as the loss function (good for regression).

In [18]:
ann.compile(optimizer = 'adam', loss = 'mean_squared_error')

1. x_train, y_train → The training data.
2. batch_size=32 → Trains the model in batches of 32 samples at a time (improves efficiency).
3. epochs=100 → Runs 100 iterations over the training data.
4. Purpose → Trains the neural network to adjust weights & biases for best predictions.


In [38]:
ann.fit(x_train,y_train, batch_size = 64, epochs = 100)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 86.6192 
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 87.1459 
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 71.0349
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 83.6007 
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 69.8325 
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 85.5173 
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 69.0735
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 66.4377
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 83.5109
Epoch 10/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 67.6

<keras.src.callbacks.history.History at 0x15f16f8c0>

In [40]:
y_pred = ann.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1) , y_test.reshape(len(y_test),1)), 1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[[  6.7    0.5 ]
 [  5.13   0.4 ]
 [  8.17   0.4 ]
 [ 10.34   0.1 ]
 [  9.9    0.  ]
 [  1.34   0.3 ]
 [  9.73   0.4 ]
 [  1.67   0.4 ]
 [ -3.96   0.1 ]
 [  0.67   0.4 ]
 [  7.86   0.4 ]
 [ -6.88   0.2 ]
 [-15.28   0.  ]
 [  5.26   0.3 ]
 [  6.4    0.3 ]
 [  7.     0.2 ]
 [  2.28   0.4 ]
 [  6.55   0.3 ]
 [  2.63   0.1 ]
 [  7.8    0.5 ]
 [  3.73   0.2 ]
 [  7.13   0.4 ]
 [  8.79   0.1 ]
 [  1.12   0.2 ]
 [  9.93   0.  ]
 [  1.57   0.4 ]
 [  8.36   0.  ]
 [  2.01   0.2 ]
 [ -0.53   0.2 ]
 [ -0.47   0.1 ]]


1. np.set_printoptions(precision=2) → Limits printed numbers to 2 decimal places.
2. y_pred.reshape(len(y_pred), 1) → Reshapes predictions into a column vector.
3. y_test.reshape(len(y_test), 1) → Reshapes actual values into a column vector.
4. np.concatenate(..., 1) → Combines predicted & actual values side by side.
5. Purpose → Neatly displays predictions vs actual values.