Importing the Libraries

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf                        # deep-learning framework; provides the Keras API imported below
from tensorflow import keras                   # high-level API used here to define, compile and train the model
from tensorflow.keras import layers, callbacks # callbacks supplies EarlyStopping for the training loop

Input Layer

In [None]:
# Load data

def input_100k(movies_path='movies.csv', ratings_path='ratings.csv'):
  """Load the movie and rating tables into the module-level globals.

  Args:
    movies_path: CSV file read into the global ``movies`` frame.
    ratings_path: CSV file read into the global ``ratings`` frame.

  Returns:
    The tuple ``(movies, ratings)``. The same objects are also published as
    globals because the other functions in this notebook read them from there.
  """
  global movies, ratings
  movies = pd.read_csv(movies_path)
  ratings = pd.read_csv(ratings_path)
  return movies, ratings


Common Layers

The ratings.csv file has the columns 'userId', 'movieId', 'rating', and 'timestamp'.

The movies.csv file has the columns 'movieId', 'title', and 'genres'.

The movies.dat file contains a serial number, the movie name, year, genre, and whether or not it is a children's movie.

In [None]:
def common_layer1():
  """Index the ratings, split them, and build the two convolutional branches.

  Reads the global ``ratings`` frame and adds ``movie_idx`` / ``user_idx``
  columns to it in place. Publishes ``train_df``, ``test_df``, ``movie_input``
  and ``user_input`` as globals for the later training step.

  Returns:
    The concatenation of the movie and user Conv2D outputs.
  """
  global train_df, test_df, movie_input, user_input

  # Dense 0-based position for every distinct id, in order of first appearance.
  movie_to_idx = {mid: pos for pos, mid in enumerate(ratings['movieId'].unique())}
  user_to_idx = {uid: pos for pos, uid in enumerate(ratings['userId'].unique())}

  ratings['movie_idx'] = ratings['movieId'].map(movie_to_idx)
  ratings['user_idx'] = ratings['userId'].map(user_to_idx)

  # 80/20 split with a fixed seed; the test set is whatever was not sampled.
  train_df = ratings.sample(frac=0.8, random_state=42)
  test_df = ratings.drop(train_df.index)

  n_users, n_movies = len(user_to_idx), len(movie_to_idx)
  n_factors = 50
  conv_ready_shape = (1, n_factors, 1)  # adds height and channel axes for Conv2D

  # One scalar id per example on each input.
  movie_input = keras.layers.Input(shape=[1], name='movie')
  user_input = keras.layers.Input(shape=[1], name='user')

  movie_vec = keras.layers.Embedding(
    input_dim=n_movies, output_dim=n_factors, name='movie_embedding'
  )(movie_input)
  user_vec = keras.layers.Embedding(
    input_dim=n_users, output_dim=n_factors, name='user_embedding'
  )(user_input)

  movie_grid = keras.layers.Reshape(target_shape=conv_ready_shape)(movie_vec)
  user_grid = keras.layers.Reshape(target_shape=conv_ready_shape)(user_vec)

  # Separate convolution per branch: 32 filters for movies, 64 for users.
  movie_conv = keras.layers.Conv2D(
    filters=32, kernel_size=(3, 3), activation='relu', padding='same'
  )(movie_grid)
  user_conv = keras.layers.Conv2D(
    filters=64, kernel_size=(3, 3), activation='relu', padding='same'
  )(user_grid)

  return keras.layers.concatenate([movie_conv, user_conv])

def common_layer2():
  """Finish the network on top of the global ``pool`` tensor, then train and evaluate it.

  Reads the globals ``pool``, ``movie_input``, ``user_input``, ``train_df`` and
  ``test_df``; ``pool`` must be assigned before this function is called.
  Stores the object returned by ``model.fit`` in the global ``history`` and
  prints the test RMSE and MAE.
  """
  # Flatten layer
  flatten = keras.layers.Flatten()(pool)

  # Output layer
  output = keras.layers.Dense(1, activation='relu')(flatten)

  # Define model
  model = keras.Model(inputs=[movie_input, user_input], outputs=output)

  # Compile model
  model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

  # Train the model on the training set.
  # NOTE(review): the test split doubles as validation data for early stopping,
  # so the test metrics printed below are not from fully held-out data.
  global history
  history = model.fit(
    [train_df['movie_idx'].values, train_df['user_idx'].values], train_df['rating'].values,
    epochs=20, batch_size=32, verbose=1,
    validation_data=([test_df['movie_idx'].values, test_df['user_idx'].values], test_df['rating'].values),
    callbacks=[callbacks.EarlyStopping(patience=3, restore_best_weights=True)]
  )

  # Evaluate the model on the test set; the loss is MSE, so its square root is the RMSE.
  test_loss, test_mae = model.evaluate([test_df['movie_idx'].values, test_df['user_idx'].values], test_df['rating'].values)
  test_rmse = np.sqrt(test_loss)
  print("Test RMSE:", test_rmse)
  # Report the MAE metric itself (the original printed the MSE loss under this label).
  print("Test MAE:", test_mae)


Plotting

In [None]:
def plot():
  """Plot the per-epoch loss curves recorded in the global ``history``.

  Training loss is always drawn; validation loss is drawn as well when the
  history contains a ``val_loss`` entry.
  """
  # Local import: the notebook's top import cell does not bring in matplotlib.
  import matplotlib.pyplot as plt

  # Get training and validation loss histories
  training_loss = history.history['loss']
  validation_loss = history.history.get('val_loss')

  # Create count of the number of epochs
  epoch_count = range(1, len(training_loss) + 1)

  # Visualize loss history
  plt.plot(epoch_count, training_loss, 'r', label='Training Loss')
  if validation_loss is not None:
    plt.plot(epoch_count, validation_loss, 'b', label='Validation Loss')
  plt.legend()
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.show()

Max Pooling for 100k dataset

In [None]:
# Max-pooling experiment: reload the CSVs, rebuild the convolutional branches,
# pool them, then train/evaluate and plot the recorded loss history.
input_100k()
merged=common_layer1()
# common_layer2() reads this module-level `pool`, so it must be assigned before the call.
pool = keras.layers.MaxPooling2D(pool_size=(1, 2))(merged)
common_layer2()
plot()

In [None]:
import matplotlib.pyplot as plt

def plot():
  """Draw training and validation loss per epoch from the global ``history``."""
  recorded = history.history
  # Look up both series before drawing anything.
  train_curve, val_curve = recorded['loss'], recorded['val_loss']

  # Epochs are numbered from 1 on the x axis.
  epochs = range(1, len(train_curve) + 1)

  series = (
    (train_curve, 'r', 'Training Loss'),
    (val_curve, 'b', 'Validation Loss'),
  )
  for curve, fmt, caption in series:
    plt.plot(epochs, curve, fmt, label=caption)

  plt.legend()
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.show()


In [None]:
# Max-pooling experiment again, this time plotted with the `plot` defined in the
# cell above (training and validation loss). The data is reloaded and the model
# rebuilt and retrained from scratch.
input_100k()
merged=common_layer1()
# common_layer2() reads this module-level `pool`, so it must be assigned before the call.
pool = keras.layers.MaxPooling2D(pool_size=(1, 2))(merged)
common_layer2()
plot()

Average Pooling for 100k dataset

In [None]:
# Average-pooling experiment: same pipeline as the max-pooling cells, with
# AveragePooling2D as the pooling layer.
input_100k()
merged=common_layer1()
# common_layer2() reads this module-level `pool`, so it must be assigned before the call.
pool = keras.layers.AveragePooling2D(pool_size=(1, 2))(merged)
common_layer2()
plot()

Cross-Convolution Filter (CCF) for 100k dataset


In [None]:
def cross_conv_filter(window_size=5, movie_name=None, movie_year=None):
    """Print up to ten "recommended" movies derived from one movie's rating row.

    Reads the global ``ratings`` and ``movies`` frames. The selected movie's
    row of the movie-by-user rating matrix is zero-padded and convolved twice
    with a ``[-1, 0, 1]`` kernel; the positions of the largest responses are
    then looked up in the matrix's movie index and printed best-first.

    Args:
        window_size: Each padding step adds ``window_size // 2`` zeros on both
            sides of the array.
        movie_name: Title without the year suffix. Prompted for on stdin when
            ``None``.
        movie_year: Release year, combined with the name as ``"name (year)"``.
            Prompted for on stdin when ``None``.

    Raises:
        IndexError: If no row of ``movies`` has the requested title.
        KeyError: If the movie exists but has no ratings.
    """
    # Merge the two datasets on the 'movieId' column
    data = pd.merge(ratings, movies, on='movieId')

    # Movie-by-user rating matrix; unrated cells become 0.
    ratings_matrix = data.pivot_table(index='movieId', columns='userId', values='rating').fillna(0)

    # Only fall back to the keyboard when the caller did not supply the movie.
    if movie_name is None:
        movie_name = input("Enter a movie name: ")
    if movie_year is None:
        movie_year = input("Enter movie year: ")
    full_title = "{} ({})".format(movie_name, movie_year)

    matching_ids = movies.loc[movies['title'] == full_title, 'movieId'].values
    if len(matching_ids) == 0:
        # Same exception type as the original `.values[0]` lookup, with a usable message.
        raise IndexError("No movie titled {!r} found in the movies table".format(full_title))
    movie_id = matching_ids[0]
    if movie_id not in ratings_matrix.index:
        raise KeyError("Movie {!r} (movieId {}) has no ratings".format(full_title, movie_id))

    # Get the ratings for the given movie (one value per user column)
    movie_ratings = ratings_matrix.loc[movie_id].values

    # Pad the ratings array with zeros on both sides
    half_window = window_size // 2
    padded_ratings = np.pad(movie_ratings, (half_window, half_window), mode='constant', constant_values=0)

    # Define the filters; the "vertical" one is flattened before use, so both passes are 1-D.
    h_filter = np.array([-1, 0, 1])
    v_filter = np.array([[-1], [0], [1]])

    # Calculate the horizontal convolution
    h_convolved_ratings = np.convolve(padded_ratings, h_filter, mode='valid')

    # Pad the horizontal convolved ratings array with zeros on both sides
    padded_h_convolved_ratings = np.pad(h_convolved_ratings, (half_window, half_window), mode='constant', constant_values=0)

    # Calculate the vertical convolution
    v_convolved_ratings = np.convolve(padded_h_convolved_ratings, v_filter.flatten(), mode='valid')

    # Positions of the (up to) 10 highest values, best first; stable sort keeps ties deterministic.
    top_indices = np.argsort(v_convolved_ratings, kind='stable')[-10:][::-1]

    # NOTE(review): the scores are positions along the *user* axis of the selected
    # row (shifted by the padding), yet they are used below as row positions into
    # the *movie* index. That mapping looks unintended and can raise IndexError when
    # a position exceeds the number of movies - confirm the intended algorithm.
    movie_indices = ratings_matrix.index[top_indices]
    movie_scores = v_convolved_ratings[top_indices]

    # Look titles up by id so that each title stays paired with its own score
    # (an `isin` filter returns titles in `movies` row order instead).
    title_by_id = movies.drop_duplicates(subset='movieId').set_index('movieId')['title']
    movie_titles = title_by_id.reindex(movie_indices).values

    # Print the recommended movies
    print('Recommended movies for movieId {}:'.format(movie_id))
    for title, score in zip(movie_titles, movie_scores):
        print('{}, score: {:.2f}'.format(title, score))


# Example usage 100k: reload the CSVs, then run the filter.
# cross_conv_filter() prompts on stdin for a movie name and a year.
input_100k()
cross_conv_filter()
