In [4]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tabulate
from collections import defaultdict
import csv
import pickle
pd.set_option("display.precision", 1)

# Modified recsysNN_utils functions to work with current file structure
def load_data():
    """Read the prepared lab data files from the current working directory.

    Returns:
      tuple: (item_train, user_train, y_train, item_features, user_features,
              item_vecs, movie_dict, user_to_genre)
        item_train, user_train, y_train, item_vecs : ndarrays parsed from the
            comma-separated 'content_*.csv' files
        item_features, user_features : column names taken from the first row
            of the matching '*_header.txt' file
        movie_dict : defaultdict mapping int movie id -> {"title", "genres"}
        user_to_genre : dict mapping int user id -> {'movies': {}, 'genres': row[3:]}
            built from user_train because the original pickle file is not
            available; 'movies' always starts out empty
    """
    def _read_matrix(path):
        # all numeric inputs are plain comma-separated text
        return np.genfromtxt(path, delimiter=',')

    def _read_header(path):
        # only the first CSV row of a header file is used
        with open(path, newline='') as handle:
            return list(csv.reader(handle))[0]

    item_train = _read_matrix('content_item_train.csv')
    user_train = _read_matrix('content_user_train.csv')
    y_train = _read_matrix('content_y_train.csv')

    item_features = _read_header('content_item_train_header.txt')
    user_features = _read_header('content_user_train_header.txt')

    item_vecs = _read_matrix('content_item_vecs.csv')

    movie_dict = defaultdict(dict)
    with open('content_movie_list.csv', newline='') as csvfile:
        records = csv.reader(csvfile, delimiter=',', quotechar='"')
        next(records, None)  # skip header
        for record in records:
            entry = movie_dict[int(record[0])]
            entry["title"] = record[1]
            entry["genres"] = record[2]

    # Create a simple user_to_genre mapping since pickle file is not available.
    # When a user id appears on several rows, the last row wins.
    user_to_genre = {
        int(row[0]): {
            'movies': {},       # Will be populated with movie ratings
            'genres': row[3:],  # Genre preferences
        }
        for row in user_train
    }

    return (item_train, user_train, y_train, item_features, user_features,
            item_vecs, movie_dict, user_to_genre)

def pprint_train(x_train, features, vs, u_s, maxcount=5, user=True):
    """ Prints the first rows of user_train or item_train as an HTML table.

    Args:
      x_train (ndarray (m,n)): rows to display; columns 0 and 1 are printed as
                               integers, every later column as a float
      features (list of str) : column names for x_train
      vs (int)               : index of the first genre column in features
      u_s (int)              : number of leading column names to wrap in
                               [brackets] (the columns not used in training)
      maxcount (int)         : maximum number of rows to print
      user (bool)            : True formats the genre columns with one decimal,
                               False formats them as integers
    Returns:
      None; the table is printed.
    """
    # 3 leading columns + 14 genre columns; the format list is fixed at 17 entries
    genre_fmt = ".1f" if user else ".0f"
    flist = [".0f", ".0f", ".1f"] + [genre_fmt] * 14

    head = features[:vs]  # slicing a list copies it, so the caller's names are untouched
    if vs < u_s:
        # was a plain string, so the literal text "{vs}" / "{u_s}" was printed
        print(f"error, vector start {vs} should be greater than user start {u_s}")
    for i in range(u_s):
        head[i] = "[" + head[i] + "]"
    genres = features[vs:]
    hdr = head + genres
    disp = [split_str(hdr, 5)]
    for i in range(x_train.shape[0]):
        if i == maxcount:
            break
        disp.append([x_train[i, 0].astype(int),
                     x_train[i, 1].astype(int),
                     x_train[i, 2].astype(float),
                     *x_train[i, 3:].astype(float)
                    ])
    table = tabulate.tabulate(disp, tablefmt='html', headers="firstrow", floatfmt=flist, numalign='center')
    print(table)

def split_str(ifeatures, smax):
    """ Break long feature names in two so table headers stay narrow.

    A name longer than smax characters that contains no space gets a single
    space inserted at its midpoint; every other name is passed through as is.

    Args:
      ifeatures (iterable of str): feature names
      smax (int)                 : longest name left unsplit
    Returns:
      list of str: the names, in the same order
    """
    def _fit(name):
        # names that already contain a space, or are short enough, stay as they are
        if ' ' in name or len(name) <= smax:
            return name
        half = int(len(name) / 2)
        return name[:half] + " " + name[half:]

    return [_fit(name) for name in ifeatures]

def print_pred_movies(y_p, item, movie_dict, maxcount=10):
    """ Print an HTML table of predictions for a new user.

    Inputs are expected to be in sorted order and unscaled.

    Args:
      y_p (ndarray (m,1))   : predicted ratings
      item (ndarray (m,n))  : item rows; column 0 is read as the movie id and
                              column 2 as the movie's average rating
      movie_dict (dict)     : movie id -> {'title': ..., 'genres': ...}
      maxcount (int)        : maximum number of rows to print
    """
    rows = [["y_p", "movie id", "rating ave", "title", "genres"]]

    for i in range(y_p.shape[0]):
        if i == maxcount:
            break
        movie_id = item[i, 0].astype(int)
        predicted = np.around(y_p[i, 0], 1)
        average = np.around(item[i, 2].astype(float), 1)
        info = movie_dict[movie_id]
        rows.append([predicted, movie_id, average, info['title'], info['genres']])

    print(tabulate.tabulate(rows, tablefmt='html', headers="firstrow"))

def gen_user_vecs(user_vec, num_items):
    """ Repeat a user vector once per item.

    Args:
      user_vec (ndarray) : the user's feature vector
      num_items (int)    : number of copies to stack row-wise
    Returns:
      ndarray: user_vec tiled num_items times along axis 0, so it can be
               paired row by row with item_vecs
    """
    repetitions = (num_items, 1)
    return np.tile(user_vec, repetitions)

def get_user_vecs(user_id, user_train, item_vecs, user_to_genre):
    """ Build the prediction inputs for a user that is already in the data set.

    Args:
      user_id (int)             : id of the user
      user_train (ndarray (m,n)): user rows; column 0 holds the user id
      item_vecs (ndarray (k,p)) : item rows; column 0 holds the movie id
      user_to_genre (dict)      : user id -> dict, optionally holding a
                                  'movies' dict of movie id -> rating
    Returns:
      (user_vecs, y) where user_vecs is the user's row repeated k times and
      y (ndarray (k,)) holds one rating per item, or None (after printing an
      error) when user_id is missing from user_to_genre or from user_train.

    NOTE(review): a movie with no recorded rating gets a random value from
    np.random.uniform(1, 5) rather than 0. This is a demo placeholder, so y is
    not ground truth and varies from call to call unless numpy is seeded.
    """
    if user_id not in user_to_genre:
        print("error: unknown user id")
        return None

    # first row whose id matches, or None when the user never appears
    user_vec = next((row for row in user_train if row[0] == user_id), None)
    if user_vec is None:
        # previously execution continued and failed with UnboundLocalError on user_vec
        print("error in get_user_vecs, did not find uid in user_train")
        return None

    num_items = len(item_vecs)
    user_vecs = np.tile(user_vec, (num_items, 1))

    profile = user_to_genre[user_id]
    known_ratings = profile['movies'] if 'movies' in profile else {}

    y = np.zeros(num_items)
    for i in range(num_items):  # walk through movies in item_vecs, see if user has rated them
        movie_id = item_vecs[i, 0]
        if movie_id in known_ratings:
            y[i] = known_ratings[movie_id]
        else:
            y[i] = np.random.uniform(1, 5)  # Generate random rating for demo
    return (user_vecs, y)

def print_existing_user(y_p, y, user, items, ivs, uvs, movie_dict, maxcount=10):
    """ Print an HTML table of predictions for a user who is in the database.

    Inputs are expected to be in sorted order, unscaled.

    Args:
      y_p (ndarray (m,1))   : predicted ratings
      y (ndarray (m,1))     : recorded ratings; rows where y is 0 are skipped
      user (ndarray (m,n))  : user rows aligned with items; column 0 is the user id
      items (ndarray (m,p)) : item rows; column 0 is the movie id, column 2 the
                              movie's average rating
      ivs (int)             : first genre column in items
      uvs (int)             : first genre column in user
      movie_dict (dict)     : movie id -> {'title': ..., 'genres': ...}
      maxcount (int)        : maximum number of rows to print
    """
    rows = [["y_p", "y", "user", "user genre ave", "movie rating ave", "movie id", "title", "genres"]]
    shown = 0
    for i in range(y.shape[0]):
        if y[i, 0] == 0:  # zero means not rated
            continue
        if shown == maxcount:
            break
        shown += 1
        movie_id = items[i, 0].astype(int)

        # positions of the genres this movie is tagged with (flag == 1)
        genre_cols = np.nonzero(items[i, ivs:] == 1)[0]
        # the user's values for exactly those genres
        genre_ratings = user[i, uvs + genre_cols] if len(genre_cols) > 0 else np.array([])

        genre_text = np.array2string(genre_ratings,
                                     formatter={'float_kind':lambda x: "%.1f" % x},
                                     separator=',', suppress_small=True)
        rows.append([y_p[i, 0], y[i, 0],
                     user[i, 0].astype(int),      # userid
                     genre_text,
                     items[i, 2].astype(float),    # movie average rating
                     movie_id,
                     movie_dict[movie_id]['title'],
                     movie_dict[movie_id]['genres']])

    print(tabulate.tabulate(rows, tablefmt='html', headers="firstrow", floatfmt=[".1f", ".1f", ".0f", ".2f", ".1f"]))

# Public test functions
def test_tower(nn_model):
    """Check that a tower network has at least two layers.

    Args:
      nn_model: object exposing a `layers` sequence (the notebook passes the
                user_NN / item_NN Sequential models)
    Raises:
      AssertionError: if fewer than 2 layers are present
    """
    layer_count = len(nn_model.layers)
    print(f"Neural network has {layer_count} layers")
    assert layer_count >= 2, "Network should have at least 2 layers"
    print("✓ Tower test passed")

def test_sq_dist(sq_dist_func):
    """Check a squared-distance implementation on two fixed vector pairs.

    Args:
      sq_dist_func (callable): takes two ndarrays and returns their squared
                               distance
    Raises:
      AssertionError: if identical vectors do not give exactly 0.0, or the
                      second pair is not within 0.01 of 0.03
    """
    identical = (np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 3.0]))
    shifted = (np.array([1.1, 2.1, 3.1]), np.array([1.0, 2.0, 3.0]))  # every element differs by 0.1
    assert sq_dist_func(*identical) == 0.0, "Distance between identical vectors should be 0"
    assert abs(sq_dist_func(*shifted) - 0.03) < 0.01, "Distance calculation incorrect"
    print("✓ Squared distance test passed")

# Load the CSV files for display
# Best effort: the two DataFrames are only printed here. If either file is
# missing or unreadable the error is reported and execution continues.
try:
    top10_df = pd.read_csv("content_top10_df.csv")
    bygenre_df = pd.read_csv("content_bygenre_df.csv")
    print("Top 10 Movies DataFrame:")
    print(top10_df)
    print("\nBy Genre DataFrame:")
    print(bygenre_df)
except Exception as e:
    # deliberately broad so that any load/parse failure is non-fatal
    print(f"Could not load display CSV files: {e}")

# Load Data, set configuration variables
item_train, user_train, y_train, item_features, user_features, item_vecs, movie_dict, user_to_genre = load_data()

num_user_features = user_train.shape[1] - 3  # remove userid, rating count and ave rating during training
num_item_features = item_train.shape[1] - 1  # remove movie id at train time
uvs = 3  # user genre vector start
ivs = 3  # item genre vector start
u_s = 3  # start of columns to use in training, user
i_s = 1  # start of columns to use in training, items
print(f"Number of training vectors: {len(item_train)}")

# show the first 5 user rows, then the first 5 item rows, before any scaling
pprint_train(user_train, user_features, uvs,  u_s, maxcount=5)

pprint_train(item_train, item_features, ivs, i_s, maxcount=5, user=False)

print(f"y_train[:5]: {y_train[:5]}")

# scale training data
# keep references to the raw arrays; transform() below returns new arrays, so
# these names keep pointing at the unscaled data
item_train_unscaled = item_train
user_train_unscaled = user_train
y_train_unscaled    = y_train

# NOTE(review): all three scalers are fitted on the full data set, before the
# train/test split below, so the test rows contribute to the scaling statistics.
scalerItem = StandardScaler()
scalerItem.fit(item_train)
item_train = scalerItem.transform(item_train)

scalerUser = StandardScaler()
scalerUser.fit(user_train)
user_train = scalerUser.transform(user_train)

# targets are mapped into the range (-1, 1)
scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_train.reshape(-1, 1))
y_train = scalerTarget.transform(y_train.reshape(-1, 1))

# sanity check: inverse_transform should reproduce the raw arrays
print(np.allclose(item_train_unscaled, scalerItem.inverse_transform(item_train)))
print(np.allclose(user_train_unscaled, scalerUser.inverse_transform(user_train)))

# The three arrays are split by separate calls that share random_state=1.
# This presumably relies on all three having the same number of rows so that
# the shuffles match and item/user/target rows stay aligned -- TODO confirm.
item_train, item_test = train_test_split(item_train, train_size=0.80, shuffle=True, random_state=1)
user_train, user_test = train_test_split(user_train, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"movie/item training data shape: {item_train.shape}")
print(f"movie/item test data shape: {item_test.shape}")

# user rows again, now scaled and shuffled
pprint_train(user_train, user_features, uvs, u_s, maxcount=5)

# GRADED_CELL
# UNQ_C1

# Two towers with the same layer layout (256 -> 128 -> num_outputs): one maps
# user features and one maps item features to a num_outputs-dimensional vector.
num_outputs = 32
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
    ### START CODE HERE ###
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear')
    ### END CODE HERE ###
])

item_NN = tf.keras.models.Sequential([
    ### START CODE HERE ###
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear')
    ### END CODE HERE ###
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features,))
vu = user_NN(input_user)
# normalise each user vector along axis 1 (presumably L2, the utility's default -- TODO confirm)
vu = tf.keras.utils.normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features,))
vm = item_NN(input_item)
vm = tf.keras.utils.normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

# Public tests
test_tower(user_NN)
test_tower(item_NN)

tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

tf.random.set_seed(1)
# the id/count/average columns are sliced off (u_s, i_s) before training
model.fit([user_train[:, u_s:], item_train[:, i_s:]], y_train, epochs=30)

# loss on the held-out 20% split
model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)

# Hand-built profile for a user who is not in the data set: only Adventure and
# Fantasy are non-zero.
new_user_id = 5000
new_rating_ave = 0.0
new_action = 0.0
new_adventure = 5.0
new_animation = 0.0
new_childrens = 0.0
new_comedy = 0.0
new_crime = 0.0
new_documentary = 0.0
new_drama = 0.0
new_fantasy = 5.0
new_horror = 0.0
new_mystery = 0.0
new_romance = 0.0
new_scifi = 0.0
new_thriller = 0.0
new_rating_count = 3

# Column order: user id, rating count, rating average, then the 14 genre values.
# The first three columns are dropped again by the [:, u_s:] slice at predict time.
user_vec = np.array([[new_user_id, new_rating_count, new_rating_ave,
                      new_action, new_adventure, new_animation, new_childrens,
                      new_comedy, new_crime, new_documentary,
                      new_drama, new_fantasy, new_horror, new_mystery,
                      new_romance, new_scifi, new_thriller]])

# generate and replicate the user vector to match the number movies in the data set.
user_vecs = gen_user_vecs(user_vec,len(item_vecs))

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display

print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)

uid = 2
# form a set of user vectors. This is the same vector, transformed and repeated.
# NOTE(review): get_user_vecs returns None for an unknown user, which would make
# this unpacking fail. Also, y_vecs holds random placeholder ratings for movies
# with no recorded rating (see get_user_vecs), so the "y" column printed below
# is not ground truth.
user_vecs, y_vecs = get_user_vecs(uid, user_train_unscaled, item_vecs, user_to_genre)

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display
sorted_user  = user_vecs[sorted_index]
sorted_y     = y_vecs[sorted_index]

#print sorted predictions for movies rated by the user
print_existing_user(sorted_ypu, sorted_y.reshape(-1,1), sorted_user, sorted_items, ivs, uvs, movie_dict, maxcount = 50)

# GRADED_FUNCTION: sq_dist
# UNQ_C2
def sq_dist(a,b):
    """
    Computes the squared Euclidean distance between two vectors, i.e. the
    sum over all elements of (a_i - b_i)^2
    Args:
      a (ndarray (n,)): vector with n features
      b (ndarray (n,)): vector with n features
    Returns:
      d (float) : squared distance
    """
    ### START CODE HERE ###
    delta = a - b
    d = np.sum(delta * delta)
    ### END CODE HERE ###
    return d

# Quick demonstration of sq_dist on three hand-made vector pairs
a1 = np.array([1.0, 2.0, 3.0]); b1 = np.array([1.0, 2.0, 3.0])
a2 = np.array([1.1, 2.1, 3.1]); b2 = np.array([1.0, 2.0, 3.0])
a3 = np.array([0, 1, 0]);       b3 = np.array([1, 0, 0])
print(f"squared distance between a1 and b1: {sq_dist(a1, b1):0.3f}")
print(f"squared distance between a2 and b2: {sq_dist(a2, b2):0.3f}")
print(f"squared distance between a3 and b3: {sq_dist(a3, b3):0.3f}")

# Public tests
test_sq_dist(sq_dist)

# Stand-alone model that maps item features to the item tower's normalised
# output vector; it reuses item_NN, so no new training happens here.
input_item_m = tf.keras.layers.Input(shape=(num_item_features,))    # input layer
vm_m = item_NN(input_item_m)                                       # use the trained item_NN
vm_m = tf.keras.utils.normalize(vm_m, axis=1)                      # incorporate normalization as was done in the original model
model_m = tf.keras.Model(input_item_m, vm_m)
model_m.summary()

# one feature vector per movie in item_vecs
scaled_item_vecs = scalerItem.transform(item_vecs)
vms = model_m.predict(scaled_item_vecs[:,i_s:])
print(f"size of all predicted movie feature vectors: {vms.shape}")

count = 50  # number of movies to display
dim = len(vms)
dist = np.zeros((dim,dim))

# full pairwise squared-distance matrix: dim * dim calls to sq_dist
for i in range(dim):
    for j in range(dim):
        dist[i,j] = sq_dist(vms[i, :], vms[j, :])

m_dist = ma.masked_array(dist, mask=np.identity(dist.shape[0]))  # mask the diagonal

# for each of the first `count` movies, list its nearest neighbour; the masked
# diagonal keeps a movie from being matched with itself
disp = [["movie1", "genres", "movie2", "genres"]]
for i in range(count):
    min_idx = np.argmin(m_dist[i])
    movie1_id = int(item_vecs[i,0])
    movie2_id = int(item_vecs[min_idx,0])
    disp.append( [movie_dict[movie1_id]['title'], movie_dict[movie1_id]['genres'],
                  movie_dict[movie2_id]['title'], movie_dict[movie2_id]['genres']]
               )
table = tabulate.tabulate(disp, tablefmt='html', headers="firstrow")
print(table)

Top 10 Movies DataFrame:
   movie id  num ratings  ave rating  \
0      4993          198         4.1   
1      5952          188         4.0   
2      7153          185         4.1   
3      4306          170         3.9   
4     58559          149         4.2   
5      6539          149         3.8   
6     79132          143         4.1   
7      6377          141         4.0   
8      4886          132         3.9   
9      7361          131         4.2   

                                               title  \
0  Lord of the Rings: The Fellowship of the Ring,...   
1             Lord of the Rings: The Two Towers, The   
2     Lord of the Rings: The Return of the King, The   
3                                              Shrek   
4                                   Dark Knight, The   
5  Pirates of the Caribbean: The Curse of the Bla...   
6                                          Inception   
7                                       Finding Nemo   
8                             

Neural network has 3 layers
✓ Tower test passed
Neural network has 3 layers
✓ Tower test passed
Epoch 1/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 0.1295
Epoch 2/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.1144
Epoch 3/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 0.1085
Epoch 4/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.1047
Epoch 5/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.1016
Epoch 6/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0992
Epoch 7/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0971
Epoch 8/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0950
Epoch 9/30
[1m1273/1273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
size of all predicted movie feature vectors: (847, 32)
<table>
<thead>
<tr><th>movie1                                  </th><th>genres                                             </th><th>movie2                                                     </th><th>genres                                     </th></tr>
</thead>
<tbody>
<tr><td>Save the Last Dance (2001)              </td><td>Drama|Romance                                      </td><td>Mona Lisa Smile (2003)                                     </td><td>Drama|Romance                              </td></tr>
<tr><td>Wedding Planner, The (2001)             </td><td>Comedy|Romance                                     </td><td>Mr. Deeds (2002)                                           </td><td>Comedy|Romance                             </td></tr>
<tr><td>Hannibal (2001)                         </td><td>Horror|Thriller                                    </td><td>Final

### Step-by-Step Explanation of the Code

---

#### 1. Import necessary libraries
- The code imports libraries such as `numpy`, `pandas`, `tensorflow`, `scikit-learn`, `tabulate`, `csv`, `pickle`, and `defaultdict`.
- These are used for numerical computations, data manipulation, neural network building, data scaling, and table formatting.

---

#### 2. Load and process the dataset
- The `load_data` function loads:
  - `content_item_train.csv`: item training features
  - `content_user_train.csv`: user training features
  - `content_y_train.csv`: ratings
  - `content_item_vecs.csv`: item vectors
  - `content_movie_list.csv`: movie metadata
- Builds `movie_dict` mapping movie IDs to titles and genres.
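- Builds `user_to_genre`, a stand-in for the unavailable pickle file: it maps each user ID to that user's genre-preference columns and to a `movies` rating dict that starts out empty.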

---

#### 3. Helper functions for display and processing
- `pprint_train`: Formats and prints user or item data.
- `split_str`: Splits long feature names for display.
- `print_pred_movies`: Displays top predicted movies for a user.
- `gen_user_vecs`: Replicates a user vector across items.
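- `get_user_vecs`: Repeats an existing user's feature row once per item and returns a ratings vector; a movie with no recorded rating receives a random placeholder rating between 1 and 5.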
- `print_existing_user`: Shows predictions for known users.
- `test_tower`: Verifies network architecture.
- `test_sq_dist`: Tests squared distance function.

---

#### 4. Load data for visualization
- Loads `content_top10_df.csv` and `content_bygenre_df.csv` into pandas DataFrames and prints them.

---

#### 5. Prepare training data
- Calls `load_data` to load all data.
- Computes number of user and item features.
- Displays examples of user and item data.
- Scales user, item, and target data.
- Splits data into training and test sets.

---

#### 6. Build neural network models
- Builds `user_NN` and `item_NN` networks with layers [256, 128, 32].
- Normalizes outputs of each network.
- Combines user and item outputs with a dot product in the final model.

---

#### 7. Train the model
- Compiles model with Adam optimizer and MSE loss.
- Trains for 30 epochs.
- Evaluates performance on test data.

---

#### 8. Make predictions for a new user
- Defines a new user with specific genre preferences.
- Generates vectors and scales them.
- Predicts ratings, unscales them, and displays top movies.

---

#### 9. Make predictions for an existing user
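- Gets the user vectors and the ratings vector from `get_user_vecs`.
- Predicts, unscales, sorts predictions, and displays the top results next to that ratings vector. Because `user_to_genre` holds no recorded ratings, the displayed `y` values are random placeholders, not actual ratings.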

---

#### 10. Calculate and test squared distance
- Defines `sq_dist` to compute squared distance between vectors.
- Tests correctness of the function on example vectors.

---

#### 11. Find similar movies
- Generates item embeddings using the trained item network.
- Calculates distances between all movie pairs.
- Finds the most similar movie for each movie and displays pairs.

---

#### Summary
- This code implements a neural network-based movie recommender.
- Provides predictions for both new and existing users.
- Computes movie similarities using learned embeddings.


In [5]:
# Interactive User Input Test for Movie Recommendations
# Banner for the interactive section: the title between two rules of 80 '=' characters.
print("\n" + "="*80)
print("INTERACTIVE MOVIE RECOMMENDATION SYSTEM")
print("="*80)

def get_user_preferences():
    """Prompt on stdin for a 0-5 preference for each of the 14 genres.

    An empty answer counts as 0.0. Anything that is not a number, or lies
    outside 0..5, is rejected with a message and the same genre is asked again.

    Returns:
      dict: genre name -> float preference, in the order the genres are asked
    """
    print("\nPlease rate your preference for each genre (0-5, where 0=hate, 5=love):")
    print("You can also press Enter to use default value of 0")

    genre_names = ('Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
                   'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance',
                   'Sci-Fi', 'Thriller')

    def _ask(genre):
        # keep asking until the answer is blank or a number within 0..5
        while True:
            try:
                answer = input(f"{genre} (0-5): ").strip()
                value = float(answer) if answer != "" else 0.0
            except ValueError:
                print("Please enter a valid number")
                continue
            if 0 <= value <= 5:
                return value
            print("Please enter a number between 0 and 5")

    return {genre: _ask(genre) for genre in genre_names}

def create_user_vector_from_input(preferences, user_id=9999):
    """Create a (1, 17) user vector from a genre-preference mapping.

    Layout: [user_id, rating_count, rating_ave, <14 genre values>] with the
    genres in the fixed order Action ... Thriller.

    Args:
      preferences (dict): genre name -> preference value. A genre that is
                          missing counts as 0.0, so partial or empty mappings
                          are accepted (previously they raised KeyError, which
                          made the empty-mapping guard on the average unreachable).
      user_id (int)     : id stored in column 0
    Returns:
      ndarray (1, 17): the user vector
    """
    genre_order = ('Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
                   'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance',
                   'Sci-Fi', 'Thriller')

    # both statistics are computed over the entries actually supplied
    rating_count = sum(1 for v in preferences.values() if v > 0)  # Count non-zero preferences
    rating_ave = sum(preferences.values()) / len(preferences) if preferences else 0.0

    genre_values = [preferences.get(genre, 0.0) for genre in genre_order]

    return np.array([[user_id, rating_count, rating_ave, *genre_values]])

def predict_for_custom_user(user_preferences, top_n=10):
    """Generate and print recommendations for a custom genre profile.

    Builds a user vector from the preferences, pairs it with every row of the
    module-level item_vecs, scales both with the fitted scalers, predicts with
    the module-level model and prints the top_n highest predictions.

    Args:
      user_preferences (dict): genre name -> preference value
      top_n (int)            : number of ranked predictions to consider; a
                               movie whose id is missing from movie_dict is
                               skipped, so fewer entries may be printed
    """
    print(f"\nGenerating top {top_n} recommendations based on your preferences...")

    # Create user vector
    custom_user_vec = create_user_vector_from_input(user_preferences)

    # Display user preferences
    print(f"\nYour preference profile:")
    print(f"Average preference: {custom_user_vec[0][2]:.1f}")
    print("Genre preferences:")
    genre_names = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
                   'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance',
                   'Sci-Fi', 'Thriller']

    # genre values start at column 3 of the user vector; zero entries are not listed
    for offset, genre in enumerate(genre_names):
        preference = custom_user_vec[0][3 + offset]
        if preference > 0:
            print(f"  {genre}: {preference:.1f}")

    # Generate predictions
    user_vecs_custom = gen_user_vecs(custom_user_vec, len(item_vecs))
    suser_vecs_custom = scalerUser.transform(user_vecs_custom)
    sitem_vecs_custom = scalerItem.transform(item_vecs)

    # Make predictions
    y_p_custom = model.predict([suser_vecs_custom[:, u_s:], sitem_vecs_custom[:, i_s:]])
    y_pu_custom = scalerTarget.inverse_transform(y_p_custom)

    # Sort results (negated so the highest prediction comes first)
    sorted_index_custom = np.argsort(-y_pu_custom, axis=0).reshape(-1).tolist()
    sorted_ypu_custom = y_pu_custom[sorted_index_custom]
    sorted_items_custom = item_vecs[sorted_index_custom]  # unscaled rows for display

    # Print recommendations
    print(f"\n🎬 TOP {top_n} MOVIE RECOMMENDATIONS FOR YOU:")
    print("-" * 80)

    for rank in range(min(top_n, len(sorted_ypu_custom))):
        movie_id = int(sorted_items_custom[rank][0])
        if movie_id not in movie_dict:
            continue

        predicted_rating = sorted_ypu_custom[rank][0]
        movie_year = int(sorted_items_custom[rank][1])
        movie_avg_rating = sorted_items_custom[rank][2]
        title = movie_dict[movie_id]['title']
        movie_genres = movie_dict[movie_id]['genres']  # separate name: no longer shadows the genre list

        # Titles in the movie list already end with "(year)"; append the year
        # only when it is missing so it is not printed twice.
        year_tag = f"({movie_year})"
        heading = title if title.rstrip().endswith(year_tag) else f"{title} {year_tag}"

        print(f"{rank+1:2d}. {heading}")
        print(f"    🌟 Predicted Rating: {predicted_rating:.1f}/5.0")
        print(f"    📊 Average Rating: {movie_avg_rating:.1f}/5.0")
        print(f"    🎭 Genres: {movie_genres}")
        print()

def run_recommendation_test():
    """Run the interactive menu loop until the user chooses Exit.

    Menu options: 1 asks for genre preferences and prints recommendations,
    2 runs one of three preset profiles, 3 leaves the loop. Any other answer
    prints an error and shows the menu again.
    """
    rule = "=" * 50
    menu_prompt = ("\nChoose an option:\n1. Get personalized recommendations\n"
                   "2. Quick test with preset preferences\n3. Exit\nEnter choice (1-3): ")
    profile_prompt = ("\nChoose a test profile:\n1. Action Movie Fan\n"
                      "2. Family Movie Lover\n3. Drama Enthusiast\nEnter choice (1-3): ")
    # menu digit -> key into the preset profile table
    profile_by_digit = {
        "1": "Action Movie Fan",
        "2": "Family Movie Lover",
        "3": "Drama Enthusiast",
    }

    while True:
        print("\n" + rule)
        print("MOVIE RECOMMENDATION TEST")
        print(rule)

        choice = input(menu_prompt).strip()

        if choice == "3":
            print("Thanks for using the Movie Recommendation System! 🎬")
            return

        if choice == "1":
            # Get custom user input
            user_prefs = get_user_preferences()
            predict_for_custom_user(user_prefs, top_n=10)
            continue

        if choice != "2":
            print("Invalid choice! Please enter 1, 2, or 3.")
            continue

        # Quick test with preset preferences
        print("\nTesting with preset preferences...")

        # Preset user profiles to test
        test_profiles = {
            "Action Movie Fan": {
                'Action': 5.0, 'Adventure': 4.0, 'Animation': 0.0, 'Children': 0.0,
                'Comedy': 2.0, 'Crime': 4.0, 'Documentary': 1.0, 'Drama': 2.0,
                'Fantasy': 3.0, 'Horror': 3.0, 'Mystery': 3.0, 'Romance': 1.0,
                'Sci-Fi': 4.0, 'Thriller': 5.0
            },
            "Family Movie Lover": {
                'Action': 1.0, 'Adventure': 4.0, 'Animation': 5.0, 'Children': 5.0,
                'Comedy': 5.0, 'Crime': 0.0, 'Documentary': 2.0, 'Drama': 3.0,
                'Fantasy': 4.0, 'Horror': 0.0, 'Mystery': 1.0, 'Romance': 3.0,
                'Sci-Fi': 2.0, 'Thriller': 0.0
            },
            "Drama Enthusiast": {
                'Action': 1.0, 'Adventure': 2.0, 'Animation': 1.0, 'Children': 0.0,
                'Comedy': 3.0, 'Crime': 4.0, 'Documentary': 5.0, 'Drama': 5.0,
                'Fantasy': 1.0, 'Horror': 2.0, 'Mystery': 4.0, 'Romance': 4.0,
                'Sci-Fi': 2.0, 'Thriller': 3.0
            }
        }

        profile_choice = input(profile_prompt).strip()
        profile_name = profile_by_digit.get(profile_choice)
        if profile_name is None:
            print("Invalid choice!")
        else:
            # the announcement is the profile name in upper case
            print(f"\n🎬 Testing: {profile_name.upper()}")
            predict_for_custom_user(test_profiles[profile_name])

# Additional utility function to explore existing users
def explore_existing_user(user_id):
    """Print the stored profile and the top 10 predictions for a data-set user.

    Looks the user up in the module-level user_train_unscaled array (column 0
    holds the user id). If the id is absent a message is printed and nothing
    else happens. Otherwise the rating count, average rating and non-zero
    genre preferences are shown, followed by the ten highest predictions.

    Args:
      user_id (int): id of the user to explore
    """
    print(f"\n🔍 EXPLORING EXISTING USER {user_id}")
    print("-" * 50)

    # Check if user exists: first row with a matching id, else None
    user_data = next((row for row in user_train_unscaled if row[0] == user_id), None)
    if user_data is None:
        print(f"User {user_id} not found in dataset!")
        return

    # Display user's genre preferences
    print("User's genre preferences:")
    genre_names = ('Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
                   'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance',
                   'Sci-Fi', 'Thriller')

    print(f"Rating Count: {int(user_data[1])}")
    print(f"Average Rating: {user_data[2]:.1f}")
    print("Genre Preferences:")

    # genre columns start at index 3; zero entries are not listed
    for genre, genre_pref in zip(genre_names, user_data[3:3 + len(genre_names)]):
        if genre_pref > 0:
            print(f"  {genre}: {genre_pref:.1f}")

    # Get recommendations for this user (the ratings vector is not used here)
    tiled_user, _ = get_user_vecs(user_id, user_train_unscaled, item_vecs, user_to_genre)
    scaled_user = scalerUser.transform(tiled_user)
    scaled_items = scalerItem.transform(item_vecs)

    scaled_pred = model.predict([scaled_user[:, u_s:], scaled_items[:, i_s:]])
    pred = scalerTarget.inverse_transform(scaled_pred)

    # highest prediction first
    ranking = np.argsort(-pred, axis=0).reshape(-1).tolist()
    ranked_pred = pred[ranking]
    ranked_items = item_vecs[ranking]

    print(f"\n🎬 TOP 10 RECOMMENDATIONS FOR USER {user_id}:")
    print("-" * 50)

    for rank in range(min(10, len(ranked_pred))):
        movie_id = int(ranked_items[rank][0])
        predicted_rating = ranked_pred[rank][0]

        if movie_id not in movie_dict:
            continue

        title = movie_dict[movie_id]['title']
        movie_genres = movie_dict[movie_id]['genres']

        print(f"{rank+1:2d}. {title}")
        print(f"    Predicted Rating: {predicted_rating:.1f}")
        print(f"    Genres: {movie_genres}")
        print()

# Start the interactive test
# The numbered list below describes what this cell offers; it is not the menu
# itself (in run_recommendation_test, menu option 3 is "Exit").
print("\nStarting Interactive Movie Recommendation System...")
print("This will allow you to:")
print("1. Input your own genre preferences and get personalized recommendations")
print("2. Test with preset user profiles")
print("3. Explore existing users in the dataset")

# Run the main test
# blocks on input() until the user picks "3. Exit"
run_recommendation_test()

# Optional: Test with existing users
print("\n" + "="*60)
print("BONUS: EXPLORE EXISTING USERS")
print("="*60)

explore_choice = input("\nWould you like to explore an existing user from the dataset? (y/n): ").strip().lower()

if explore_choice in ['y', 'yes']:
    print("\nSome user IDs in the dataset: 1, 2, 3, 4, 5, 10, 15, 20...")
    # keep exploring until the user enters 0; non-integer input is rejected and re-prompted
    while True:
        try:
            user_id = int(input("Enter a user ID to explore (or 0 to exit): "))
            if user_id == 0:
                break
            explore_existing_user(user_id)
        except ValueError:
            print("Please enter a valid user ID number")

print("\n🎬 Thank you for testing the Movie Recommendation System! 🎬")


INTERACTIVE MOVIE RECOMMENDATION SYSTEM

Starting Interactive Movie Recommendation System...
This will allow you to:
1. Input your own genre preferences and get personalized recommendations
2. Test with preset user profiles
3. Explore existing users in the dataset

MOVIE RECOMMENDATION TEST

Choose an option:
1. Get personalized recommendations
2. Quick test with preset preferences
3. Exit
Enter choice (1-3): 1

Please rate your preference for each genre (0-5, where 0=hate, 5=love):
You can also press Enter to use default value of 0
Action (0-5): 4
Adventure (0-5): 5
Animation (0-5): 2
Children (0-5): 1
Comedy (0-5): 4
Crime (0-5): 4
Documentary (0-5): 2
Drama (0-5): 3
Fantasy (0-5): 4
Horror (0-5): 5
Mystery (0-5): 3
Romance (0-5): 2
Sci-Fi (0-5): 5
Thriller (0-5): 4

Generating top 10 recommendations based on your preferences...

Your preference profile:
Average preference: 3.4
Genre preferences:
  Action: 4.0
  Adventure: 5.0
  Animation: 2.0
  Children: 1.0
  Comedy: 4.0
  Crime: 