In [1]:
# Test out the new push.

# IPython shell capture: `project` receives the command's output as a list of lines.
project = !gcloud config get-value project
# The first captured line is used as the project ID.
PROJECT_ID = project[0]
# Bare expression so the cell displays the resolved project ID.
PROJECT_ID

'brave-watch-414204'

In [2]:
!pip install scikit-surprise
import pickle
import pandas as pd
from google.cloud import storage

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


#### Get Model from GCS

In [3]:
# Download the pickle file from GCS
def load_pickle_from_gcs(bucket_name, source_blob_name):
    """Fetch a pickled object from a GCS bucket and deserialize it.

    Args:
        bucket_name: Name of the bucket that holds the object.
        source_blob_name: Path of the blob inside the bucket.

    Returns:
        The Python object obtained by unpickling the blob's bytes.

    Note:
        Unpickling can execute arbitrary code, so only point this at
        blobs you trust.
    """
    # Resolve client -> bucket -> blob in a single chained lookup.
    blob = storage.Client().bucket(bucket_name).blob(source_blob_name)

    # The blob is pulled fully into memory and unpickled directly from bytes.
    return pickle.loads(blob.download_as_bytes())

In [4]:
# The bucket name is taken to be the same as the project ID.
bucket_name = PROJECT_ID
folder_path, file_name = "Model", "svd_model.pkl"

# Blob path inside the bucket: "<folder>/<file>".
source_blob_name = "/".join((folder_path, file_name))

# Download and unpickle the stored recommender model.
cf_recommender = load_pickle_from_gcs(
    bucket_name=bucket_name,
    source_blob_name=source_blob_name,
)

In [5]:
# Read the raw interactions from the same bucket the model was loaded from,
# instead of repeating the project-specific bucket name as a literal.
raw_interactions = pd.read_csv(f"gs://{bucket_name}/Raw_Data/RAW_interactions.csv")

# Keep only rows of users with more than 5 interactions (for testing only).
# A vectorized per-row group size replaces the per-group Python lambda; the
# selected rows, their order and their index are unchanged.
interactions_per_user = raw_interactions.groupby('user_id')['user_id'].transform('size')
interactions = raw_interactions[interactions_per_user > 5]

In [6]:
# User to generate recommendations for; change this number to demonstrate
# for another user_id.
cust_user_id = 56680
# How many of the highest-rated predictions to display.
TOP_N = 5

# Get the list of all recipe IDs
recipe_ids = interactions['recipe_id'].unique()

# Get the list of recipe IDs that cust_user_id has not rated
rated_recipe_ids = interactions[interactions['user_id'] == cust_user_id]['recipe_id']
unrated_recipe_ids = list(set(recipe_ids) - set(rated_recipe_ids))

# Predict ratings for unrated recipes by user_id
predictions = []
for recipe_id in unrated_recipe_ids:
    prediction = cf_recommender.predict(cust_user_id, recipe_id)
    predictions.append({'recipe_id': recipe_id, 'predicted_rating': prediction.est})

print("Total no. of predictions:", len(predictions))

# Convert predictions to DataFrame. The columns are given explicitly so the
# frame still has a 'predicted_rating' column when there are no predictions;
# otherwise the sort below would raise KeyError on an empty frame.
predictions_df = pd.DataFrame(predictions, columns=['recipe_id', 'predicted_rating'])

# Sort recipes by predicted ratings in descending order
topN_recommendations = predictions_df.sort_values(by='predicted_rating', ascending=False)

# Print the top recommendations
print(f"Top {TOP_N} Recommendations:")
print(topN_recommendations.head(TOP_N))

Total no. of predictions: 209282
Top 5 Recommendations:
       recipe_id  predicted_rating
13508      27210          5.495052
52872      97496          5.366322
33740      62874          5.269975
25201      48012          5.250849
7503       16842          5.229005
