In [9]:
import pandas as pd
# Read the small user/movie ratings file into a DataFrame and preview it.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV
# was written together with its index — consider `index_col=0` once it is
# confirmed that nothing downstream relies on that column.
ratings_path = 'dataset/ratings_small.csv'
ratings = pd.read_csv(ratings_path)

# Last expression of the cell: display the first five rows.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [10]:
# Read the generated ratings file into a DataFrame and preview it.
# The 'rating' values shown in the preview are fractional and can fall below 1.
ratings_gn_path = 'dataset/Genrated ratings Data.csv'
ratings_gn = pd.read_csv(ratings_gn_path)

# Last expression of the cell: display the first five rows.
ratings_gn.head()

Unnamed: 0,userId,movieId,timestamp,rating
0,510,133730,849588178,2.09758
1,510,148672,846457615,3.033221
2,513,146590,851162656,2.529508
3,510,132772,846242746,2.091977
4,514,146968,833662076,0.333745


In [11]:
# Clamp the generated ratings into the closed interval [1, 5]:
# anything below 1 becomes 1, anything above 5 becomes 5, and values already
# inside the interval (as well as missing values) are left unchanged.
ratings_gn['rating'] = ratings_gn['rating'].clip(lower=1, upper=5)

In [12]:
import mlflow
import mlflow.sklearn
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split
import pandas as pd
from mlflow.models.signature import infer_signature

# Train a Surprise SVD recommender on `ratings_gn` and record the run in MLflow:
# hyperparameters, the fitted model with an input signature, and the RMSE on a
# 20% hold-out split.

# Single source of truth for the SVD hyperparameters. The same mapping is logged
# to MLflow and passed to the constructor, so the recorded values cannot drift
# from the values the model was actually trained with.
svd_params = {
    "n_factors": 10,
    "n_epochs": 50,
    "lr_all": 0.002,
    "reg_all": 0.1,
    "random_state": 42,
}

# Start MLflow run
with mlflow.start_run():
    # Log hyperparameters. Surprise's `lr_all` is recorded under the key
    # "learning_rate", the name this notebook already uses for it in MLflow.
    mlflow.log_params({
        ("learning_rate" if name == "lr_all" else name): value
        for name, value in svd_params.items()
    })

    # Prepare data and train the model. The reader's rating scale is (1, 5),
    # the same bounds the ratings were clamped to in the preceding cell.
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(ratings_gn[['userId', 'movieId', 'rating']], reader)
    # The split reuses the model's seed, as the original code did (both were 42).
    trainset, testset = train_test_split(
        data, test_size=0.2, random_state=svd_params["random_state"]
    )
    model = SVD(**svd_params)
    model.fit(trainset)

    # Input example with float64 'uid' / 'iid' columns; it is used only to
    # infer the model signature stored alongside the logged model.
    input_example = pd.DataFrame({'uid': [1.0], 'iid': [2.0]}).astype(
        {'uid': 'float64', 'iid': 'float64'}
    )
    signature = infer_signature(input_example)

    # Log the model.
    # NOTE(review): `model` is a Surprise SVD, not a scikit-learn estimator, yet
    # it is logged through the sklearn flavor. Its `predict(uid, iid)` call
    # presumably does not accept the DataFrame described by `signature` —
    # confirm the logged artifact can be loaded and served as intended.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model",
        signature=signature
    )

    # Spot-check a single prediction by calling the in-memory model directly.
    # NOTE(review): verify that user 8 and item 5 actually occur in `ratings_gn`;
    # for ids unseen in training the estimate is presumably just a baseline.
    user_id = 8.0
    item_id = 5.0

    # Make a prediction for the specific user and item
    prediction = model.predict(uid=user_id, iid=item_id)

    print(f"Predicted rating for user {user_id} and item {item_id}: {prediction.est}")

    # Make predictions and calculate RMSE for the test set
    predictions = model.test(testset)
    rmse = accuracy.rmse(predictions, verbose=False)

    # Log metrics
    mlflow.log_metric("rmse", rmse)

# Printed after the `with` block has exited, i.e. once the MLflow run is closed.
print("Model logged and metrics calculated successfully.")

Predicted rating for user 8.0 and item 5.0: 3.345137135595807
Model logged and metrics calculated successfully.
