Here is an implementation of Markov Logic Networks on the MovieLens dataset for predicting which genre a movie will likely belong to, given its actors.

In [None]:
import os
import tempfile
from collections import Counter

import numpy as np
import pandas as pd
import pyswip.prolog as prolog

Load the MovieLens dataset

In [None]:


# Directory holding the MovieLens CSV exports; change this one value to point
# the notebook at a local copy of the dataset.
DATA_DIR = 'path/to'

movies_df = pd.read_csv('{}/movies.csv'.format(DATA_DIR))
ratings_df = pd.read_csv('{}/ratings.csv'.format(DATA_DIR))
tags_df = pd.read_csv('{}/tags.csv'.format(DATA_DIR))

Merge the dataframes

In [None]:


# Attach the movie metadata to every rating and to every tag.
movie_ratings_df = pd.merge(movies_df, ratings_df, on='movieId')
movie_tags_df = pd.merge(movies_df, tags_df, on='movieId')

# Join ratings and tags on BOTH keys. Merging on 'movieId' alone pairs every
# rating of a movie with every tag of that movie and, because 'userId' then
# overlaps as a non-key column, pandas renames it to 'userId_x'/'userId_y',
# so a later lookup of row['userId'] raises KeyError.
# The empty left suffix keeps the rating-side column names unchanged; the
# overlapping tag-side columns receive a '_tag' suffix instead.
# NOTE(review): assumes ratings and tags both carry a 'userId' column - confirm.
movie_ratings_tags_df = pd.merge(
    movie_ratings_df,
    movie_tags_df,
    on=['userId', 'movieId'],
    suffixes=('', '_tag'),
)

Convert dataframe to Prolog format

In [None]:

def _prolog_term(value):
    """Render one cell value as a Prolog term: numbers bare, anything else quoted."""
    is_number = isinstance(value, (int, float, np.integer, np.floating))
    if is_number and not isinstance(value, bool):
        return '{}'.format(value)
    # Quote everything else so that spaces, punctuation and leading capitals
    # (which Prolog would otherwise read as variables) survive as one atom.
    escaped = str(value).replace('\\', '\\\\').replace("'", "\\'")
    return "'{}'".format(escaped)


def to_prolog(df):
    """Serialize a DataFrame into Prolog fact text, one clause per line.

    For every row four facts are emitted, in this order:
    ``rating(User,Movie,Rating).``, ``tag(User,Movie,Tag).``,
    ``genre(Movie,Genre,1).`` and ``actor(Movie,Actor,1).``.

    Args:
        df: Frame providing the columns ``userId``, ``movieId``, ``rating``,
            ``tag``, ``genre`` and ``actor``.
            NOTE(review): the stock MovieLens CSVs name the genre column
            ``genres`` and ship no actor data - confirm the frame is prepared
            with these columns before calling.

    Returns:
        A string in which each fact ends with ``.`` and a newline; the empty
        string when ``df`` has no rows.

    Raises:
        KeyError: If a required column is missing from a non-empty frame.
    """
    if df.empty:
        return ''

    clauses = []
    for _, row in df.iterrows():
        user = _prolog_term(row['userId'])
        movie = _prolog_term(row['movieId'])
        clauses.append('rating({},{},{}).'.format(user, movie, _prolog_term(row['rating'])))
        clauses.append('tag({},{},{}).'.format(user, movie, _prolog_term(row['tag'])))
        clauses.append('genre({},{},{}).'.format(movie, _prolog_term(row['genre']), 1))
        clauses.append('actor({},{},{}).'.format(movie, _prolog_term(row['actor']), 1))
    # Join once instead of growing a string with += on every row.
    return ''.join(clause + '\n' for clause in clauses)

# Serialize the merged ratings/tags frame into Prolog fact text.
prolog_str = to_prolog(movie_ratings_tags_df)

Initialize Prolog engine

In [None]:

# pyswip exposes its interpreter handle as the `Prolog` class; the module has
# no `Logic` attribute, so `prolog.Logic()` fails with AttributeError.
engine = prolog.Prolog()

Assert Prolog clauses

In [None]:

# `assertz` adds exactly one clause per call and expects it without the
# terminating period, so feed the generated facts in one line at a time
# instead of passing the whole multi-line text at once.
for fact_line in prolog_str.splitlines():
    fact = fact_line.strip()
    if fact.endswith('.'):
        fact = fact[:-1]
    if fact:
        engine.assertz(fact)

Define Markov Logic Network (MLN) predicates

In [None]:

# Templates for the predicates the network reasons over, keyed by predicate
# name; each template has three anonymous arguments.
mln_predicates = dict(
    rating='rating(_,_,_)',
    tag='tag(_,_,_)',
    genre='genre(_,_,_)',
    actor='actor(_,_,_)',
)

Define Markov Logic Network (MLN) formulas

In [None]:

# Weighted rules, each written as '<weight> <head> :- <body>'.
# NOTE(review): every body repeats the head goal genre(M, G, 1), so the rules
# are circular - confirm this is intended, as a plain Prolog engine may recurse
# on them without terminating.
mln_formulas = [
    # Movie M has some actor A and already has genre G.
    '0.5 genre(M, G, 1) :- actor(M, A, 1), genre(M, G, 1)',
    # NOTE(review): tag/3 appears with both (U, M, T) and (M, U, T) argument
    # orders in the same body - looks like a mistake; verify.
    '0.5 genre(M, G, 1) :- actor(M, A, 1), tag(U, M, T), tag(M, U, T), genre(M, G, 1)',
    # NOTE(review): same swapped-argument pattern for rating/3 - verify.
    '0.5 genre(M, G, 1) :- actor(M, A, 1), rating(U, M, R), rating(M, U, R), genre(M, G, 1)'
]

Compile MLN predicates and formulas into a Prolog program

In [None]:

# Concatenate the predicate templates followed by the formulas, one entry per
# line, each followed by a newline.
# NOTE(review): no terminating '.' is appended to any entry - confirm the
# consumer of this text accepts clauses in that form.
prolog_program = ''.join(
    entry + '\n' for entry in [*mln_predicates.values(), *mln_formulas]
)

Load the Prolog program

In [None]:

# `consult` loads a Prolog source FILE, so the program text cannot be passed
# to it directly: write the text to a temporary .pl file and consult that.
with tempfile.NamedTemporaryFile('w', suffix='.pl', delete=False) as program_file:
    program_file.write(prolog_program)
try:
    # Forward slashes keep the path intact inside the quoted Prolog atom
    # that the consult goal is built from (backslashes would be escapes).
    engine.consult(program_file.name.replace(os.sep, '/'))
finally:
    os.remove(program_file.name)

Query the Prolog engine to predict movie genres

In [None]:

def predict_movie_genre(movie_id):
    """Return the genre most often derived for ``movie_id``, or ``None``.

    Runs the query ``genre(<movie_id>,G,1)`` on the module-level ``engine``
    and tallies how many solutions bind ``G`` to each value.

    Args:
        movie_id: Identifier substituted into the first argument of the query.

    Returns:
        The most frequent binding of ``G``; ties go to the value seen first.
        ``None`` when the query yields no solutions (taking ``max`` of the
        empty tally would otherwise raise ``ValueError``).
    """
    query = 'genre({},G,1)'.format(movie_id)
    genre_counts = Counter(solution['G'] for solution in engine.query(query))
    if not genre_counts:
        return None
    return max(genre_counts, key=genre_counts.get)

Test the prediction on a sample movie

In [None]:
# Smoke-test the predictor on a single movie id and print the outcome.
movie_id = 1
predicted_genre = predict_movie_genre(movie_id)
report_template = 'The predicted genre for movie {} is {}'
print(report_template.format(movie_id, predicted_genre))