 Markov Logic Networks on the public IMDB movie dataset for predicting which actors will likely work together in a future movie

In [None]:
import numpy as np
import pandas as pd
import markovlogicnet

Load the IMDB dataset into a Pandas DataFrame

In [None]:

# Location of the IMDB export; replace this placeholder with the real file path.
IMDB_CSV_PATH = 'path/to/imdb/data.csv'
# Columns that the later cells of this notebook read from the frame.
REQUIRED_COLUMNS = ('actor_name', 'movie_title')

imdb_data = pd.read_csv(IMDB_CSV_PATH)

# Fail here with an explicit message instead of a KeyError in a later cell.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in imdb_data.columns]
if missing_columns:
    raise ValueError(f"IMDB dataset is missing required column(s): {missing_columns}")


Extract the list of actors and the list of movies from the dataset

In [None]:

# Distinct, non-missing values in order of first appearance.
# Missing entries are dropped first: a NaN left in either list would be
# turned into an Actor(nan) / Movie(nan) ground atom further down.
actors = imdb_data['actor_name'].dropna().unique().tolist()
movies = imdb_data['movie_title'].dropna().unique().tolist()

Define the predicates that we will use in our MLN formulas

In [None]:

# NOTE(review): the second Predicate argument appears to be the arity (it
# matches the number of arguments each predicate takes in the formulas) —
# confirm against the markovlogicnet API.

# One-argument predicates: Actor(x) and Movie(y).
PRED_ACTOR, PRED_MOVIE = (
    markovlogicnet.Predicate(label, 1) for label in ('Actor', 'Movie')
)

# Two-argument predicates: ActedIn(x, y) and WillActIn(x, y).
PRED_ACTED_IN, PRED_WILL_ACT_IN = (
    markovlogicnet.Predicate(label, 2) for label in ('ActedIn', 'WillActIn')
)

Define the MLN formulas

In [None]:

# (initial weight, rule text) for each formula, in the order they are added.
# The first rule restricts WillActIn to actor/movie pairs with no existing
# ActedIn fact; the second has no such restriction.
# NOTE(review): the rule syntax is specific to markovlogicnet and is copied
# verbatim — confirm it against the library's formula grammar.
formulas = [
    markovlogicnet.LogicFormula(weight=initial_weight, logic=rule_text)
    for initial_weight, rule_text in (
        (1, '1 { WillActIn(x, y) : Actor(x), Movie(y), !ActedIn(x, y) } 1'),
        (2, '1 { WillActIn(x, y) : Actor(x), Movie(y) } 1'),
    )
]

Create a Markov Logic Network object and add the predicates and formulas to it

In [None]:

# Start from an empty network and register the vocabulary, then the rules.
mln = markovlogicnet.MarkovLogicNetwork()

# Predicates are registered one at a time, in declaration order.
for predicate in (PRED_ACTOR, PRED_MOVIE, PRED_ACTED_IN, PRED_WILL_ACT_IN):
    mln.add_predicate(predicate)

# NOTE(review): add_formula is handed the whole list, whereas add_predicate
# is called once per item — confirm the API accepts a list here.
mln.add_formula(formulas)

Create a set of ground atoms representing the actors and movies in the dataset

In [None]:

# Actor(<name>) atoms first, followed by Movie(<title>) atoms, each group in
# the order of the `actors` / `movies` lists.
actor_atoms = [
    markovlogicnet.GroundAtom(predicate=PRED_ACTOR, arguments=[name])
    for name in actors
]
movie_atoms = [
    markovlogicnet.GroundAtom(predicate=PRED_MOVIE, arguments=[title])
    for title in movies
]
ground_atoms = actor_atoms + movie_atoms

Add the ground atoms to the MLN object

In [None]:

# Hand the Actor/Movie ground atoms collected in `ground_atoms` to the network.
mln.add_atoms(ground_atoms)

Create a list of evidence atoms recording which actors have already appeared in which movies

In [None]:

# Distinct (actor, movie) credits. Rows with a missing name or title are
# skipped, and repeated credits are collapsed so the same ActedIn fact is
# not supplied as evidence more than once.
acted_in_pairs = (
    imdb_data[['actor_name', 'movie_title']]
    .dropna()
    .drop_duplicates()
)

# itertuples(index=False, name=None) yields plain (actor, movie) tuples and
# avoids the per-row Series that DataFrame.iterrows() constructs.
evidence_atoms = [
    markovlogicnet.GroundAtom(predicate=PRED_ACTED_IN, arguments=[actor_name, movie_title])
    for actor_name, movie_title in acted_in_pairs.itertuples(index=False, name=None)
]

Learn the weights of the MLN using the evidence atoms as training data

In [None]:

# Fit the formula weights, passing the ActedIn evidence atoms as training data.
# NOTE(review): any value returned by learn_weights is not captured here.
mln.learn_weights(evidence_atoms)

Use the MLN to predict which actors are likely to work together in a future movie

In [None]:

# Query the network for WillActIn atoms and keep each atom's argument list.
# NOTE(review): assumes each argument list is (actor, movie), matching the
# two-argument WillActIn predicate — confirm against infer_query's output.
predicted_atoms = mln.infer_query([PRED_WILL_ACT_IN])
predicted_tuples = [atom.arguments for atom in predicted_atoms]

# Distinct actors, sorted so the output is the same on every run
# (iteration order of a set of strings varies between interpreter sessions).
# key=str keeps the sort well-defined even if a non-string value slips in.
predicted_actors = sorted({args[0] for args in predicted_tuples}, key=str)
print("Predicted actors:", predicted_actors)

# Group the predicted cast by movie, then pair up actors who share a movie:
# these pairs are the "actors likely to work together".
predicted_cast_by_movie = {}
for args in predicted_tuples:
    predicted_cast_by_movie.setdefault(args[1], set()).add(args[0])

predicted_pairs = []
for movie_title in sorted(predicted_cast_by_movie, key=str):
    cast = sorted(predicted_cast_by_movie[movie_title], key=str)
    for position, first_actor in enumerate(cast):
        for second_actor in cast[position + 1:]:
            predicted_pairs.append((first_actor, second_actor, movie_title))

# Show only a short preview; the full list stays available in predicted_pairs.
MAX_PAIRS_SHOWN = 10
print("Predicted co-star pairs (actor, actor, movie):", predicted_pairs[:MAX_PAIRS_SHOWN])
print("Total predicted co-star pairs:", len(predicted_pairs))