Skip to content

MNoorFawi/pytholog-as-a-logic-database

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

5 Commits
 
 
 
 
 
 

Repository files navigation

DVD Rental Database in pytholog

We will use DVD Rental database to feed a knowledge base as facts and rules, then logically query the database.

Here we can find how to create the database in postgresql and insert the data.

Let's connect to the database in python and see how it looks like:

import psycopg2
import pandas as pd

psql = psycopg2.connect(host = "localhost", database = "dvdrental",
                      user = "postgres", password = "password")
cursor = psql.cursor()
## fetch some data to confirm connection
pd.read_sql("SELECT * FROM language;", psql)

#    language_id                  name         last_update
# 0            1  English              2006-02-15 10:02:19
# 1            2  Italian              2006-02-15 10:02:19
# 2            3  Japanese             2006-02-15 10:02:19
# 3            4  Mandarin             2006-02-15 10:02:19
# 4            5  French               2006-02-15 10:02:19
# 5            6  German               2006-02-15 10:02:19

Let's see what the table names are:

cursor.execute("select relname from pg_class where relkind='r' and relname !~ '^(pg_|sql_)';")
print(cursor.fetchall())

# [('actor',), ('store',), ('address',), ('category',), ('city',), ('country',), 
# ('customer',), ('film_actor',), ('film_category',), ('inventory',), ('language',), 
# ('rental',), ('staff',), ('payment',), ('film',), ('movies_rental',), ('compressed_movies_rental',)]
def query_defn(table):
    return f"SELECT * FROM {table};"

No we will read the tables we will query into python and do some transformation to have values in lowercase.

actor = pd.read_sql(query_defn("actor"), psql)
actor["Actor"] = (actor["first_name"] + "_" + actor["last_name"]).str.lower()
actor.head()

#    actor_id first_name  ...             last_update                Actor
# 0         1   Penelope  ... 2013-05-26 14:47:57.620     penelope_guiness
# 1         2       Nick  ... 2013-05-26 14:47:57.620        nick_wahlberg
# 2         3         Ed  ... 2013-05-26 14:47:57.620             ed_chase
# 3         4   Jennifer  ... 2013-05-26 14:47:57.620       jennifer_davis
# 4         5     Johnny  ... 2013-05-26 14:47:57.620  johnny_lollobrigida
# [5 rows x 5 columns]
language = pd.read_sql(query_defn("language"), psql)
film = pd.read_sql(query_defn("film"), psql)
category = pd.read_sql(query_defn("category"), psql)
#customer = pd.read_sql(query_defn("customer"), psql)
language["name"] = language["name"].str.lower()
film["title"] = film["title"].str.replace(" ", "_").str.lower()
category["name"] = category["name"].str.lower()
#customer["Customer"] = (customer["first_name"] + "_" + customer["last_name"]).str.lower()
film_category = pd.read_sql(query_defn("film_category"), psql)
#film[film.film_id.isin(film_category[film_category.category_id == 14].film_id)]

print(film.loc[film.film_id == 1, "title"])
# 4    academy_dinosaur
# Name: title, dtype: object

print(actor.head())
#    actor_id first_name  ...             last_update                Actor
# 0         1   Penelope  ... 2013-05-26 14:47:57.620     penelope_guiness
# 1         2       Nick  ... 2013-05-26 14:47:57.620        nick_wahlberg
# 2         3         Ed  ... 2013-05-26 14:47:57.620             ed_chase
# 3         4   Jennifer  ... 2013-05-26 14:47:57.620       jennifer_davis
# 4         5     Johnny  ... 2013-05-26 14:47:57.620  johnny_lollobrigida

Let's initiate the knowledge base and feed it with for loops.

We will use rules as the query statements and views if we need some joining and conditions.

import pytholog as pl
dvd = pl.KnowledgeBase("dvd_rental")

for i in range(film.shape[0]):
    dvd([f"film({film.film_id[i]}, {film.title[i]}, {film.language_id[i]})"])

for i in range(language.shape[0]):
    dvd([f"language({language.language_id[i]}, {language.name[i]})"])
    
## simple query	
dvd(["film_language(F, L) :- film(_, F, LID), language(LID, L)"])
dvd.query(pl.Expr("film_language(young_language, L)"))
# [{'L': 'english'}]

We will create film_category view

for i in range(category.shape[0]):
    dvd([f"category({category.category_id[i]}, {category.name[i]})"])
    
for i in range(film_category.shape[0]):
    dvd([f"filmcategory({film_category.film_id[i]}, {film_category.category_id[i]})"])
    
dvd(["film_category(F, C) :- film(FID, F, _), filmcategory(FID, CID), category(CID, C)"]) ## "_" to neglect this term

## another query to see what films in sci-fi category 
dvd.query(pl.Expr("film_category(F, sci-fi)"))

# [{'F': 'annie_identity'},
#  {'F': 'armageddon_lost'},
# .....

#  {'F': 'titans_jerk'},
#  {'F': 'trojan_tomorrow'},
#  {'F': 'unforgiven_zoolander'},
#  {'F': 'vacation_boondock'},
#  {'F': 'weekend_personal'},
#  {'F': 'whisperer_giant'},
#  {'F': 'wonderland_christmas'}]

Let's join actors and films

for i in range(actor.shape[0]):
    dvd([f"actor({actor.actor_id[i]}, {actor.Actor[i]})"])
    
film_actor = pd.read_sql(query_defn("film_actor"), psql)
#print(film_actor[film_actor["actor_id"] == 3].shape)
print(film_actor.shape)
#(5462, 3)
for i in range(film_actor.shape[0]):
    dvd([f"filmactor({film_actor.film_id[i]}, {film_actor.actor_id[i]})"])

dvd(["film_actor(F, A) :- film(FID, F, _), filmactor(FID, AID), actor(AID, A)"])

dvd.query(pl.Expr("film_actor(annie_identity, Actor)"))
#[{'Actor': 'adam_grant'}, {'Actor': 'cate_mcqueen'}, {'Actor': 'greta_keitel'}]

## query actors in a film
dvd.query(pl.Expr("film_actor(academy_dinosaur, Actor)"))
# [{'Actor': 'penelope_guiness'},
#  {'Actor': 'christian_gable'},
#  {'Actor': 'lucille_tracy'},
#  {'Actor': 'sandra_peck'},
#  {'Actor': 'johnny_cage'},
#  {'Actor': 'mena_temple'},
#  {'Actor': 'warren_nolte'},
#  {'Actor': 'oprah_kilmer'},
#  {'Actor': 'rock_dukakis'},
#  {'Actor': 'mary_keitel'}]

### query films that an actor performed in
dvd.query(pl.Expr("film_actor(Film, penelope_guiness)"))
# [{'Film': 'academy_dinosaur'},
#  {'Film': 'anaconda_confessions'},
#  {'Film': 'angels_life'},
#  {'Film': 'bulworth_commandments'},
#  {'Film': 'cheaper_clyde'},
#  {'Film': 'color_philadelphia'},
#  {'Film': 'elephant_trojan'},
#  {'Film': 'gleaming_jawbreaker'},
#  {'Film': 'human_graffiti'},
#  {'Film': 'king_evolution'},
#  {'Film': 'lady_stage'},
#  {'Film': 'language_cowboy'},
#  {'Film': 'mulholland_beast'},
#  {'Film': 'oklahoma_jumanji'},
#  {'Film': 'rules_human'},
#  {'Film': 'splash_gump'},
#  {'Film': 'vertigo_northwest'},
#  {'Film': 'westward_seabiscuit'},
#  {'Film': 'wizard_coldblooded'}]

### simple yes or no query
dvd.query(pl.Expr("film_actor(academy_dinosaur, lucille_tracy)"))
# ['Yes']

Actor Category view to see in which categories an actor performed.

dvd(["actor_category(A, C) :- film_actor(F, A), film_category(F, C)"])

jd = dvd.query(pl.Expr("actor_category(jennifer_davis, Category)"))

from pprint import pprint
merged = {}
for d in jd:
    for k, v in d.items():
        if k not in merged: merged[k] = set()
        merged[k].add(v)

pprint(merged)
# {'Category': {'action',
#               'animation',
#               'comedy',
#               'documentary',
#               'drama',
#               'family',
#               'horror',
#               'music',
#               'new',
#               'sci-fi',
#               'sports',
#               'travel'}}

Finally, let's now write those facts and rules to a prolog file.

with open("dvd_rental.pl", "w") as f:
    for i in dvd.db.keys():
        for d in dvd.db[i]["facts"]:
            f.write(d.to_string() + "." + "\n")
    
f.close()

About

Using pytholog library, which enables using logic programming in python, as a database and query data with prolog syntax.

Topics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages