In [25]:
import pandas as pd
from tqdm import tqdm
from sqlmodel import Session
import yaml 
import os


In [26]:
with open("./config.yaml","r") as f:
    config = yaml.safe_load(f)

In [27]:
from sqlmodel import SQLModel, create_engine

from models import Movie,Genre,Movie_Genre,Tag,Genome_Tag,Genome_Score,Rating


In [28]:
if config["database"] == 'postgres':
    user = config["postgres_params"]["user"]
    password = config["postgres_params"]["password"]
    host_url = config["postgres_params"]["host_url"]
    base = config["postgres_params"]["base"]

    database_url = f"postgresql://{user}:{password}@{host_url}:5432/{base}"
else:
    database_url = f"sqlite:///{config['sqlite']['path']}"

engine = create_engine(database_url, echo=True)

SQLModel.metadata.create_all(engine)

2023-12-19 00:16:19,753 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,753 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("genre")
2023-12-19 00:16:19,753 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-19 00:16:19,754 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("movie")
2023-12-19 00:16:19,754 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-19 00:16:19,754 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("movie_genre")
2023-12-19 00:16:19,755 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-19 00:16:19,755 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("tag")
2023-12-19 00:16:19,755 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-19 00:16:19,755 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("genome_tag")
2023-12-19 00:16:19,755 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-19 00:16:19,756 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("genome_score")
2023-12-19 00:16:19,756 INFO sqlalchemy.engine.Engine [raw sql

In [29]:
from sqlmodel import create_engine, Session, select


In [30]:
with Session(engine) as session:

  l_genreid = session.exec(select(Genre.genreid,Genre.name).order_by(Genre.genreid)).all()
  df_genre =pd.DataFrame(l_genreid)
  nb_genre = len(l_genreid)

2023-12-19 00:16:19,764 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,765 INFO sqlalchemy.engine.Engine SELECT genre.genreid, genre.name 
FROM genre ORDER BY genre.genreid
2023-12-19 00:16:19,765 INFO sqlalchemy.engine.Engine [generated in 0.00036s] ()
2023-12-19 00:16:19,766 INFO sqlalchemy.engine.Engine ROLLBACK


In [31]:
col_name = ["movieid"]+ list(df_genre.name)

In [32]:
id_movie = 2
statement = select(Movie.movieid, Genre.genreid)\
    .select_from(Movie)\
    .join(Movie_Genre, Movie.movieid == Movie_Genre.movieid)\
    .join(Genre, Genre.genreid == Movie_Genre.genreid)\
    .where(Movie.movieid == id_movie)




In [33]:
with Session(engine) as session:

  results = session.exec(statement).all()



2023-12-19 00:16:19,785 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,786 INFO sqlalchemy.engine.Engine SELECT movie.movieid, genre.genreid 
FROM movie JOIN movie_genre ON movie.movieid = movie_genre.movieid JOIN genre ON genre.genreid = movie_genre.genreid 
WHERE movie.movieid = ?
2023-12-19 00:16:19,786 INFO sqlalchemy.engine.Engine [generated in 0.00025s] (2,)
2023-12-19 00:16:19,787 INFO sqlalchemy.engine.Engine ROLLBACK


In [34]:
import pandas as pd
import numpy

In [35]:
df_mvid_1 = pd.DataFrame(results)

In [36]:
df_mvid_1

Unnamed: 0,movieid,genreid
0,2,3
1,2,5
2,2,10


In [37]:

df_mvid_1.genreid -= 1

In [38]:
df_mvid_1

Unnamed: 0,movieid,genreid
0,2,2
1,2,4
2,2,9


In [39]:
import numpy as np

genre_one_hot = np.array([0]*nb_genre)

In [40]:
genre_one_hot[df_mvid_1.genreid]=1

In [41]:
genre_one_hot

array([0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [42]:
df_films = pd.DataFrame(np.hstack([np.array([id_movie]).T,genre_one_hot])).T
df_films.columns = col_name

In [43]:
df_films

Unnamed: 0,movieid,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,2,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
query = select(Rating.movieid,Rating.rating).where(Rating.userid == 12)
with Session(engine) as session:
  results = session.exec(query).all()


2023-12-19 00:16:19,824 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,825 INFO sqlalchemy.engine.Engine SELECT rating.movieid, rating.rating 
FROM rating 
WHERE rating.userid = ?
2023-12-19 00:16:19,825 INFO sqlalchemy.engine.Engine [generated in 0.00021s] (12,)
2023-12-19 00:16:19,826 INFO sqlalchemy.engine.Engine ROLLBACK


In [45]:
pd.DataFrame(results)

Unnamed: 0,movieid,rating
0,1,4.0
1,3,3.0
2,5,2.0
3,6,3.0
4,7,3.0
5,17,3.0
6,32,3.0
7,34,4.0
8,36,4.0
9,95,3.0


In [46]:
def collect_movie_data(movieid_):

  statement = select(Movie.movieid, Genre.genreid)\
    .select_from(Movie)\
    .join(Movie_Genre, Movie.movieid == Movie_Genre.movieid)\
    .join(Genre, Genre.genreid == Movie_Genre.genreid)\
    .where(Movie.movieid == movieid_)

  with Session(engine) as session:
    results = session.exec(statement).all()

  df_mv = pd.DataFrame(results)
  df_mv.genreid -=1

  genre_one_hot = np.array([0]*nb_genre)
  genre_one_hot[df_mv.genreid]=1

  df_films = pd.DataFrame(np.hstack([np.array([movieid_]).T,genre_one_hot])).T
  df_films.columns = col_name
  return df_films


In [47]:
def collect_movie_data_userid(userid ):

  query = select(Rating.movieid,Rating.rating).where(Rating.userid == 12)

  with Session(engine) as session:
    list_movieid_rate = session.exec(query).all()

  list_df = list()
  for movieid,rating in list_movieid_rate:
    df_mv = collect_movie_data(movieid)
    df_mv["userid"] = userid
    df_mv["rating"] = rating

    list_df.append(df_mv)

  return pd.concat(list_df)

In [48]:
collect_movie_data_userid(userid = 4)

2023-12-19 00:16:19,842 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,843 INFO sqlalchemy.engine.Engine SELECT rating.movieid, rating.rating 
FROM rating 
WHERE rating.userid = ?
2023-12-19 00:16:19,843 INFO sqlalchemy.engine.Engine [cached since 0.01811s ago] (12,)
2023-12-19 00:16:19,844 INFO sqlalchemy.engine.Engine ROLLBACK
2023-12-19 00:16:19,845 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,845 INFO sqlalchemy.engine.Engine SELECT movie.movieid, genre.genreid 
FROM movie JOIN movie_genre ON movie.movieid = movie_genre.movieid JOIN genre ON genre.genreid = movie_genre.genreid 
WHERE movie.movieid = ?
2023-12-19 00:16:19,845 INFO sqlalchemy.engine.Engine [cached since 0.05943s ago] (1,)
2023-12-19 00:16:19,846 INFO sqlalchemy.engine.Engine ROLLBACK
2023-12-19 00:16:19,847 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-19 00:16:19,847 INFO sqlalchemy.engine.Engine SELECT movie.movieid, genre.genreid 
FROM movie JOIN movie_genre ON mo

Unnamed: 0,movieid,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,userid,rating
0,1,0,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,4,4.0
0,3,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,4,3.0
0,5,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,4,2.0
0,6,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,4,3.0
0,7,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,4,3.0
0,17,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,4,3.0
0,32,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,1,0,0,4,3.0
0,34,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,4,4.0
0,36,0,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,4,4.0
0,95,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,4,3.0
