## The task was solved using two methods:

1. by performing SQL queries,
2. by implementing ORM (SQLAlchemy).

### First method (SQL queries)

**Testing DB Connection**

In [10]:
import psycopg2

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="movielens_db",
    user="postgres",
    password="postgres",
    host="db",
    port="5432"
)

**SQL queries**

**1. How many movies are in the dataset?**

In [11]:
# Create a cursor object
cur = conn.cursor()

cur.execute("SELECT COUNT(*) FROM movies;")
movie_count = cur.fetchone()[0]
print(f"Total number of movies: {movie_count}")

Total number of movies: 9742


 **2. What is the most common genre of movie?**

In [12]:
cur.execute("""
    SELECT genres, COUNT(*)
    FROM movies
    GROUP BY genres
    ORDER BY COUNT(*) DESC
    LIMIT 1;
""")
most_common_genre = cur.fetchone()[0]
print(f"Most common genre: {most_common_genre}")

Most common genre: Drama


**3. What are top 10 movies with the highest rating?**

In [13]:
cur.execute("""
    SELECT m.title, AVG(r.rating) as avg_rating
    FROM ratings r
    JOIN movies m ON r.movieId = m.movieId
    GROUP BY m.title
    ORDER BY avg_rating DESC
    LIMIT 10;
""")
top_movies = cur.fetchall()
print("Top 10 movies with the highest average rating:")
for movie in top_movies:
    print(movie)

Top 10 movies with the highest average rating:
('Brother (Brat) (1997)', 5.0)
('Eva (2011)', 5.0)
('Go for Zucker! (Alles auf Zucker!) (2004)', 5.0)
('Holy Motors (2012)', 5.0)
('Connections (1978)', 5.0)
('Watching the Detectives (2007)', 5.0)
('Girls About Town (1931)', 5.0)
('Story of Women (Affaire de femmes, Une) (1988)', 5.0)
('Nasu: Summer in Andalusia (2003)', 5.0)
('Into the Woods (1991)', 5.0)


**4. Who are the 5 most often rating users?**

In [14]:
cur.execute("""
    SELECT userId, COUNT(*) as rating_count
    FROM ratings
    GROUP BY userId
    ORDER BY rating_count DESC
    LIMIT 5;
""")
most_active_users = cur.fetchall()
print("Top 5 most active users:")
for user in most_active_users:
    print(user)

Top 5 most active users:
(414, 2698)
(599, 2478)
(474, 2108)
(448, 1864)
(274, 1346)


**5. When was done first and last rate included in dataset, and what was the rated movie title?**

In [16]:
cur.execute("""
    SELECT m.title, to_timestamp(r.timestamp) as rating_time
    FROM ratings r
    JOIN movies m ON r.movieId = m.movieId
    ORDER BY r.timestamp ASC
    LIMIT 1;
""")
first_rating = cur.fetchone()
print(f"First rating: {first_rating[0]} at {first_rating[1]}")

cur.execute("""
    SELECT m.title, to_timestamp(r.timestamp) as rating_time
    FROM ratings r
    JOIN movies m ON r.movieId = m.movieId
    ORDER BY r.timestamp DESC
    LIMIT 1;
""")
last_rating = cur.fetchone()
print(f"Last rating: {last_rating[0]} at {last_rating[1]}")

First rating: Copycat (1995) at 1996-03-29 18:36:55+00:00
Last rating: Crumb (1994) at 2018-09-24 14:27:30+00:00


**6. Find all movies released in 1990.**

In [17]:
cur.execute("""
    SELECT title 
    FROM movies
    WHERE title LIKE '%(1990)%';
""")
movies_1990 = cur.fetchall()
print("Movies released in 1990:")
for movie in movies_1990:
    print(movie[0])

Movies released in 1990:
Home Alone (1990)
Ghost (1990)
Dances with Wolves (1990)
Pretty Woman (1990)
Days of Thunder (1990)
Grifters, The (1990)
Tie Me Up! Tie Me Down! (¡Átame!) (1990)
Paris Is Burning (1990)
Goodfellas (1990)
Trust (1990)
Rosencrantz and Guildenstern Are Dead (1990)
Miller's Crossing (1990)
Femme Nikita, La (Nikita) (1990)
Pump Up the Volume (1990)
Cyrano de Bergerac (1990)
Amityville Curse, The (1990)
Die Hard 2 (1990)
Young Guns II (1990)
Marked for Death (1990)
Hunt for Red October, The (1990)
King of New York (1990)
Metropolitan (1990)
Child's Play 2 (1990)
Exorcist III, The (1990)
Gremlins 2: The New Batch (1990)
Back to the Future Part III (1990)
Godfather: Part III, The (1990)
Rescuers Down Under, The (1990)
NeverEnding Story II: The Next Chapter, The (1990)
My Blue Heaven (1990)
Sheltering Sky, The (1990)
Edward Scissorhands (1990)
Tales from the Darkside: The Movie (1990)
Heart Condition (1990)
Rocky V (1990)
Dick Tracy (1990)
Arachnophobia (1990)
Problem C

In [20]:
cur.close()
conn.close()

### Second method (ORM implementation)

In [35]:
from sqlalchemy import create_engine, func, desc
from sqlalchemy.orm import declarative_base, sessionmaker, Session
from sqlalchemy import BigInteger, Column, ForeignKey, Float, Integer, String

In [43]:
# DB connection
DB_URL = "postgresql://postgres:postgres@db:5432/movielens_db"

Engine = create_engine(url = DB_URL)

DB_session = sessionmaker(
    autoflush = False,
    autocommit = False,
    bind = Engine
)

Base = declarative_base()
Base.metadata.create_all(bind = Engine)

In [44]:
# DB object mapping
class Movie(Base):
    __tablename__ = 'movies'

    movieid = Column(Integer, primary_key = True, autoincrement = True)
    title = Column(String)
    genres = Column(String)

class Rating(Base):
    __tablename__ = 'ratings'

    ratingid = Column(Integer, primary_key = True, autoincrement = True)
    userid = Column(Integer)
    movieid = Column(Integer, ForeignKey('movies.movieid'))
    rating = Column(Float)
    timestamp = Column(BigInteger)

In [56]:
def get_movies_count(db : Session):
    return db.query(func.count(Movie.movieid)).scalar()

def get_most_common_genre(db : Session):
    return db\
        .query(Movie.genres, func.count(Movie.genres))\
        .group_by(Movie.genres)\
        .order_by(func.count(Movie.genres)\
        .desc())\
        .first()

def get_top_10_movies(db : Session):
    return db\
        .query(Movie.title, func.avg(Rating.rating).label('avg_rating'))\
        .join(Rating, Movie.movieid == Rating.movieid)\
        .group_by(Movie.title)\
        .order_by(desc('avg_rating'))\
        .limit(10)\
        .all()

def get_5_most_often_rating_users(db : Session):
    return db\
        .query(Rating.userid, func.count(Rating.userid).label('rating_count'))\
        .group_by(Rating.userid)\
        .order_by(desc('rating_count'))\
        .limit(5)\
        .all()

def get_first_rating(db : Session):
    return db\
        .query(Movie.title, func.to_timestamp(Rating.timestamp))\
        .join(Rating, Movie.movieid == Rating.movieid)\
        .order_by(Rating.timestamp.asc())\
        .first()

def get_last_rating(db : Session):
    return db\
        .query(Movie.title, func.to_timestamp(Rating.timestamp))\
        .join(Rating, Movie.movieid == Rating.movieid)\
        .order_by(Rating.timestamp.desc())\
        .first()

def get_movies_from_1990(db : Session):
    return db\
        .query(Movie.title)\
        .filter(Movie.title.like('%(1990)%'))\
        .all()


**1. How many movies are in the dataset?**

In [104]:
movie_count = get_movies_count(db = DB_session())
print(f"Total number of movies: [{movie_count}]")

Total number of movies: [9742]


**2. What is the most common genre of movie?**

In [105]:
most_common_genre = get_most_common_genre(db = DB_session())
print(f"Most common genre: \"{most_common_genre[0]}\", Total count: [{most_common_genre[1]}]")

Most common genre: "Drama", Total count: [1053]


**3. What are top 10 movies with the highest rating?**

In [106]:
top_10_movies = get_top_10_movies(db = DB_session())
print("=" * 30, "Top 10 movies", "=" * 30)
for rank, movie in enumerate(top_10_movies, start = 1):
    print(f"[{rank:<2}]. {movie[0]:<50} AVG_Rating: [{movie[1]:.2f}]" )

[1 ]. Brother (Brat) (1997)                              AVG_Rating: [5.00]
[2 ]. Eva (2011)                                         AVG_Rating: [5.00]
[3 ]. Go for Zucker! (Alles auf Zucker!) (2004)          AVG_Rating: [5.00]
[4 ]. Holy Motors (2012)                                 AVG_Rating: [5.00]
[5 ]. Connections (1978)                                 AVG_Rating: [5.00]
[6 ]. Watching the Detectives (2007)                     AVG_Rating: [5.00]
[7 ]. Girls About Town (1931)                            AVG_Rating: [5.00]
[8 ]. Story of Women (Affaire de femmes, Une) (1988)     AVG_Rating: [5.00]
[9 ]. Nasu: Summer in Andalusia (2003)                   AVG_Rating: [5.00]
[10]. Into the Woods (1991)                              AVG_Rating: [5.00]


**4. Who are the 5 most often rating users?**

In [107]:
most_active_users = get_5_most_often_rating_users(db = DB_session())
print("=" * 9, " Top 5 most active users ", "=" * 9)
for rank, user in enumerate(most_active_users, start = 1):
    print(f"[{rank}]. USER_ID: [{user[0]:>5}] \tRating count: [{user[1]:>5}]")

[1]. USER_ID: [  414] 	Rating count: [ 2698]
[2]. USER_ID: [  599] 	Rating count: [ 2478]
[3]. USER_ID: [  474] 	Rating count: [ 2108]
[4]. USER_ID: [  448] 	Rating count: [ 1864]
[5]. USER_ID: [  274] 	Rating count: [ 1346]


**5. When was done first and last rate included in dataset, and what was the rated movie title?**

In [108]:
first_rating = get_first_rating(db = DB_session())
print(f"First rating: {first_rating[0]:^20} at {first_rating[1]}")

last_rating = get_last_rating(db = DB_session())
print(f"Last  rating: {last_rating[0]:^20} at {last_rating[1]}")

First rating:    Copycat (1995)    at 1996-03-29 18:36:55+00:00
Last  rating:     Crumb (1994)     at 2018-09-24 14:27:30+00:00


**6. Find all movies released in 1990.**

In [109]:
movies_1990 = get_movies_from_1990(db = DB_session())
print("=" * 15, "Movies released in 1990", "=" * 15)
for count, movie in enumerate(movies_1990):
    print(f"[{count:<3}]. {movie[0]}")

[0  ]. Home Alone (1990)
[1  ]. Ghost (1990)
[2  ]. Dances with Wolves (1990)
[3  ]. Pretty Woman (1990)
[4  ]. Days of Thunder (1990)
[5  ]. Grifters, The (1990)
[6  ]. Tie Me Up! Tie Me Down! (¡Átame!) (1990)
[7  ]. Paris Is Burning (1990)
[8  ]. Goodfellas (1990)
[9  ]. Trust (1990)
[10 ]. Rosencrantz and Guildenstern Are Dead (1990)
[11 ]. Miller's Crossing (1990)
[12 ]. Femme Nikita, La (Nikita) (1990)
[13 ]. Pump Up the Volume (1990)
[14 ]. Cyrano de Bergerac (1990)
[15 ]. Amityville Curse, The (1990)
[16 ]. Die Hard 2 (1990)
[17 ]. Young Guns II (1990)
[18 ]. Marked for Death (1990)
[19 ]. Hunt for Red October, The (1990)
[20 ]. King of New York (1990)
[21 ]. Metropolitan (1990)
[22 ]. Child's Play 2 (1990)
[23 ]. Exorcist III, The (1990)
[24 ]. Gremlins 2: The New Batch (1990)
[25 ]. Back to the Future Part III (1990)
[26 ]. Godfather: Part III, The (1990)
[27 ]. Rescuers Down Under, The (1990)
[28 ]. NeverEnding Story II: The Next Chapter, The (1990)
[29 ]. My Blue Heaven (199

In [110]:
DB_session().close()