# Cinescope Postgres and Redis Tests

### Install Dependencies for notebook testing

In [None]:
pip install psycopg2-binary pandas matplotlib redis

### Import our packages for the project

In [None]:
import decimal 
import json
import redis
import re
import psycopg2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from config import config

## Test the connection to the Postgres Database

In [None]:
# Establish a connection
def connect():
    """Open a PostgreSQL connection, print the server version, then disconnect.

    Connection settings are taken from ``config()``. Any exception raised
    along the way is printed instead of propagated, and the connection (when
    one was opened) is always closed before the function returns.
    """
    connection = None
    try:
        # Load the connection settings.
        settings = config()

        print('Connecting to the PostgreSQL database...')
        connection = psycopg2.connect(**settings)
        cursor = connection.cursor()

        # Ask the server for its version string and show the returned row.
        print('PostgreSQL database version:')
        cursor.execute('SELECT version()')
        print(cursor.fetchone())

        cursor.close()
    except Exception as error:
        # psycopg2.DatabaseError is an Exception subclass, so this covers it.
        print(error)
    finally:
        if connection is not None:
            connection.close()
            print('Database connection closed.')
 
 
# Run the connection check only when this code executes as the main module.
if __name__ == '__main__':
    connect()

## Run a basic query to return our SQL tables

In [None]:
def getTables():
    """List the names of the tables in the ``public`` schema.

    Returns:
        A list of table-name strings. The list is empty when the lookup
        fails; the error is printed rather than raised.
    """
    connection = None
    table_names = []
    query = """SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"""
    try:
        connection = psycopg2.connect(**config())
        cursor = connection.cursor()
        cursor.execute(query)

        # Only one column is selected, so each row unpacks to a single name.
        table_names = [name for (name,) in cursor.fetchall()]
        cursor.close()
    except Exception as error:
        print(f"Database error: {error}")
    finally:
        if connection is not None:
            connection.close()

    return table_names

# Fetch the table names once; `tables` is reused by the column listing below.
tables = getTables()

# Print one table name per line.
for t in tables:
    print(t)


## Print the Postgres column names for a given schema and table

In [None]:
def getColumns(schema, table):
    """Return the column names of the given table.

    Args:
        schema: Name of the schema that contains the table.
        table: Name of the table whose columns are listed.

    Returns:
        A list of column-name strings read from
        ``information_schema.columns``. The list is empty when nothing
        matches or when the lookup fails; errors are printed, not raised.
    """
    conn = None
    column_names = []
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()

        # The names are compared as string values here, so they are bound as
        # query parameters instead of being formatted into the SQL text. That
        # keeps names containing quotes working and rules out SQL injection.
        sql_command = """
        SELECT column_name FROM information_schema.columns 
        WHERE table_schema = %s AND table_name = %s
        """

        cur.execute(sql_command, (schema, table))
        column_names = [row[0] for row in cur.fetchall()]

        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Database error: {error}")
    finally:
        if conn is not None:
            conn.close()

    return column_names

# For every table found above, print its name and then its column names.
for t in tables:
    table = t
    print("Table: %s" % table)
    # All tables are looked up in the "public" schema.
    columns = getColumns("public", table)
    print(f"{table}: {columns}")


## Test the Redis connection

In [None]:
def test_redis():
    """Round-trip a test key through Redis and report the outcome.

    Returns:
        A ``(message, status)`` tuple: status 200 when the stored value is
        read back, 500 when the read comes back empty or any exception is
        raised while talking to Redis.
    """
    try:
        # Write the probe value, then read it straight back.
        redis_client.set("test_key", "Hello, Redis!")
        stored = redis_client.get("test_key")

        if not stored:
            return "Failed to retrieve value from Redis.", 500
        return f"Redis test successful! Value: {stored}", 200
    except Exception as exc:
        return f"Error connecting to Redis: {str(exc)}", 500
# Run the Redis round-trip check.
test_redis()

## Test returning our genres with Redis enabled

In [None]:
def getGenres(schema, table):
    """Return the genre names from the given schema and table, using caching.

    The result is looked up in Redis first. On a miss the ``name`` column of
    ``schema.table`` is queried and the list is cached for 600000 seconds.

    Args:
        schema: Schema containing the genre table.
        table: Name of the genre table.

    Returns:
        A list of genre names. The list is empty when the lookup fails;
        errors are printed, not raised.
    """
    # Defined before the try block so that `finally` can always test it, even
    # when config() or connect() raises.
    conn = None
    genre_names = []
    try:
        # NOTE(review): schema and table are formatted into the SQL text as
        # identifiers, so they must come from trusted callers.
        sql_command = f"SELECT name FROM {schema}.{table};"
        cache_key = f"genres_query:{sql_command}"

        # Check the cache before opening a database connection, so a hit
        # needs no connection at all.
        cached_result = redis_client.get(cache_key)
        if cached_result:
            print("Cached result: ")
            return json.loads(cached_result)

        # Cache miss: run the query against Postgres.
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute(sql_command)
        genre_names = [row[0] for row in cur.fetchall()]

        # Cache the result in Redis
        redis_client.setex(cache_key, 600000, json.dumps(genre_names))

        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Database error: {error}")
    finally:
        if conn is not None:
            conn.close()

    return genre_names

# Example usage: list the names stored in public.genre and print them.
genres = getGenres("public", "genre")
print(genres)

## Test returning our Genre Ids with Redis enabled

In [None]:
def getGenreId(schema, table, genre_name):
    """Return the genre ID corresponding to the selected genre name.

    The result is looked up in Redis first. On a miss the ``id`` of the row
    whose ``name`` equals *genre_name* is queried and cached for 600000
    seconds.

    Args:
        schema: Schema containing the genre table.
        table: Name of the genre table.
        genre_name: Genre name to look up; bound as a query parameter.

    Returns:
        The matching genre ID, or ``None`` when no row matches or the lookup
        fails. Errors are printed, not raised.
    """
    conn = None
    genre_id = None
    try:
        # NOTE(review): schema and table are formatted into the SQL text as
        # identifiers, so they must come from trusted callers.
        sql_command = f"SELECT id FROM {schema}.{table} WHERE name = %s;"

        # The SQL text is the same for every genre, so the key must also
        # carry the genre name. Without it, every lookup would return the ID
        # cached by whichever genre was requested first.
        cache_key = f"genre_id_query:{sql_command}:{genre_name}"

        # Check the cache before opening a database connection.
        cached_result = redis_client.get(cache_key)
        if cached_result:
            print("cached result: ")
            return json.loads(cached_result)

        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()

        cur.execute(sql_command, (genre_name,))
        result = cur.fetchone()

        if result:
            genre_id = result[0]

        # A miss is cached too: None is stored as JSON null.
        redis_client.setex(cache_key, 600000, json.dumps(genre_id))

        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Database error: {error}")
    finally:
        if conn is not None:
            conn.close()

    return genre_id
# Look up the ID of the "Action" genre in public.genre and print it.
genre_id = getGenreId("public", "genre", "Action")
print(genre_id)

## Test retrieving our languages with Redis enabled

In [None]:
def getLanguages():
    """Return the distinct original languages found in the movies table.

    The result is looked up in Redis first. On a miss the distinct non-NULL
    ``original_language`` values are queried in sorted order and cached for
    600000 seconds.

    Returns:
        A list of ``(code, label)`` tuples. ``label`` is the ``LANGUAGE_MAP``
        entry for the code, or the code itself when the map has no entry.
        The list is empty when the lookup fails; errors are printed, not
        raised.
    """
    conn = None
    languages = []
    try:
        sql_command = "SELECT DISTINCT original_language FROM movies WHERE original_language IS NOT NULL ORDER BY original_language;"

        cache_key = f"lang_query:{sql_command}"

        # Check the cache before opening a database connection.
        cached_result = redis_client.get(cache_key)
        if cached_result:
            print("cached result: ")
            # JSON stores the pairs as lists; convert them back to tuples so
            # cached and fresh results have the same shape.
            return [tuple(pair) for pair in json.loads(cached_result)]

        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()

        cur.execute(sql_command)
        rows = cur.fetchall()

        languages = [(row[0], LANGUAGE_MAP.get(row[0], row[0])) for row in rows]
        redis_client.setex(cache_key, 600000, json.dumps(languages))

        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Database error: {error}")
    finally:
        if conn is not None:
            conn.close()

    return languages
# Fetch the language list and print it.
lang = getLanguages()
print(lang)

## Test the Top 5 Movie Query with Redis enabled

In [None]:
def decimal_default(obj):
    """Serve as a ``json.dumps`` ``default`` hook that turns Decimals into floats.

    Args:
        obj: The value the JSON encoder could not serialize on its own.

    Returns:
        ``float(obj)`` when *obj* is a ``decimal.Decimal``.

    Raises:
        TypeError: For any other type.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError("Type not serializable")
    return float(obj)
    
def getMoviesByGenreAndLanguage(genre_id, language):
    """Run the templated movie query for one genre and language, with caching.

    The SQL template is read from ``../sql/genre-lookup-Copy1.sql`` and its
    ``{GENRE_ID}`` and ``{LANGUAGE_PARAM}`` placeholders are filled in by
    text substitution. The finished query is part of the Redis cache key.

    Args:
        genre_id: Value substituted for ``{GENRE_ID}`` (converted with ``str``).
        language: String substituted for ``{LANGUAGE_PARAM}``.

    Returns:
        The JSON-decoded cached value on a cache hit, otherwise the rows
        fetched from the database. The list is empty when the lookup fails;
        errors are printed, not raised.
    """
    connection = None
    movies = []
    try:
        connection = psycopg2.connect(**config())
        cursor = connection.cursor()

        with open("../sql/genre-lookup-Copy1.sql", "r") as template_file:
            template = template_file.read()

        # NOTE(review): the values are spliced into the SQL as plain text, so
        # both arguments must come from trusted callers. Confirm against the
        # template whether they could be bound as parameters instead.
        sql_query = template.replace("{GENRE_ID}", str(genre_id)).replace(
            "{LANGUAGE_PARAM}", language
        )

        cache_key = f"{genre_id}_{language}_test_query:{sql_query}"
        cache_hit = redis_client.get(cache_key)
        if cache_hit:
            print("cached result: ")
            return json.loads(cache_hit)

        cursor.execute(sql_query)
        movies = cursor.fetchall()

        # Decimal values are written to the cache as floats.
        payload = json.dumps(movies, default=decimal_default)
        redis_client.setex(cache_key, 600000, payload)
        cursor.close()
    except Exception as error:
        print(f"Database error: {error}")
    finally:
        if connection is not None:
            connection.close()

    return movies

# Run the movie query for genre id 28 and language 'en'.
movies = getMoviesByGenreAndLanguage(28, 'en')

# Keep only the first column of each row (presumably the title; confirm
# against the SQL template).
movie_names = [record[0] for record in movies]

print(movie_names)

# Gather the normalized (log-scaled) votes per movie

In [None]:
# Load the master movie CSV and report how many rows it contains.
df = pd.read_csv('../data/csv-files/movies-master.csv')
print(len(df))
plt.figure(figsize=(12, 6))

# Take the natural log of every vote count. The errstate block silences
# NumPy's divide-by-zero and invalid-value warnings (e.g. for a count of 0).
with np.errstate(divide='ignore', invalid='ignore'):
        logDF = np.log(df['vote_count'])

# One point per movie: x is the row position, y is the log of the vote count.
plt.scatter(range(len(df)), logDF, color='blue', alpha=0.5)

plt.title('Distribution of Vote Counts by Movie - Normalized')
plt.xlabel('Movies')
plt.ylabel('Votes Cast')

# Fix the visible y-range to 0-25; the plotted values are logs, not raw counts.
plt.ylim(0, 25)

plt.grid(True)

plt.show()


In [None]:
# Average of the vote_count column across all loaded movies.
mean_vc = df['vote_count'].mean()

In [None]:
# Natural log of ten times the mean vote count.
print(np.log(mean_vc * 10))

In [None]:
# Natural log of twice the mean vote count.
print(np.log(mean_vc * 2))

## Query the movie columns when a specific film title is matched

In [None]:
def getMovieByTitle(schema, table, title):
    """Fetch every row of ``schema.table`` whose ``title`` equals *title*.

    Args:
        schema: Schema name, formatted into the SQL text.
        table: Table name, formatted into the SQL text.
        title: Exact title to match; bound as a query parameter.

    Returns:
        A list of matching rows with all columns. The list is empty when
        nothing matches or the lookup fails; errors are printed, not raised.
    """
    connection = None
    matches = []
    try:
        connection = psycopg2.connect(**config())
        cursor = connection.cursor()

        # Only the title is passed as a bound parameter.
        sql_command = f"""
        SELECT * FROM {schema}.{table} 
        WHERE title = %s
        """
        cursor.execute(sql_command, (title,))
        matches = cursor.fetchall()

        cursor.close()
    except Exception as error:
        print(f"Database error: {error}")
    finally:
        if connection is not None:
            connection.close()

    return matches

# Define the schema and table
schema = "public"
table = "movies"
title = "The Dark Knight"

# Look the movie up by its exact title.
movie_details = getMovieByTitle(schema, table, title)

# Report every matching row, or say so when nothing matched.
if not movie_details:
    print("No movie found with that title.")
else:
    for row in movie_details:
        print(f"Movie Details: {row}")
