# Museum Analysis

In [1]:
import os
import pandas as pd
from psycopg2 import connect, Error
from psycopg2.extras import RealDictCursor
from sqlalchemy import create_engine, text
import matplotlib.pyplot as plt

In [2]:
# Database connection settings, each read from the environment variable of the
# same name. os.getenv yields None for any variable that is not set.
DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME = (
    os.getenv(setting)
    for setting in ('DB_HOST', 'DB_PORT', 'DB_USER', 'DB_PASSWORD', 'DB_NAME')
)

In [6]:
# Open a psycopg2 connection from the DB_* settings; RealDictCursor makes its
# cursors return each row as a dict-like object keyed by column name.
conn = connect(
    cursor_factory=RealDictCursor,
    host=DB_HOST,
    port=DB_PORT,
    dbname=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
)

In [33]:
def run_sql_query(query: str) -> list:
    """Run a single SQL statement on a fresh connection and return its rows.

    A new connection is opened from the DB_* settings for every call, the
    statement is executed and committed, and the connection is always closed
    again before returning.

    Args:
        query: The SQL text to execute.

    Returns:
        A list with one plain ``dict`` per result row (column name -> value).
        The list is empty when the statement produces no result set or when
        a database error occurred (the error is printed and the transaction
        is rolled back).
    """
    result = []
    # Pre-bind both handles so the cleanup below is safe even when connect()
    # itself fails and nothing was ever opened.
    connection = None
    cursor = None
    try:
        connection = connect(
            dbname=DB_NAME,
            host=DB_HOST,
            port=DB_PORT,
            user=DB_USER,
            password=DB_PASSWORD,
            cursor_factory=RealDictCursor,
        )
        connection.autocommit = False
        cursor = connection.cursor()

        cursor.execute(query)
        connection.commit()

        # cursor.description is None for statements that return no rows
        # (e.g. INSERT without RETURNING); fetchall() would raise for those.
        rows = cursor.fetchall() if cursor.description is not None else []
        result = [dict(row) for row in rows]
        print(result)
        print(rows)

    except Error as e:
        print(e)
        if connection is not None:
            connection.rollback()

    finally:
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()

    # Returned outside ``finally``: a ``return`` inside ``finally`` would
    # silently discard any exception that is still propagating.
    return result

## Which exhibition is most frequently visited?

In [34]:
# Rank exhibits by their combined number of recorded events: the inner query
# counts non-NULL `id` values in rating_events and non-NULL `support_value_id`
# values in support_events per exhibit, stacks both with UNION ALL, and the
# outer query sums the two counts per exhibit and keeps only the top one.
# NOTE(review): this uses rating/support events as a proxy for visits —
# confirm that is the intended definition of "visited".
most_visited_exhibition = """
    SELECT exhibit_id, SUM(event_count) AS total_count
    FROM (
    SELECT exhibit_id, COUNT(id) AS event_count
    FROM rating_events
    GROUP BY exhibit_id
    
    UNION ALL
    
    SELECT exhibit_id, COUNT(support_value_id) AS event_count
    FROM support_events
    GROUP BY exhibit_id
    ) AS combined_events
    GROUP BY exhibit_id
    ORDER BY total_count DESC
    LIMIT 1;
    """

# run_sql_query prints the fetched rows and returns them as a list of dicts.
query_result = run_sql_query(most_visited_exhibition)
# Bare trailing expression so the notebook shows the sentence as the cell's
# output; indexing [0] assumes the query returned at least one row.
f'The most frequently visited exhibition is exhibit {query_result[0]["exhibit_id"]}'

[{'exhibit_id': 4, 'total_count': Decimal('16032')}]
[RealDictRow([('exhibit_id', 4), ('total_count', Decimal('16032'))])]


'The most frequently visited exhibition is exhibit 4'

## What hour of the day has the most ratings?

In [None]:
# TODO: placeholder — the query answering this question has not been written
# yet; the string is empty and is never executed in this cell.
most_popular_hour = """"""

## What is the average rating for each exhibition?

In [43]:
# Per-exhibit mean of rating_value_id, rounded to 2 decimal places and sorted
# from highest to lowest average.
# NOTE(review): this averages the rating_value_id column directly — presumably
# the ids coincide with the numeric rating scale; confirm against the schema.
average_exhibition_ratings = """
    SELECT exhibit_id, ROUND(AVG(rating_value_id), 2) as average_rating
    FROM rating_events 
    GROUP BY exhibit_id
    ORDER BY average_rating DESC
"""

# run_sql_query prints the fetched rows and returns them as a list of dicts.
query_result = run_sql_query(average_exhibition_ratings)
# NOTE(review): the next two expressions are evaluated and then discarded —
# a notebook cell only displays the value of its final expression.
query_result[0]['average_rating']
float(query_result[0]['average_rating'])
# NOTE(review): empty f-string, so the cell's displayed value is ''. This looks
# like an unfinished summary sentence — confirm and complete or remove.
f""

[{'exhibit_id': 4, 'average_rating': Decimal('3.83')}, {'exhibit_id': 2, 'average_rating': Decimal('2.93')}, {'exhibit_id': 1, 'average_rating': Decimal('2.92')}, {'exhibit_id': 3, 'average_rating': Decimal('2.43')}, {'exhibit_id': 5, 'average_rating': Decimal('2.22')}]
[RealDictRow([('exhibit_id', 4), ('average_rating', Decimal('3.83'))]), RealDictRow([('exhibit_id', 2), ('average_rating', Decimal('2.93'))]), RealDictRow([('exhibit_id', 1), ('average_rating', Decimal('2.92'))]), RealDictRow([('exhibit_id', 3), ('average_rating', Decimal('2.43'))]), RealDictRow([('exhibit_id', 5), ('average_rating', Decimal('2.22'))])]


''

## What proportion of all 4+ ratings are given to exhibition 4?

In [44]:
# For every exhibit, the percentage (2 decimal places) of all rows with
# rating_value_id >= 4 that belong to that exhibit: the per-exhibit COUNT(*)
# is divided by SUM(COUNT(*)) OVER (), i.e. the total across all groups.
# Multiplying by 100.0 keeps the division from being integer division.
# The query returns one row per exhibit rather than only exhibit 4, so the
# figure for exhibit 4 has to be read out of the returned list.
exhibit_four_ratings_proportion = """
    SELECT exhibit_id, 
    ROUND((COUNT(*) * 100.0 / SUM(COUNT(*)) OVER ()), 2) AS proportion_percentage
    FROM rating_events
    WHERE rating_value_id >= 4
    GROUP BY exhibit_id
"""
# Last expression in the cell, so the returned list is also displayed as the
# cell's output (in addition to what run_sql_query prints).
run_sql_query(exhibit_four_ratings_proportion)

[{'exhibit_id': 1, 'proportion_percentage': Decimal('15.07')}, {'exhibit_id': 3, 'proportion_percentage': Decimal('12.79')}, {'exhibit_id': 5, 'proportion_percentage': Decimal('9.13')}, {'exhibit_id': 4, 'proportion_percentage': Decimal('44.75')}, {'exhibit_id': 2, 'proportion_percentage': Decimal('18.26')}]
[RealDictRow([('exhibit_id', 1), ('proportion_percentage', Decimal('15.07'))]), RealDictRow([('exhibit_id', 3), ('proportion_percentage', Decimal('12.79'))]), RealDictRow([('exhibit_id', 5), ('proportion_percentage', Decimal('9.13'))]), RealDictRow([('exhibit_id', 4), ('proportion_percentage', Decimal('44.75'))]), RealDictRow([('exhibit_id', 2), ('proportion_percentage', Decimal('18.26'))])]


[{'exhibit_id': 1, 'proportion_percentage': Decimal('15.07')},
 {'exhibit_id': 3, 'proportion_percentage': Decimal('12.79')},
 {'exhibit_id': 5, 'proportion_percentage': Decimal('9.13')},
 {'exhibit_id': 4, 'proportion_percentage': Decimal('44.75')},
 {'exhibit_id': 2, 'proportion_percentage': Decimal('18.26')}]

## Are positive ratings more frequent before or after 1pm?

## How many ratings each hour are above the average rating for exhibition 4?

## Do Zoology exhibitions get better ratings than other types?

## Which exhibition has the most emergencies?

## Which exhibitions receive fewer assistance requests than the average?

## Are there particular times when assistance requests/emergencies are more likely?

## Are emergencies more likely in exhibitions that are more exciting than others?

## Which floors are above average for ratings?