In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [2]:
# Load credentials from the local .env file. override=True lets the .env values
# win over variables that already exist in the process environment; without it
# a USER variable exported by the shell (the login name on Unix-like systems)
# would shadow the database user configured in .env.
load_dotenv(override=True)

username, Password = os.getenv("USER"), os.getenv("PASSWORD")
if username is None or Password is None:
    # Fail early: otherwise the f-string below would embed the text "None".
    raise RuntimeError(
        "USER and PASSWORD must be defined (e.g. in .env) to connect to the database"
    )

# Percent-encode the credentials so characters such as '@', ':' or '/' cannot
# corrupt the connection URL.
conn_string = (
    f'postgresql://{quote_plus(username)}:{quote_plus(Password)}@localhost/painting'
)
db = create_engine(conn_string)
conn = db.connect()

In [3]:
# Read every table of the database into its own DataFrame.
# The engine `db` is passed instead of the URL string so that all reads share
# one engine; handing pandas a string makes it build a new engine per call.
work_df = pd.read_sql_table('work', db)
product_size_df = pd.read_sql_table('product_size', db)
subject_df = pd.read_sql_table('subject', db)
artist_df = pd.read_sql_table('artist', db)
canvas_size_df = pd.read_sql_table('canvas_size', db)
image_link_df = pd.read_sql_table('image_link', db)
museum_hours_df = pd.read_sql_table('museum_hours', db)
museum_df = pd.read_sql_table('museum', db)

### 1. Fetch all the paintings which are not displayed in any museum.

In [4]:
# A work with no museum_id is a painting that is not on display in any museum.
has_no_museum = work_df['museum_id'].isna()
not_displayed_paintings = work_df.loc[has_no_museum]
print(not_displayed_paintings.shape[0])
not_displayed_paintings

10223


Unnamed: 0,work_id,name,artist_id,style,museum_id
6,125752,Arabian Horses at Pasture,757,Baroque,
7,125818,Count Halm on His Basedow Estate,757,Baroque,
9,125763,Napoleon Before the Burning City of Smolensk,757,Baroque,
10,125774,Peasants Resting in the Field,757,Baroque,
11,125785,Portrait Oberleutnant Theodor Von Klein,757,Baroque,
...,...,...,...,...,...
14768,8273,Waiting for an Answer,794,American Art,
14772,8283,"West Point, Prout's Neck",794,American Art,
14773,8285,Winding Line,794,American Art,
14774,184820,Untitled,620,Surrealism,


### 2.  Are there museums without any paintings?

In [5]:
# Left-join museums to works; the merge indicator labels museums that matched
# no work as 'left_only', and only those rows are kept.
museum_without_paintings = (
    museum_df
    .merge(work_df, how='left', on='museum_id', indicator=True)
    .loc[lambda joined: joined['_merge'] == 'left_only']
)
print(museum_without_paintings.shape[0])
museum_without_paintings

0


Unnamed: 0,museum_id,name_x,address,city,state,postal,country,phone,url,work_id,name_y,artist_id,style,_merge


#### 3. How many paintings have an asking price of more than their regular price?

In [None]:
# Rows of product_size whose sale (asking) price exceeds the regular price.
filtered_df = product_size_df[
    product_size_df['sale_price'] > product_size_df['regular_price']
]

# The question asks "how many paintings": the same work_id can appear on
# several rows (one per size), so count distinct work_id values, not rows.
print(filtered_df['work_id'].nunique())
filtered_df


#### 4. Identify the paintings whose asking price is less than 50% of their regular price.

In [15]:
# Keep the rows whose sale price is below half of the regular price.
half_regular_price = product_size_df['regular_price'].mul(0.5)
is_deep_discount = product_size_df['sale_price'].lt(half_regular_price)
filtered_product_size_df = product_size_df.loc[is_deep_discount]

# Per-column non-null counts of the matching rows, then the rows themselves.
print(filtered_product_size_df.count())
filtered_product_size_df

work_id          58
size_id          58
sale_price       58
regular_price    58
dtype: int64


Unnamed: 0,work_id,size_id,sale_price,regular_price
220,31780,36,10,125
221,31780,30,10,95
686,31780,36,10,125
687,31780,30,10,95
14073,198417,36,30,125
14074,198417,30,30,95
17331,31974,24,30,85
29406,17351,24,10,85
29407,17351,30,10,95
29408,17351,36,10,125


#### 5. Which canvas size costs the most?

In [16]:
# Highest sale price in product_size (max() skips missing prices).
top_sale_price = product_size_df['sale_price'].max()

# Select the top-priced rows into a new frame. No helper column is written to
# product_size_df, so the shared frame keeps the shape earlier cells display
# and this cell gives the same result however often it is re-run.
top_ranked_df = product_size_df[product_size_df['sale_price'] == top_sale_price]

# size_id is compared as text on both sides, as in the original join, so the
# two tables match even if they store the id with different dtypes.
canvas_labels = canvas_size_df.assign(
    size_key=canvas_size_df['size_id'].astype(str)
)[['size_key', 'label']]

result_df = (
    top_ranked_df
    .assign(size_key=top_ranked_df['size_id'].astype(str))
    .merge(canvas_labels, on='size_key', how='inner')
    .loc[:, ['label', 'sale_price']]
    .rename(columns={'label': 'canva'})
    # Repeated product rows would otherwise list the same canvas size twice.
    .drop_duplicates()
    .reset_index(drop=True)
)

result_df


Unnamed: 0,canva,sale_price
0,"48"" x 96""(122 cm x 244 cm)",1115


#### 6. Delete duplicate records from the work, product_size, subject, and image_link tables.


#### 7. Identify the museums with invalid city information in the given dataset.

#### 8. Museum_Hours table has 1 invalid entry. Identify it and remove it.


#### 9. Fetch the top 10 most famous painting subjects.

#### 10. Identify the museums which are open on both Sunday and Monday. Display museum name and city.

#### 11. How many museums are open every single day?

#### 12. Which are the top 5 most popular museums? (Popularity is defined by the number of paintings in a museum.)

#### 13. Who are the top 5 most popular artists? (Popularity is defined by the number of paintings done by an artist.)

#### 14. Display the 3 least popular canvas sizes.

#### 15. Which museum is open for the longest during a day? Display museum name, state, hours open, and which day.

#### 16. Which museum has the largest number of paintings in the most popular painting style?

#### 17. Identify the artists whose paintings are displayed in multiple countries.

#### 18. Display the country and the city with the largest number of museums. Output two separate columns to mention the city and country. If there are multiple values, separate them with commas.

#### 19. Identify the artists and the museums where the most expensive and least expensive paintings are placed. Display the artist's name, sale price, painting name, museum name, museum city, and canvas label.

#### 20. Which country has the 5th highest number of paintings?

#### 21. Which are the 3 most popular and 3 least popular painting styles?

#### 22. Which artist has the largest number of portrait paintings outside the USA? Display the artist name, number of paintings, and the artist's nationality.