In [1]:
!pip install sqlmodel==0.0.22 -q
!pip install beautifulsoup4==4.12.0 -q

In [1]:
import requests
import sqlmodel
from bs4 import BeautifulSoup as bs
from typing import List

In [2]:
class ArtistArtMovement(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artist_id: int = sqlmodel.Field(foreign_key="artist.id")
    art_movement: str
    artist: "Artist" = sqlmodel.Relationship(back_populates="art_movements")


class ArtistGenre(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artist_id: int = sqlmodel.Field(foreign_key="artist.id")
    genre: str
    artist: "Artist" = sqlmodel.Relationship(back_populates="genres")


class ArtistField(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artist_id: int = sqlmodel.Field(foreign_key="artist.id")
    field: str
    artist: "Artist" = sqlmodel.Relationship(back_populates="fields")


class ArtistNationality(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artist_id: int = sqlmodel.Field(foreign_key="artist.id")
    nationality: str
    artist: "Artist" = sqlmodel.Relationship(back_populates="nationalities")


class ArtistInstitution(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artist_id: int = sqlmodel.Field(foreign_key="artist.id")
    institution: str
    artist: "Artist" = sqlmodel.Relationship(back_populates="institutions")


class ArtistBase(sqlmodel.SQLModel):
    id: int | None = sqlmodel.Field(primary_key=True)
    name: str
    url: str
    wikipedia_article: str | None
    wikipedia_url: str | None
    article: str | None


class Artist(ArtistBase, table=True):
    artworks: list["Artwork"] = sqlmodel.Relationship(back_populates='artist')
    art_movements: list["ArtistArtMovement"] = sqlmodel.Relationship(back_populates='artist')
    genres: list["ArtistGenre"] = sqlmodel.Relationship(back_populates='artist')
    fields: list["ArtistField"] = sqlmodel.Relationship(back_populates='artist')
    nationalities: list["ArtistNationality"] = sqlmodel.Relationship(back_populates='artist')
    institutions: list["ArtistInstitution"] = sqlmodel.Relationship(back_populates='artist')

In [3]:
class ArtworkStyle(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artwork_id: int = sqlmodel.Field(foreign_key="artwork.id")
    style: str
    artwork: "Artwork" = sqlmodel.Relationship(back_populates="styles")


class ArtworkGenre(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artwork_id: int = sqlmodel.Field(foreign_key="artwork.id")
    genre: str
    artwork: "Artwork" = sqlmodel.Relationship(back_populates="genres")


class ArtworkMedia(sqlmodel.SQLModel, table=True):
    id: int = sqlmodel.Field(primary_key=True)
    artwork_id: int = sqlmodel.Field(foreign_key="artwork.id")
    media: str
    artwork: "Artwork" = sqlmodel.Relationship(back_populates="media")


class ArtworkBase(sqlmodel.SQLModel, table=True):
    id: int | None = sqlmodel.Field(primary_key=True)
    url: str
    name: str
    artist_id: int | None = sqlmodel.Field(foreign_key="artist.id")


class Artwork(ArtworkBase, table=True):
    artist: Artist = sqlmodel.Relationship(back_populates="artworks")
    styles: list[ArtworkStyle] = sqlmodel.Relationship(back_populates="artwork")
    genres: list[ArtworkGenre] = sqlmodel.Relationship(back_populates="artwork")
    media: list[ArtworkMedia] = sqlmodel.Relationship(back_populates="artwork")

In [4]:
artwork = Artwork(url="https://www.wikiart.org/en/claude-monet/water-lilies-1916", name="Water Lilies")

AttributeError: 'NoneType' object has no attribute 'set'

In [None]:
def get_artists_by_movement(movement: str):
    pass


def get_artists_by_school_or_group(school_or_group: str):
    pass


def get_artists_by_genre(genre: str):
    pass


def get_artists_by_field(field: str):
    pass


def get_artist_by_nationality(nationality: str):
    pass


def get_institutions() -> list[str]:
    """
    Returns a list of (institution name, url)
    https://www.wikiart.org/en/artists-by-art-institution
    """
    base_url = "https://www.wikiart.org/en/artists-by-art-institution/"
    resp = requests.get(base_url)
    soup = bs(resp.text, 'html.parser')
    bullets = soup.find_all('ul', {"class": "dictionaries-list", "ng-if": "!tabChanged"})
    if len(bullets) != 1:
        raise ValueError("More than one list of institutions found")

    institutions = [
        (li.text.strip(), li.a["href"]) for li in bullets[0].find_all("li") if li.a and "href" in li.a.attrs
    ]
    return institutions


def get_artists_by_institution(institution: str, url: str) -> list[Artist]:
    """
    Returns a list of artists that have been associated with the institution
    Institution: Name of the institution
    url: suffix url /en/artists-by-art-institution/{institute}


    """
    url = f'https://www.wikiart.org{url}/text-list'
    resp = requests.get(url)

    soup = bs(resp.text, 'html.parser')
    main = soup.find('main')
    if not main:
        raise ValueError("Main content not found")

    ul = main.find('ul')
    if not ul:
        raise ValueError("List of artists not found")

    artists = [(li.a.content) for li in ul.find_all('li')]

In [20]:
institutions = get_institutions()

for institute, url in institutions[0:1]:
    print(f'{institute=}, {url=}')
    get_artist_by_institution(institute, url)

institute='South Australian School of Design, Adelaide, Australia 3', url='/en/artists-by-art-institution/south-australian-school-of-design-adelaide'
<Response [200]>
