In [1]:
!pip install sqlmodel==0.0.22 -q
!pip install beautifulsoup4==4.12.0 -q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import requests
import sqlmodel
from bs4 import BeautifulSoup as bs
from typing import List

In [None]:
class ArtistArtMovement(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artist and ArtMovement

    Each row pairs one artist with one art movement; the two columns together
    form the composite primary key.  The model is a mapped table exposing an
    ``artist`` relationship because ``Artist.art_movements`` targets it with
    ``back_populates="artist"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artist.name is not Artist's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artist.slug instead.
    artist_name: str = sqlmodel.Field(primary_key=True, foreign_key="artist.name")
    art_movement: str = sqlmodel.Field(primary_key=True)

    # Other side of Artist.art_movements.
    artist: "Artist" = sqlmodel.Relationship(back_populates="art_movements")


class ArtistGenre(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artist and Genre

    Each row pairs one artist with one genre; the two columns together form
    the composite primary key.  The model is a mapped table exposing an
    ``artist`` relationship because ``Artist.genres`` targets it with
    ``back_populates="artist"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artist.name is not Artist's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artist.slug instead.
    artist_name: str = sqlmodel.Field(primary_key=True, foreign_key="artist.name")
    genre: str = sqlmodel.Field(primary_key=True)

    # Other side of Artist.genres.
    artist: "Artist" = sqlmodel.Relationship(back_populates="genres")


class ArtistField(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artist and Field

    Each row pairs one artist with one field; the two columns together form
    the composite primary key.  The model is a mapped table exposing an
    ``artist`` relationship because ``Artist.fields`` targets it with
    ``back_populates="artist"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artist.name is not Artist's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artist.slug instead.
    artist_name: str = sqlmodel.Field(primary_key=True, foreign_key="artist.name")
    field: str = sqlmodel.Field(primary_key=True)

    # Other side of Artist.fields.
    artist: "Artist" = sqlmodel.Relationship(back_populates="fields")


class ArtistNationality(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artist and Nationality

    Each row pairs one artist with one nationality; the two columns together
    form the composite primary key.  The model is a mapped table exposing an
    ``artist`` relationship because ``Artist.nationalities`` targets it with
    ``back_populates="artist"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artist.name is not Artist's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artist.slug instead.
    artist_name: str = sqlmodel.Field(primary_key=True, foreign_key="artist.name")
    nationality: str = sqlmodel.Field(primary_key=True)

    # Other side of Artist.nationalities.
    artist: "Artist" = sqlmodel.Relationship(back_populates="nationalities")


class ArtistInstitution(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artist and Institution

    Each row pairs one artist with one institution; the two columns together
    form the composite primary key.  The model is a mapped table exposing an
    ``artist`` relationship because ``Artist.institutions`` targets it with
    ``back_populates="artist"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artist.name is not Artist's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artist.slug instead.
    artist_name: str = sqlmodel.Field(primary_key=True, foreign_key="artist.name")
    institution: str = sqlmodel.Field(primary_key=True)

    # Other side of Artist.institutions.
    artist: "Artist" = sqlmodel.Relationship(back_populates="institutions")


class ArtistBase(sqlmodel.SQLModel):
    """
    Plain (non-table) model holding the column fields of an artist

    ``slug`` is the primary key.  ``wikipedia_article`` and ``article`` accept
    ``None`` and default to it, so they may be omitted when a model is built.
    """

    name: str = sqlmodel.Field()
    slug: str = sqlmodel.Field(primary_key=True, default=None)
    # An Optional annotation alone does not make a field optional: without an
    # explicit default the field is still required during validation.
    wikipedia_article: str | None = None
    article: str | None = None


class Artist(ArtistBase, table=True):
    """
    Artist table model

    Adds relationship attributes on top of the columns inherited from
    ArtistBase.  Every relationship names, through ``back_populates``, the
    attribute on the related model that points back at this one.
    """

    # Related Artwork rows; Artwork.artist is the reverse side.
    artworks: list["Artwork"] = sqlmodel.Relationship(
        back_populates="artist",
    )

    # Link rows, one collection per kind of association.
    art_movements: list[ArtistArtMovement] = sqlmodel.Relationship(
        back_populates="artist",
    )
    genres: list[ArtistGenre] = sqlmodel.Relationship(
        back_populates="artist",
    )
    fields: list[ArtistField] = sqlmodel.Relationship(
        back_populates="artist",
    )
    nationalities: list[ArtistNationality] = sqlmodel.Relationship(
        back_populates="artist",
    )
    institutions: list[ArtistInstitution] = sqlmodel.Relationship(
        back_populates="artist",
    )

In [None]:
class ArtworkStyle(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artwork and Style

    Each row pairs one artwork with one style; the two columns together form
    the composite primary key.  The model is a mapped table exposing an
    ``artwork`` relationship because ``Artwork.styles`` targets it with
    ``back_populates="artwork"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artwork.name is not Artwork's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artwork.slug instead.
    artwork_name: str = sqlmodel.Field(primary_key=True, foreign_key="artwork.name")
    style: str = sqlmodel.Field(primary_key=True)

    # Other side of Artwork.styles.
    artwork: "Artwork" = sqlmodel.Relationship(back_populates="styles")


class ArtworkGenre(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artwork and Genre

    Each row pairs one artwork with one genre; the two columns together form
    the composite primary key.  The model is a mapped table exposing an
    ``artwork`` relationship because ``Artwork.genres`` targets it with
    ``back_populates="artwork"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artwork.name is not Artwork's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artwork.slug instead.
    artwork_name: str = sqlmodel.Field(primary_key=True, foreign_key="artwork.name")
    genre: str = sqlmodel.Field(primary_key=True)

    # Other side of Artwork.genres.
    artwork: "Artwork" = sqlmodel.Relationship(back_populates="genres")


class ArtworkMedia(sqlmodel.SQLModel, table=True):
    """
    Relationship model between Artwork and Media

    Each row pairs one artwork with one medium; the two columns together form
    the composite primary key.  The model is a mapped table exposing an
    ``artwork`` relationship because ``Artwork.media`` targets it with
    ``back_populates="artwork"``, which needs a mapped class with that
    attribute and a foreign key to join on.
    """

    # NOTE(review): artwork.name is not Artwork's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm, or point
    # this column at artwork.slug instead.
    artwork_name: str = sqlmodel.Field(primary_key=True, foreign_key="artwork.name")
    media: str = sqlmodel.Field(primary_key=True)

    # Other side of Artwork.media.
    artwork: "Artwork" = sqlmodel.Relationship(back_populates="media")


class ArtworkBase(sqlmodel.SQLModel):
    """
    Plain (non-table) model holding the column fields of an artwork

    ``slug`` is the primary key and ``artist_name`` is a foreign key into the
    artist table.
    """

    slug: str = sqlmodel.Field(primary_key=True, default=None)
    name: str
    # foreign_key takes "<table>.<column>".  Unless __tablename__ is set,
    # SQLModel names the table after the lower-cased class name, so Artist is
    # stored as "artist"; "Artist.name" would reference a missing table.
    # NOTE(review): artist.name is not Artist's primary key; databases that
    # enforce foreign keys need a UNIQUE constraint on it -- confirm.
    artist_name: str = sqlmodel.Field(foreign_key="artist.name")


class Artwork(ArtworkBase, table=True):
    """
    Artwork table model

    Adds relationship attributes on top of the columns inherited from
    ArtworkBase.  Every relationship names, through ``back_populates``, the
    attribute on the related model that points back at this one.
    """

    # The related Artist; Artist.artworks is the reverse side.
    artist: Artist = sqlmodel.Relationship(
        back_populates="artworks",
    )

    # Link rows, one collection per kind of association.
    styles: list[ArtworkStyle] = sqlmodel.Relationship(
        back_populates="artwork",
    )
    genres: list[ArtworkGenre] = sqlmodel.Relationship(
        back_populates="artwork",
    )
    media: list[ArtworkMedia] = sqlmodel.Relationship(
        back_populates="artwork",
    )

In [None]:
def get_artists_by_movement(movement: str):
    """
    Placeholder, not implemented yet.

    ``movement`` is accepted but ignored; the call always returns None.
    """
    return None


def get_artists_by_school_or_group(school_or_group: str):
    """
    Placeholder, not implemented yet.

    ``school_or_group`` is accepted but ignored; the call always returns None.
    """
    return None


def get_artists_by_genre(genre: str):
    """
    Placeholder, not implemented yet.

    ``genre`` is accepted but ignored; the call always returns None.
    """
    return None


def get_artists_by_field(field: str):
    """
    Placeholder, not implemented yet.

    ``field`` is accepted but ignored; the call always returns None.
    """
    return None


def get_artist_by_nationality(nationality: str):
    """
    Placeholder, not implemented yet.

    ``nationality`` is accepted but ignored; the call always returns None.
    """
    return None


def get_institutions(timeout: float = 30.0) -> list[tuple[str, str]]:
    """
    Returns a list of (institution name, url)
    https://www.wikiart.org/en/artists-by-art-institution

    timeout: seconds to wait for the HTTP response before giving up

    The url element is the ``href`` of the list item's first link, returned
    exactly as it appears in the page.  List items without a link, or whose
    link has no ``href``, are skipped.

    Raises requests.HTTPError when the server answers with an error status and
    ValueError when the page does not contain exactly one institutions list.
    """
    base_url = "https://www.wikiart.org/en/artists-by-art-institution/"
    # Without a timeout requests can wait forever on a stalled connection.
    resp = requests.get(base_url, timeout=timeout)
    # Fail on HTTP errors here instead of parsing an error page below.
    resp.raise_for_status()

    soup = bs(resp.text, 'html.parser')
    bullets = soup.find_all('ul', {"class": "dictionaries-list", "ng-if": "!tabChanged"})
    if len(bullets) != 1:
        # The check fails for zero matches as well as for several, so report
        # the actual count.
        raise ValueError(f"Expected exactly one list of institutions, found {len(bullets)}")

    # NOTE(review): li.text covers the whole list item, so the name can carry
    # text rendered next to the link (the stored notebook output shows a
    # trailing number) -- confirm whether that should be stripped.
    institutions = [
        (li.text.strip(), li.a["href"])
        for li in bullets[0].find_all("li")
        if li.a is not None and "href" in li.a.attrs
    ]
    return institutions


def get_artists_by_institution(institution: str, url: str, timeout: float = 30.0) -> list[Artist]:
    """
    Returns a list of artists that have been associated with the institution
    Institution: Name of the institution (currently not used by the lookup)
    url: suffix url /en/artists-by-art-institution/{institute}
    timeout: seconds to wait for the HTTP response before giving up

    One Artist is built per list item that contains a link with an ``href``;
    other items are skipped.  The objects are only constructed here, nothing
    is written to a database.

    Raises requests.HTTPError when the server answers with an error status and
    ValueError when the page has no <main> element or no list inside it.
    """
    list_url = f'https://www.wikiart.org{url}/text-list'
    # Without a timeout requests can wait forever on a stalled connection.
    resp = requests.get(list_url, timeout=timeout)
    # Fail on HTTP errors here instead of parsing an error page below.
    resp.raise_for_status()

    soup = bs(resp.text, 'html.parser')
    main = soup.find('main')
    if main is None:
        raise ValueError("Main content not found")

    ul = main.find('ul')
    if ul is None:
        raise ValueError("List of artists not found")

    artists = []
    for li in ul.find_all('li'):
        link = li.a
        if link is None or "href" not in link.attrs:
            continue
        # Tag.content is not a text accessor (bs4 treats it as a lookup for a
        # child <content> tag); get_text() returns the link's visible text.
        name = link.get_text(strip=True)
        # NOTE(review): assumes the last path segment of the href is the
        # artist slug -- TODO confirm against the page markup.
        slug = link["href"].rstrip("/").rsplit("/", 1)[-1]
        artists.append(Artist(name=name, slug=slug, wikipedia_article=None, article=None))
    return artists

In [20]:
institutions = get_institutions()

# Only the first institution is processed (slice [0:1]).
for institute, url in institutions[0:1]:
    print(f'{institute=}, {url=}')
    # The function defined in this notebook is get_artists_by_institution;
    # the singular spelling is undefined and raises NameError on a fresh kernel.
    get_artists_by_institution(institute, url)

institute='South Australian School of Design, Adelaide, Australia 3', url='/en/artists-by-art-institution/south-australian-school-of-design-adelaide'
<Response [200]>
