In [1]:
!pip install SPARQLWrapper



In [7]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
# SPARQL endpoint
endpoint_url = "https://query.wikidata.org/sparql"
sparql = SPARQLWrapper(endpoint_url)

# SPARQL query for count
count_query = """
SELECT (COUNT(DISTINCT ?artist) AS ?totalArtists) WHERE {
  ?artist wdt:P31 wd:Q5;                
          wdt:P172 wd:Q49085;           
          wdt:P106 wd:Q1028181.         

  OPTIONAL { ?artist wdt:P569 ?birthDate. }
  OPTIONAL { ?artist wdt:P19 ?birthPlace. }
  OPTIONAL { ?artist wdt:P570 ?deathDate. }
  OPTIONAL { ?artist wdt:P20 ?deathPlace. }
  OPTIONAL { ?artist wdt:P27 ?nationality. }
  OPTIONAL { ?artist wdt:P69 ?education. }
  OPTIONAL { ?artist wdt:P2031 ?yearsActive. }
  OPTIONAL { ?artist wdt:P800 ?notableWorkLabel. }
  OPTIONAL { ?artist wdt:P2561 ?knownForLabel. }
  OPTIONAL { ?artist wdt:P135 ?movement. }
  OPTIONAL { ?artist wdt:P136 ?style. }
  OPTIONAL { ?artist wdt:P856 ?officialWebsite. }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

# Execute the query
sparql.setQuery(count_query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Extract and print the count
total_artists = results['results']['bindings'][0]['totalArtists']['value']
print(f"Total Artists Found: {total_artists}")

Total Artists Found: 535


In [15]:
# SPARQL endpoint
endpoint_url = "https://query.wikidata.org/sparql"
sparql = SPARQLWrapper(endpoint_url)

# Correctly formatted SPARQL query
query = """
SELECT DISTINCT ?artist ?artistLabel ?birthDate ?birthPlaceLabel ?deathDate ?deathPlaceLabel 
       ?nationalityLabel ?educationLabel ?yearsActive 
       ?notableWorkLabel ?knownForLabel ?styleLabel ?movementLabel ?officialWebsite WHERE {

  # Artists must be African American painters
  ?artist wdt:P31 wd:Q5;                # Human
          wdt:P172 wd:Q49085;           # African American ethnicity
          wdt:P106 wd:Q1028181.         # Painter occupation

  # Requesting labels explicitly
  OPTIONAL { ?artist wdt:P569 ?birthDate. }
  OPTIONAL { ?artist wdt:P19 ?birthPlace. }
  OPTIONAL { ?artist wdt:P570 ?deathDate. }
  OPTIONAL { ?artist wdt:P20 ?deathPlace. }

  # Correcting label returns for problematic fields
  OPTIONAL { ?artist wdt:P27 ?nationality. ?nationality rdfs:label ?nationalityLabel. FILTER(LANG(?nationalityLabel) = "en") }
  OPTIONAL { ?artist wdt:P69 ?education. ?education rdfs:label ?educationLabel. FILTER(LANG(?educationLabel) = "en") }
  OPTIONAL { ?artist wdt:P800 ?notableWork. ?notableWork rdfs:label ?notableWorkLabel. FILTER(LANG(?notableWorkLabel) = "en") }
  OPTIONAL { ?artist wdt:P2561 ?knownFor. ?knownFor rdfs:label ?knownForLabel. FILTER(LANG(?knownForLabel) = "en") }
  OPTIONAL { ?artist wdt:P135 ?movement. ?movement rdfs:label ?movementLabel. FILTER(LANG(?movementLabel) = "en") }
  OPTIONAL { ?artist wdt:P136 ?style. ?style rdfs:label ?styleLabel. FILTER(LANG(?styleLabel) = "en") }

  # Keep official websites as expected URLs
  OPTIONAL { ?artist wdt:P856 ?officialWebsite. }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
ORDER BY ?artistLabel
LIMIT 600
"""

# Run the SPARQL query
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()



In [17]:
# Extract results into a DataFrame
data = []
for item in results['results']['bindings']:
    data.append({
        'Artist': item['artistLabel']['value'],
        'Birth Date': item.get('birthDate', {}).get('value', ''),
        'Birth Place': item.get('birthPlaceLabel', {}).get('value', ''),
        'Death Date': item.get('deathDate', {}).get('value', ''),
        'Death Place': item.get('deathPlaceLabel', {}).get('value', ''),
        'Nationality': item.get('nationalityLabel', {}).get('value', ''),
        'Education': item.get('educationLabel', {}).get('value', ''),
        'Years Active': item.get('yearsActive', {}).get('value', ''),
        'Notable Work': item.get('notableWorkLabel', {}).get('value', ''),
        'Known For': item.get('knownForLabel', {}).get('value', ''),
        'Style': item.get('styleLabel', {}).get('value', ''),
        'Movement': item.get('movementLabel', {}).get('value', ''),
        'Official Website': item.get('officialWebsite', {}).get('value', '')
    })

# Convert to DataFrame
df_artists = pd.DataFrame(data)

In [19]:
df_artists.head(50)

Unnamed: 0,Artist,Birth Date,Birth Place,Death Date,Death Place,Nationality,Education,Years Active,Notable Work,Known For,Style,Movement,Official Website
0,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Kansas,,,,portrait,Harlem Renaissance,
1,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Nebraska–Lincoln,,,,portrait,Harlem Renaissance,
2,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,Topeka High School,,,,portrait,Harlem Renaissance,
3,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Kansas,,,,self-portrait,Harlem Renaissance,
4,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Nebraska–Lincoln,,,,self-portrait,Harlem Renaissance,
5,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,Topeka High School,,,,self-portrait,Harlem Renaissance,
6,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Kansas,,,,still life,Harlem Renaissance,
7,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Nebraska–Lincoln,,,,still life,Harlem Renaissance,
8,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,Topeka High School,,,,still life,Harlem Renaissance,
9,Aaron Douglas,1899-05-26T00:00:00Z,Topeka,1979-02-02T00:00:00Z,Nashville,United States of America,University of Kansas,,,,history painting,Harlem Renaissance,


In [21]:
df_artists.tail(50)

Unnamed: 0,Artist,Birth Date,Birth Place,Death Date,Death Place,Nationality,Education,Years Active,Notable Work,Known For,Style,Movement,Official Website
550,Harlan Jackson,1918-01-01T00:00:00Z,Cleburne,1993-01-01T00:00:00Z,,United States of America,Emporia State University,,,,abstract art,,
551,Harlan Jackson,1918-01-01T00:00:00Z,Cleburne,1993-01-01T00:00:00Z,,United States of America,California School of Fine Arts,,,,abstract art,,
552,Harlan Jackson,1918-01-01T00:00:00Z,Cleburne,1993-01-01T00:00:00Z,,United States of America,Hans Hoffman School of Fine Art,,,,abstract art,,
553,Harvey Ramseur,,Greensboro,,,,University of Connecticut,,,,relief sculpture,,
554,Harvey Ramseur,,Greensboro,,,,Occidental College,,,,relief sculpture,,
555,Harvey Ramseur,,Greensboro,,,,North Carolina Central University,,,,relief sculpture,,
556,Hayward Oubre,1916-01-01T00:00:00Z,,2006-01-01T00:00:00Z,,United States of America,,,,,,,
557,Hector Hyppolite,1894-09-16T00:00:00Z,Saint-Marc,1948-06-09T00:00:00Z,,Haiti,,,,,,,
558,Henry Ossawa Tanner,1859-06-21T00:00:00Z,Pittsburgh,1937-05-24T00:00:00Z,6th arrondissement of Paris,United States of America,Académie Julian,,The Thankful Poor,,landscape painting,realism,
559,Henry Ossawa Tanner,1859-06-21T00:00:00Z,Pittsburgh,1937-05-24T00:00:00Z,6th arrondissement of Paris,United States of America,Académie Julian,,The Young Sabot Maker,,landscape painting,realism,


In [None]:
# Group by Artist and aggregate unique values for each field
df_cleaned = df_artists.groupby('Artist', as_index=False).agg({
    'Birth Date': 'first',
    'Birth Place': 'first',
    'Death Date': 'first',
    'Death Place': 'first',
    'Nationality': 'first',
    'Education': lambda x: ', '.join(pd.unique(x.dropna())),
    'Years Active': 'first',
    'Notable Work': lambda x: ', '.join(pd.unique(x.dropna())),
    'Known For': lambda x: ', '.join(pd.unique(x.dropna())),
    'Style': lambda x: ', '.join(pd.unique(x.dropna())),
    'Movement': lambda x: ', '.join(pd.unique(x.dropna())),
    'Official Website': 'first'
})

# Display the cleaned DataFrame
df_cleaned.head()