# Querying and Visualizing Wikidata

This script walks through using Python to query Wikidata with SPARQL and to visualize the results with pandas and Plotly.

Created by Jay Winkler, Alex Wermer-Colan, Synatra Smith, and Rebecca Bayek

# Installing Libraries

In [None]:
!pip install SPARQLWrapper
%load_ext google.colab.data_table 
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
import requests
import json
!pip install -U plotly
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

Collecting SPARQLWrapper
  Downloading SPARQLWrapper-1.8.5-py3-none-any.whl (26 kB)
Collecting rdflib>=4.0
  Downloading rdflib-6.1.1-py3-none-any.whl (482 kB)
[K     |████████████████████████████████| 482 kB 7.6 MB/s 
Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 478 kB/s 
Installing collected packages: isodate, rdflib, SPARQLWrapper
Successfully installed SPARQLWrapper-1.8.5 isodate-0.6.1 rdflib-6.1.1
Collecting plotly
  Downloading plotly-5.5.0-py2.py3-none-any.whl (26.5 MB)
[K     |████████████████████████████████| 26.5 MB 1.9 MB/s 
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-5.5.0 tenacity-8.0.1


# Creating a Wikidata Query in SPARQL

The following query is designed to capture a few key pieces of information about every artist with a US birthplace. The third line of the where statement asks WDQS to include every artist that is from a US state, a US city, or the United States item itself.

In [None]:
# Register the query text on the SPARQL client (nothing is sent yet).
#
# The query selects every item whose occupation (P106) is Q483501, together
# with its place(s) of birth (P19) and, when present, sex/gender (P21), sexual
# orientation (P91) and ethnic group (P172). Birthplace labels are joined into
# one comma-separated string per result row.
#
# The birthplace has to satisfy one of three alternatives:
#   1. it is an instance of (a subclass of) Q35657,
#   2. it is an instance of (a subclass of) Q1093829,
#   3. it is the United States item (Q30) itself.
# Alternative 3 used to read `?birthPlace wdt:P19 wd:Q30`, which tests the
# birthplace's own place-of-birth statement rather than the artist's, so it
# did not express "born in the United States item". It now pins ?birthPlace
# to wd:Q30 with an inline VALUES clause.
sparql.setQuery("""
SELECT
    ?artist ?artistLabel ?sexGenderLabel ?sexualOrientationLabel ?ethnicityLabel
    (group_concat(DISTINCT(?birthPlaceLabel);separator=", ") as ?birthPlaces)
    
WHERE
{
    ?artist wdt:P106 wd:Q483501 .
    ?artist wdt:P19 ?birthPlace .
    { ?birthPlace wdt:P31/wdt:P279* wd:Q35657. } UNION { ?birthPlace wdt:P31/wdt:P279* wd:Q1093829. } UNION { VALUES ?birthPlace { wd:Q30 } }
    OPTIONAL { ?artist wdt:P21 ?sexGender. }
    OPTIONAL { ?artist wdt:P91 ?sexualOrientation. }
    OPTIONAL { ?artist wdt:P172 ?ethnicity. }
    SERVICE wikibase:label { 
    bd:serviceParam wikibase:language "en". 
    ?artist rdfs:label ?artistLabel . 
    ?sexGender rdfs:label ?sexGenderLabel .
    ?birthPlace rdfs:label ?birthPlaceLabel .
    ?sexualOrientation rdfs:label ?sexualOrientationLabel .
    ?ethnicity rdfs:label ?ethnicityLabel .
  }
}
GROUP BY ?artist ?artistLabel ?sexGenderLabel ?sexualOrientationLabel ?ethnicityLabel
ORDER BY ?artistLabel
""")


In [None]:
# Ask the endpoint for JSON, run the stored query, and decode the response
# into Python objects.
sparql.setReturnFormat(JSON)
query_result = sparql.query()
results = query_result.convert()

# Creating Dataframe

In [None]:
# Flatten the result bindings into a DataFrame; nested keys become dotted
# column names such as 'artist.value'.
bindings = results['results']['bindings']
data = pd.json_normalize(bindings)

In [None]:
# Columns that hold the values returned for each query variable.
cols = [
    'artist.value',
    'artistLabel.value',
    'sexGenderLabel.value',
    'sexualOrientationLabel.value',
    'ethnicityLabel.value',
    'birthPlaces.value',
]

# Show just those columns as the cell output.
data[cols]

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami
...,...,...,...,...,...,...
3733,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda
3734,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee
3735,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton
3736,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham


### Data Wrangling

Stripping QNumber from Wikidata URL, Adding QNumber to Own Column

In [None]:
# Break each entity URL into its "/"-separated pieces, one piece per column,
# and keep piece number 4 (the trailing QNumber) as its own column.
url_parts = data["artist.value"].str.split("/", expand=True)
data["QNum"] = url_parts[4]

In [None]:
# The DataFrame also carries columns that only describe datatype or language.
# Re-declaring the columns that are actually needed (now including QNum) makes
# the data easier to work with.
new_cols = [
    'artist.value',
    'artistLabel.value',
    'sexGenderLabel.value',
    'sexualOrientationLabel.value',
    'ethnicityLabel.value',
    'birthPlaces.value',
    'QNum',
]
data = data[new_cols]
# data

### Querying Wikidata API 

This queries the Wikidata API for the timestamp of the first revision of each artist's item, i.e. the date the item was added to Wikidata.

In [None]:
# One HTTP session is reused for every request in the loop.
S = requests.Session()

URL = "https://www.wikidata.org/w/api.php"

# First-revision date (YYYY-MM-DD) for each item, in the order of data["QNum"].
finalDate = []

for item in data["QNum"]:
    # rvdir="newer" lists revisions oldest first, so with rvlimit="1" the one
    # revision returned is the item's first.
    PARAMS = {
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": item,
        "rvprop": "timestamp",
        "rvlimit": "1",
        "rvdir": "newer",
    }

    # The timeout keeps a single stalled request from hanging the whole loop;
    # raise_for_status() reports HTTP failures directly instead of letting
    # them surface as a confusing KeyError further down.
    R = S.get(url=URL, params=PARAMS, timeout=30)
    R.raise_for_status()
    RESULTS = R.json()

    # Only one title is requested, so "pages" holds a single entry (keyed by
    # page id); take it without needing to know the key.
    PAGE = next(iter(RESULTS["query"]["pages"].values()))

    # Keep just the date; the timestamp also carries a time of day, which is
    # not needed here.
    DATETIME = PAGE['revisions'][0]['timestamp']
    DATE = DATETIME.split("T")[0]
    finalDate.append(DATE)

data["DateAdded"] = finalDate
# data

### Dataframe Manipulation

In [None]:
# Retype the DateAdded column from 'YYYY-MM-DD' strings to pandas datetimes.
# pd.to_datetime replaces dateutil.parser.parse here: a bare `import dateutil`
# does not itself import the `dateutil.parser` submodule, so the previous
# attribute access only worked when some other package had already loaded it.
data['DateAdded'] = pd.to_datetime(data['DateAdded'], format='%Y-%m-%d')

In [None]:
# Ethnicity label tracked throughout the notebook. It is written out
# explicitly instead of being read from a fixed row of the query result
# (previously row 10), because row positions shift whenever the live Wikidata
# results change.
a_a = "African Americans"

# Replace missing ethnicity values (NaN) with the string "None".
data["ethnicityLabel.value"] = data["ethnicityLabel.value"].fillna("None")

# data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles,Q16148018,2014-04-07
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia,Q4647785,2013-02-15
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn,Q4648425,2013-02-15
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola,Q28800265,2017-02-18
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami,Q16197272,2014-04-08
...,...,...,...,...,...,...,...,...
3733,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda,Q105839391,2021-03-09
3734,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee,Q28864849,2017-03-03
3735,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton,Q55263932,2018-06-29
3736,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham,Q456628,2012-12-06


In [None]:
# The placeholder string that missing ethnicity values were filled with.
# It is spelled out instead of being read from a fixed row (previously row
# 225), which only works while that row happens to have no ethnicity.
nan = "None"

# Collapse every ethnicity other than "African Americans" and "None" into "Other".
keep = [a_a, nan]
is_kept = data['ethnicityLabel.value'].isin(keep)
data['ethnicityLabel.value'] = data['ethnicityLabel.value'].where(is_kept, "Other")

# data

In [None]:
# Order the rows chronologically by DateAdded, then renumber them from 0.
# reset_index() keeps the previous row labels in a new 'index' column.
data = data.sort_values(by='DateAdded').reset_index()
# data

This next section creates columns that track how the percentage of each ethnic group represented in Wikidata changes over time.

In [None]:
# Running share of each ethnicity category, following the current row order
# of `data`: entry i of each list is the fraction of rows 0..i (inclusive)
# whose ethnicity equals that category.
#
# A cumulative sum gives the running count in a single pass, instead of
# re-counting the whole prefix of the list for every row.
eth_list = data['ethnicityLabel.value'].tolist()
eth_labels = pd.Series(eth_list, dtype=object)

# 1, 2, ..., len(eth_list): how many rows have been seen at each position.
rows_seen = np.arange(1, len(eth_labels) + 1)

a_a_pct = ((eth_labels == a_a).cumsum() / rows_seen).tolist()
other_pct = ((eth_labels == 'Other').cumsum() / rows_seen).tolist()
nan_pct = ((eth_labels == nan).cumsum() / rows_seen).tolist()

In [None]:
# Attach the three running-percentage lists to the DataFrame, one column each.
pct_columns = {
    'AfricanAmericanPct': a_a_pct,
    'OtherPct': other_pct,
    'NonePct': nan_pct,
}
for column_name, column_values in pct_columns.items():
    data[column_name] = column_values
# data

Unnamed: 0,index,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
0,1923,http://www.wikidata.org/entity/Q4538,Jonathan Winters,male,,Other,Dayton,Q4538,2012-10-31,0.000000,1.000000,0.000000
1,62,http://www.wikidata.org/entity/Q8346,Albert Arnold Gillespie,male,,,El Paso,Q8346,2012-11-02,0.000000,0.500000,0.500000
2,694,http://www.wikidata.org/entity/Q8372,Craig Barron,male,,,Berkeley,Q8372,2012-11-02,0.000000,0.333333,0.666667
3,1844,http://www.wikidata.org/entity/Q8354,John Frazier,male,,,Richmond,Q8354,2012-11-02,0.000000,0.250000,0.750000
4,2015,http://www.wikidata.org/entity/Q15935,Kanye West,male,,African Americans,Atlanta,Q15935,2012-11-07,0.200000,0.200000,0.600000
...,...,...,...,...,...,...,...,...,...,...,...,...
3733,2688,http://www.wikidata.org/entity/Q110426213,Natasha Zupan,female,,,Columbus,Q110426213,2022-01-05,0.062400,0.048206,0.889395
3734,1362,http://www.wikidata.org/entity/Q110443987,Guy A. Wiggins,male,,,New London,Q110443987,2022-01-06,0.062383,0.048193,0.889424
3735,253,http://www.wikidata.org/entity/Q110457931,Ashton Wilson,female,,,Charleston,Q110457931,2022-01-07,0.062366,0.048180,0.889454
3736,1466,http://www.wikidata.org/entity/Q110466018,Horace Potter,male,,,Cleveland,Q110466018,2022-01-07,0.062349,0.048167,0.889484


# Data Visualization

Create the figure of All Artists with Plotly Graph Objects

In [None]:
# Line chart of the three running percentages for all artists.
fig_main = go.Figure()

# (percentage column, legend entry) for each line, in drawing order.
main_lines = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]
for pct_column, legend_name in main_lines:
    fig_main.add_trace(go.Scatter(
        x=data['DateAdded'],
        y=data[pct_column],
        name=legend_name
    ))

# Title, percent-formatted tick labels, and tick spacing for the y axis.
fig_main.update_layout(title="All Artists",
                       yaxis=dict(tickformat=".1%"))
fig_main.update_yaxes(title="Percentage of Artists",
                      tick0=0.1,
                      dtick=0.2)

fig_main.show()

## Refining DataFrame

In [None]:
# Birthplace value that identifies the Philadelphia rows.
gritty = ['Philadelphia']

# Columns to carry over into the Philadelphia DataFrame.
phi_cols = [
    'artist.value',
    'artistLabel.value',
    'sexGenderLabel.value',
    'sexualOrientationLabel.value',
    'ethnicityLabel.value',
    'birthPlaces.value',
    'QNum',
    'DateAdded',
]

# Keep the rows whose birthPlaces.value is in the list above, restricted to
# the chosen columns, and show the result.
born_in_philadelphia = data['birthPlaces.value'].isin(gritty)
phi_data = data[born_in_philadelphia][phi_cols]
phi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
9,http://www.wikidata.org/entity/Q45111,John R. Neill,male,,,Philadelphia,Q45111,2012-11-13
123,http://www.wikidata.org/entity/Q533275,Edward Percy Moran,male,,,Philadelphia,Q533275,2012-12-07
224,http://www.wikidata.org/entity/Q1328705,Paul Garrin,male,,,Philadelphia,Q1328705,2012-12-20
232,http://www.wikidata.org/entity/Q1364808,William Wallace Denslow,male,,,Philadelphia,Q1364808,2012-12-21
234,http://www.wikidata.org/entity/Q1349603,John Ernest,male,,,Philadelphia,Q1349603,2012-12-21
...,...,...,...,...,...,...,...,...
3608,http://www.wikidata.org/entity/Q108439127,Edwin Lester,male,,African Americans,Philadelphia,Q108439127,2021-09-07
3610,http://www.wikidata.org/entity/Q108498023,Anna Clegg Stryke,female,,,Philadelphia,Q108498023,2021-09-11
3645,http://www.wikidata.org/entity/Q108922332,Edwin Lamasure,male,,,Philadelphia,Q108922332,2021-10-18
3725,http://www.wikidata.org/entity/Q110267399,Louis De Mayo,male,,,Philadelphia,Q110267399,2021-12-26


Repeating the Process of gathering percentages and adding them to the new DataFrame

In [None]:
# Running share of each ethnicity category among the Philadelphia rows, in
# their current order: entry i is the fraction of rows 0..i (inclusive) that
# fall in the category. A cumulative sum replaces re-counting the list prefix
# for every row.
ph_eth_list = phi_data['ethnicityLabel.value'].tolist()
ph_labels = pd.Series(ph_eth_list, dtype=object)

# 1, 2, ..., len(ph_eth_list): rows seen so far at each position.
ph_rows_seen = np.arange(1, len(ph_labels) + 1)

ph_a_a_pct = ((ph_labels == a_a).cumsum() / ph_rows_seen).tolist()
ph_other_pct = ((ph_labels == 'Other').cumsum() / ph_rows_seen).tolist()
ph_nan_pct = ((ph_labels == nan).cumsum() / ph_rows_seen).tolist()

phi_data['AfricanAmericanPct'] = ph_a_a_pct
phi_data['OtherPct'] = ph_other_pct
phi_data['NonePct'] = ph_nan_pct
phi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
9,http://www.wikidata.org/entity/Q45111,John R. Neill,male,,,Philadelphia,Q45111,2012-11-13,0.000000,0.000000,1.000000
123,http://www.wikidata.org/entity/Q533275,Edward Percy Moran,male,,,Philadelphia,Q533275,2012-12-07,0.000000,0.000000,1.000000
224,http://www.wikidata.org/entity/Q1328705,Paul Garrin,male,,,Philadelphia,Q1328705,2012-12-20,0.000000,0.000000,1.000000
232,http://www.wikidata.org/entity/Q1364808,William Wallace Denslow,male,,,Philadelphia,Q1364808,2012-12-21,0.000000,0.000000,1.000000
234,http://www.wikidata.org/entity/Q1349603,John Ernest,male,,,Philadelphia,Q1349603,2012-12-21,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
3608,http://www.wikidata.org/entity/Q108439127,Edwin Lester,male,,African Americans,Philadelphia,Q108439127,2021-09-07,0.187500,0.006250,0.806250
3610,http://www.wikidata.org/entity/Q108498023,Anna Clegg Stryke,female,,,Philadelphia,Q108498023,2021-09-11,0.186335,0.006211,0.807453
3645,http://www.wikidata.org/entity/Q108922332,Edwin Lamasure,male,,,Philadelphia,Q108922332,2021-10-18,0.185185,0.006173,0.808642
3725,http://www.wikidata.org/entity/Q110267399,Louis De Mayo,male,,,Philadelphia,Q110267399,2021-12-26,0.184049,0.006135,0.809816


## Graphing the Philly Data

In [None]:
# Philadelphia chart: the same three lines as the national figure, plus a text
# label and a shape marking the LEADING project period.
fig_phi = go.Figure()

# (percentage column, legend entry) for each line, in drawing order.
phi_lines = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]
for pct_column, legend_name in phi_lines:
    fig_phi.add_trace(go.Scatter(
        x=phi_data['DateAdded'],
        y=phi_data[pct_column],
        name=legend_name
    ))

# Text-only trace that labels the highlighted period; kept out of the legend.
leading_label = go.Scatter(
    x=['2021-07-10'],
    y=[0.20],
    text="LEADING Project",
    mode="text",
    showlegend=False)
fig_phi.add_trace(leading_label)

fig_phi.update_layout(title="Philadelphia Artists",
                      yaxis=dict(tickformat=".1%"))
fig_phi.update_yaxes(title="Percentage of Artists",
                     tick0=0.1,
                     dtick=0.2)

# Shape drawn around the LEADING project period.
fig_phi.add_shape(type="circle",
                  x0='2021-07-15', x1='2021-12-31',
                  y0=.1, y1=.18,
                  line_color='LightSeaGreen', name='LEADING Project Period')

fig_phi.show()

## Repeat the Philly Process for New York City

In [None]:
# Birthplace value that identifies the New York City rows.
walkin_here = ['New York City']

# Columns to carry over into the New York City DataFrame.
ny_cols = [
    'artist.value',
    'artistLabel.value',
    'sexGenderLabel.value',
    'sexualOrientationLabel.value',
    'ethnicityLabel.value',
    'birthPlaces.value',
    'QNum',
    'DateAdded',
]

# Keep the matching rows, restricted to the chosen columns, and show them.
born_in_new_york = data['birthPlaces.value'].isin(walkin_here)
ny_data = data[born_in_new_york][ny_cols]
ny_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
23,http://www.wikidata.org/entity/Q158255,Lyonel Feininger,male,,,New York City,Q158255,2012-11-27
32,http://www.wikidata.org/entity/Q216298,Nicky Hilton,female,,,New York City,Q216298,2012-11-30
41,http://www.wikidata.org/entity/Q230817,Devon Aoki,female,,,New York City,Q230817,2012-12-01
53,http://www.wikidata.org/entity/Q243078,Alison Knowles,female,,,New York City,Q243078,2012-12-01
56,http://www.wikidata.org/entity/Q297618,Adam Savage,male,,,New York City,Q297618,2012-12-02
...,...,...,...,...,...,...,...,...
3654,http://www.wikidata.org/entity/Q109275000,Mariaceleste De Martino,female,,,New York City,Q109275000,2021-10-26
3674,http://www.wikidata.org/entity/Q109690242,Margaret Noel Abell,female,,,New York City,Q109690242,2021-11-23
3682,http://www.wikidata.org/entity/Q109888934,Allen M. Hart,male,,,New York City,Q109888934,2021-12-02
3695,http://www.wikidata.org/entity/Q110111773,Warren F. Scadron,,,,New York City,Q110111773,2021-12-14


In [None]:
# Running share of each ethnicity category among the New York City rows, in
# their current order: entry i is the fraction of rows 0..i (inclusive) that
# fall in the category. A cumulative sum replaces re-counting the list prefix
# for every row.
ny_eth_list = ny_data['ethnicityLabel.value'].tolist()
ny_labels = pd.Series(ny_eth_list, dtype=object)

# 1, 2, ..., len(ny_eth_list): rows seen so far at each position.
ny_rows_seen = np.arange(1, len(ny_labels) + 1)

ny_a_a_pct = ((ny_labels == a_a).cumsum() / ny_rows_seen).tolist()
ny_other_pct = ((ny_labels == 'Other').cumsum() / ny_rows_seen).tolist()
ny_nan_pct = ((ny_labels == nan).cumsum() / ny_rows_seen).tolist()

ny_data['AfricanAmericanPct'] = ny_a_a_pct
ny_data['OtherPct'] = ny_other_pct
ny_data['NonePct'] = ny_nan_pct
ny_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
23,http://www.wikidata.org/entity/Q158255,Lyonel Feininger,male,,,New York City,Q158255,2012-11-27,0.000000,0.000000,1.000000
32,http://www.wikidata.org/entity/Q216298,Nicky Hilton,female,,,New York City,Q216298,2012-11-30,0.000000,0.000000,1.000000
41,http://www.wikidata.org/entity/Q230817,Devon Aoki,female,,,New York City,Q230817,2012-12-01,0.000000,0.000000,1.000000
53,http://www.wikidata.org/entity/Q243078,Alison Knowles,female,,,New York City,Q243078,2012-12-01,0.000000,0.000000,1.000000
56,http://www.wikidata.org/entity/Q297618,Adam Savage,male,,,New York City,Q297618,2012-12-02,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
3654,http://www.wikidata.org/entity/Q109275000,Mariaceleste De Martino,female,,,New York City,Q109275000,2021-10-26,0.034884,0.016279,0.948837
3674,http://www.wikidata.org/entity/Q109690242,Margaret Noel Abell,female,,,New York City,Q109690242,2021-11-23,0.034803,0.016241,0.948956
3682,http://www.wikidata.org/entity/Q109888934,Allen M. Hart,male,,,New York City,Q109888934,2021-12-02,0.034722,0.016204,0.949074
3695,http://www.wikidata.org/entity/Q110111773,Warren F. Scadron,,,,New York City,Q110111773,2021-12-14,0.034642,0.016166,0.949192


In [None]:
# New York City chart: one line per running-percentage column.
fig_ny = go.Figure()

# (percentage column, legend entry) for each line, in drawing order.
ny_lines = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]
for pct_column, legend_name in ny_lines:
    fig_ny.add_trace(go.Scatter(
        x=ny_data['DateAdded'],
        y=ny_data[pct_column],
        name=legend_name
    ))

fig_ny.update_layout(title="New York City Artists",
                     yaxis=dict(tickformat=".1%"))
fig_ny.update_yaxes(title="Percentage of Artists",
                    tick0=0.1,
                    dtick=0.2)

fig_ny.show()

## Detroit

In [None]:
# Birthplace value that identifies the Detroit rows.
vs_everybody = ['Detroit']

# Columns to carry over into the Detroit DataFrame.
det_cols = [
    'artist.value',
    'artistLabel.value',
    'sexGenderLabel.value',
    'sexualOrientationLabel.value',
    'ethnicityLabel.value',
    'birthPlaces.value',
    'QNum',
    'DateAdded',
]

# Keep the rows whose birthPlaces.value is in the list above.
det_data = data[data['birthPlaces.value'].isin(vs_everybody)]

# Select with det_cols, the list declared in this cell. The selection used to
# reference ny_cols, which left det_cols unused and made this cell fail unless
# the New York City cell had been run first.
det_data = det_data[det_cols]
det_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
28,http://www.wikidata.org/entity/Q178040,Ray Johnson,male,,,Detroit,Q178040,2012-11-28
130,http://www.wikidata.org/entity/Q543294,Mike Kelley,male,,,Detroit,Q543294,2012-12-07
186,http://www.wikidata.org/entity/Q922536,Sarah Jackson,female,,,Detroit,Q922536,2012-12-14
187,http://www.wikidata.org/entity/Q882861,James Lee Byars,male,,,Detroit,Q882861,2012-12-14
247,http://www.wikidata.org/entity/Q1444685,John Nelson,male,,,Detroit,Q1444685,2012-12-22
...,...,...,...,...,...,...,...,...
3411,http://www.wikidata.org/entity/Q107253766,G. Ray Kerciu,male,,,Detroit,Q107253766,2021-06-15
3414,http://www.wikidata.org/entity/Q107253769,Gary Metz,male,,,Detroit,Q107253769,2021-06-15
3455,http://www.wikidata.org/entity/Q107399564,Ken Aptekar,male,,,Detroit,Q107399564,2021-07-02
3659,http://www.wikidata.org/entity/Q109399033,Ben Blount,male,,,Detroit,Q109399033,2021-11-04


In [None]:
# Running share of each ethnicity category among the Detroit rows, in their
# current order: entry i is the fraction of rows 0..i (inclusive) that fall in
# the category. A cumulative sum replaces re-counting the list prefix for
# every row.
det_eth_list = det_data['ethnicityLabel.value'].tolist()
det_labels = pd.Series(det_eth_list, dtype=object)

# 1, 2, ..., len(det_eth_list): rows seen so far at each position.
det_rows_seen = np.arange(1, len(det_labels) + 1)

det_a_a_pct = ((det_labels == a_a).cumsum() / det_rows_seen).tolist()
det_other_pct = ((det_labels == 'Other').cumsum() / det_rows_seen).tolist()
det_nan_pct = ((det_labels == nan).cumsum() / det_rows_seen).tolist()

det_data['AfricanAmericanPct'] = det_a_a_pct
det_data['OtherPct'] = det_other_pct
det_data['NonePct'] = det_nan_pct
det_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
28,http://www.wikidata.org/entity/Q178040,Ray Johnson,male,,,Detroit,Q178040,2012-11-28,0.000000,0.000000,1.000000
130,http://www.wikidata.org/entity/Q543294,Mike Kelley,male,,,Detroit,Q543294,2012-12-07,0.000000,0.000000,1.000000
186,http://www.wikidata.org/entity/Q922536,Sarah Jackson,female,,,Detroit,Q922536,2012-12-14,0.000000,0.000000,1.000000
187,http://www.wikidata.org/entity/Q882861,James Lee Byars,male,,,Detroit,Q882861,2012-12-14,0.000000,0.000000,1.000000
247,http://www.wikidata.org/entity/Q1444685,John Nelson,male,,,Detroit,Q1444685,2012-12-22,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
3411,http://www.wikidata.org/entity/Q107253766,G. Ray Kerciu,male,,,Detroit,Q107253766,2021-06-15,0.065574,0.081967,0.852459
3414,http://www.wikidata.org/entity/Q107253769,Gary Metz,male,,,Detroit,Q107253769,2021-06-15,0.064516,0.080645,0.854839
3455,http://www.wikidata.org/entity/Q107399564,Ken Aptekar,male,,,Detroit,Q107399564,2021-07-02,0.063492,0.079365,0.857143
3659,http://www.wikidata.org/entity/Q109399033,Ben Blount,male,,,Detroit,Q109399033,2021-11-04,0.062500,0.078125,0.859375


In [None]:
# Detroit chart: one line per running-percentage column.
fig_det = go.Figure()

# (percentage column, legend entry) for each line, in drawing order.
det_lines = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]
for pct_column, legend_name in det_lines:
    fig_det.add_trace(go.Scatter(
        x=det_data['DateAdded'],
        y=det_data[pct_column],
        name=legend_name
    ))

fig_det.update_layout(title="Detroit Artists",
                      yaxis=dict(tickformat=".1%"))
fig_det.update_yaxes(title="Percentage of Artists",
                     tick0=0.1,
                     dtick=0.2)

fig_det.show()

## Chicago

In [None]:
# Birthplace value that identifies the Chicago rows.
melort = ['Chicago']

# Columns to carry over into the Chicago DataFrame.
chi_cols = [
    'artist.value',
    'artistLabel.value',
    'sexGenderLabel.value',
    'sexualOrientationLabel.value',
    'ethnicityLabel.value',
    'birthPlaces.value',
    'QNum',
    'DateAdded',
]

# Keep the matching rows, restricted to the chosen columns, and show them.
born_in_chicago = data['birthPlaces.value'].isin(melort)
chi_data = data[born_in_chicago][chi_cols]
chi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
29,http://www.wikidata.org/entity/Q202621,Larry Bell,male,,,Chicago,Q202621,2012-11-29
30,http://www.wikidata.org/entity/Q192410,Jennifer Hudson,female,,African Americans,Chicago,Q192410,2012-11-29
39,http://www.wikidata.org/entity/Q268542,Wendy Schaal,female,,,Chicago,Q268542,2012-12-01
51,http://www.wikidata.org/entity/Q234928,Alice Bradley Sheldon,female,bisexuality,,Chicago,Q234928,2012-12-01
101,http://www.wikidata.org/entity/Q442214,Cynthia Plaster Caster,female,,,Chicago,Q442214,2012-12-06
...,...,...,...,...,...,...,...,...
3614,http://www.wikidata.org/entity/Q108570562,Gayle Tanaka,female,,,Chicago,Q108570562,2021-09-17
3619,http://www.wikidata.org/entity/Q108678763,Rose Crosman,female,,,Chicago,Q108678763,2021-09-24
3639,http://www.wikidata.org/entity/Q108758789,Alva Mooses,,,,Chicago,Q108758789,2021-10-01
3653,http://www.wikidata.org/entity/Q109230278,Galia Zamir,female,,,Chicago,Q109230278,2021-10-25


In [None]:
# Running share of each ethnicity category among the Chicago rows, in their
# current order: entry i is the fraction of rows 0..i (inclusive) that fall in
# the category. A cumulative sum replaces re-counting the list prefix for
# every row.
chi_eth_list = chi_data['ethnicityLabel.value'].tolist()
chi_labels = pd.Series(chi_eth_list, dtype=object)

# 1, 2, ..., len(chi_eth_list): rows seen so far at each position.
chi_rows_seen = np.arange(1, len(chi_labels) + 1)

chi_a_a_pct = ((chi_labels == a_a).cumsum() / chi_rows_seen).tolist()
chi_other_pct = ((chi_labels == 'Other').cumsum() / chi_rows_seen).tolist()
chi_nan_pct = ((chi_labels == nan).cumsum() / chi_rows_seen).tolist()

chi_data['AfricanAmericanPct'] = chi_a_a_pct
chi_data['OtherPct'] = chi_other_pct
chi_data['NonePct'] = chi_nan_pct
chi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
29,http://www.wikidata.org/entity/Q202621,Larry Bell,male,,,Chicago,Q202621,2012-11-29,0.000000,0.000000,1.000000
30,http://www.wikidata.org/entity/Q192410,Jennifer Hudson,female,,African Americans,Chicago,Q192410,2012-11-29,0.500000,0.000000,0.500000
39,http://www.wikidata.org/entity/Q268542,Wendy Schaal,female,,,Chicago,Q268542,2012-12-01,0.333333,0.000000,0.666667
51,http://www.wikidata.org/entity/Q234928,Alice Bradley Sheldon,female,bisexuality,,Chicago,Q234928,2012-12-01,0.250000,0.000000,0.750000
101,http://www.wikidata.org/entity/Q442214,Cynthia Plaster Caster,female,,,Chicago,Q442214,2012-12-06,0.200000,0.000000,0.800000
...,...,...,...,...,...,...,...,...,...,...,...
3614,http://www.wikidata.org/entity/Q108570562,Gayle Tanaka,female,,,Chicago,Q108570562,2021-09-17,0.066667,0.018182,0.915152
3619,http://www.wikidata.org/entity/Q108678763,Rose Crosman,female,,,Chicago,Q108678763,2021-09-24,0.066265,0.018072,0.915663
3639,http://www.wikidata.org/entity/Q108758789,Alva Mooses,,,,Chicago,Q108758789,2021-10-01,0.065868,0.017964,0.916168
3653,http://www.wikidata.org/entity/Q109230278,Galia Zamir,female,,,Chicago,Q109230278,2021-10-25,0.065476,0.017857,0.916667


In [None]:
# Chicago chart: one line per running-percentage column.
fig_chi = go.Figure()

# (percentage column, legend entry) for each line, in drawing order.
chi_lines = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]
for pct_column, legend_name in chi_lines:
    fig_chi.add_trace(go.Scatter(
        x=chi_data['DateAdded'],
        y=chi_data[pct_column],
        name=legend_name
    ))

fig_chi.update_layout(title="Chicago Artists",
                      yaxis=dict(tickformat=".1%"))
fig_chi.update_yaxes(title="Percentage of Artists",
                     tick0=0.1,
                     dtick=0.2)

fig_chi.show()

# Create a graph that shows all cities.

In [None]:
# Combined chart: traces from the national DataFrame and each city DataFrame
# on a single figure. "Other" is left out for readability, the legend is
# grouped by ethnicity statement, and the national lines are drawn thicker.
fig_merge = go.Figure()

# (percentage column, legend group id, legend group title), in drawing order.
merge_groups = [
    ('AfricanAmericanPct', "AfricanAmerican", "African American Ethnicity Statement"),
    ('NonePct', "None", "No Ethnicity Statement"),
]

# National lines come first; they carry the legend group titles and the
# heavier line weight.
for pct_column, group_id, group_title in merge_groups:
    fig_merge.add_trace(go.Scatter(
        x=data['DateAdded'],
        y=data[pct_column],
        name="National Data",
        legendgroup=group_id,
        legendgrouptitle_text=group_title,
        line=dict(width=4)
    ))

# Then two lines per city, added to the matching legend group.
city_frames = [
    ("Chicago", chi_data),
    ("Detroit", det_data),
    ("New York City", ny_data),
    ("Philadelphia", phi_data),
]
for city_name, city_frame in city_frames:
    for pct_column, group_id, _ in merge_groups:
        fig_merge.add_trace(go.Scatter(
            x=city_frame['DateAdded'],
            y=city_frame[pct_column],
            name=city_name,
            legendgroup=group_id
        ))

fig_merge.update_layout(title="Artists Across Cities",
                        yaxis=dict(tickformat=".1%"))
fig_merge.update_yaxes(title="Percentage of Artists",
                       tick0=0.1,
                       dtick=0.2)

fig_merge.show()