### Import libraries

In [1]:
from collections import Counter
import pandas as pd

### Load CSV files

In [3]:
# Load the awards table. Per the rendered output it carries the columns
# award_id, award_name, award_type and award_category (category may be missing).
# NOTE(review): the extra 'Unnamed: 0' column in the output looks like an index
# that was written into the CSV -- confirm before relying on or dropping it.
awards = pd.read_csv('../data/wikidata_awards.csv')
awards

Unnamed: 0,award_id,award_name,award_type,award_category
0,0,MOJO Awards,MOJO Awards,
1,1,Grammy Award for Best Rock Performance by a Du...,Grammy Award,Best Rock Performance by a Duo or Group with V...
2,2,MTV Europe Music Award for Best Album,MTV Europe Music Award,Best Album
3,3,Grammy Award for Best Rock Album,Grammy Award,Best Rock Album
4,4,Grammy Award for Best Dance/Electronic Album,Grammy Award,Best Dance/Electronic Album
...,...,...,...,...
188,188,Latin Grammy Award for Best Recording Package,Latin Grammy Award,Best Recording Package
189,189,First prize of the Eurovision Song Contest,First prize of the Eurovision Song Contest,
190,190,MTV Europe Music Award for Best Korean Act,MTV Europe Music Award,Best Korean Act
191,191,P3 Gull for Artist of the Year,P3 Gull,Artist of the Year


In [4]:
# Load the artist/award statements. Per the rendered output each row holds
# artist_spotify_id, award_id and award_year.
# NOTE(review): award_year shows '_' for some rows, so the column is presumably
# read as strings rather than integers -- confirm before doing numeric work on it.
statements = pd.read_csv('../data/wikidata_award_statements.csv')
statements

Unnamed: 0,artist_spotify_id,award_id,award_year
0,0L8ExT028jH3ddEcZwqJJ5,0,_
1,0L8ExT028jH3ddEcZwqJJ5,1,2006
2,0L8ExT028jH3ddEcZwqJJ5,2,2006
3,0L8ExT028jH3ddEcZwqJJ5,3,2006
4,4tZwfgrHOc3mvqYlEYSvVi,4,2008
...,...,...,...
997,0ghlgldX5Dd6720Q3qFyQB,186,2020
998,2KC9Qb60EaY0kW4eH68vr3,186,2020
999,4yxLYO2imECxGYTTV7RQKb,192,_
1000,5t5FqBwTcgKTaWmfEbwQY9,186,2021


### Preprocessing
- try to split type and category with dashes
- identify non-unique awards
- assign each award a class based on frequency

In [5]:
# Separate "<type> <dash> <category>" labels stored in award_type.
def _split_award_label(label):
    """Split an award label at its first dash separator.

    The spaced en dash (' – ') is tried first; the spaced hyphen (' - ') is
    only tried when the en dash is absent. Returns ``(type, category)``, with
    ``category`` set to ``None`` when neither separator occurs in the label.
    """
    for dash in (' – ', ' - '):
        head, found, tail = label.partition(dash)
        if found:
            return head, tail
    return label, None


# Iterate over a snapshot of the column so the in-place writes below cannot
# interfere with the values still waiting to be read.
for idx, label in list(awards['award_type'].items()):
    base_type, category = _split_award_label(label)
    awards.at[idx, 'award_type'] = base_type
    # Only overwrite the category when the label actually contained one.
    if category is not None:
        awards.at[idx, 'award_category'] = category

In [6]:
# Distinct award types, in order of first appearance, re-indexed from 0.
award_types = awards['award_type'].drop_duplicates().reset_index(drop=True)

# Every award type, one entry per award row (duplicates kept for counting).
aw_list = awards['award_type'].tolist()

# How many awards share each type; keys keep first-seen order.
freq_award = Counter()
freq_award.update(aw_list)

# Award types shared by two or more awards.
atleast2 = [name for name in freq_award if freq_award[name] >= 2]
atleast2

['Grammy Award',
 'MTV Europe Music Award',
 'Academy Award',
 'Latin Grammy Award',
 'American Music Award',
 'Juno Award',
 'Americana Award',
 'Gramophone Award',
 'Spellemann Award',
 'Soul Train Music Award',
 'Billboard Music Award']

In [7]:
# Derive an award_class column from keywords found in award_type.

# Keywords are checked in this order and the first match wins; the matching
# keyword itself is used as the class label. Substring matching means
# e.g. 'Latin Grammy Award' is classed as 'Grammy'.
_CLASS_KEYWORDS = (
    'MTV',
    'Grammy',
    'Americana',
    'American Music',
    'Juno',
    'Spellemann',
    'Soul Train',
    'Billboard',
    'Academy',
    'Gramophone',
)


def _classify_award(type_label):
    """Return the first keyword contained in ``type_label``, else 'Generic'."""
    return next((kw for kw in _CLASS_KEYWORDS if kw in type_label), 'Generic')


# Missing categories are replaced by the '_' placeholder.
awards['award_category'] = awards['award_category'].fillna('_')

award_class = [_classify_award(type_label) for type_label in awards['award_type']]
awards['award_class'] = award_class
awards

Unnamed: 0,award_id,award_name,award_type,award_category,award_class
0,0,MOJO Awards,MOJO Awards,_,Generic
1,1,Grammy Award for Best Rock Performance by a Du...,Grammy Award,Best Rock Performance by a Duo or Group with V...,Grammy
2,2,MTV Europe Music Award for Best Album,MTV Europe Music Award,Best Album,MTV
3,3,Grammy Award for Best Rock Album,Grammy Award,Best Rock Album,Grammy
4,4,Grammy Award for Best Dance/Electronic Album,Grammy Award,Best Dance/Electronic Album,Grammy
...,...,...,...,...,...
188,188,Latin Grammy Award for Best Recording Package,Latin Grammy Award,Best Recording Package,Grammy
189,189,First prize of the Eurovision Song Contest,First prize of the Eurovision Song Contest,_,Generic
190,190,MTV Europe Music Award for Best Korean Act,MTV Europe Music Award,Best Korean Act,MTV
191,191,P3 Gull for Artist of the Year,P3 Gull,Artist of the Year,Generic


### TTL file

In [None]:
# Turtle snippet declaring a per-year award class (e.g. :GrammyAward2012) as
# equivalent to a restriction on :awardYear and as a subclass of the award type.
# Placeholders, in order: award type, award year, award year, award type.
AWARD_YEAR_CLASS_TEMPLATE = """### https://www.dei.unipd.it/db2/ontology/soundgraph#GrammyAward2012
:%s%s rdf:type owl:Class ;
                 owl:equivalentClass [ rdf:type owl:Restriction ;
                                       owl:onProperty :awardYear ;
                                       owl:hasValue %s
                                     ] ;
                 rdfs:subClassOf :%s . """


def award_year_class_ttl(award_type, award_year):
    """Render the Turtle class declaration for one award type and year.

    Parameters
    ----------
    award_type : str
        Identifier used both as the prefix of the new class name and as the
        parent class in ``rdfs:subClassOf``.
    award_year : int or str
        Year appended to the class name and used as the ``owl:hasValue``.

    Returns
    -------
    str
        The filled-in Turtle snippet.

    Notes
    -----
    The arguments are passed to ``%`` as a single tuple. Without the
    parentheses, ``"..." % a, b, c, d`` is parsed as ``("..." % a), b, c, d``
    and raises ``TypeError: not enough arguments for format string``.

    NOTE(review): ``award_type`` is inserted verbatim. Values containing
    spaces (e.g. 'Grammy Award') would presumably not yield a valid Turtle
    local name, so callers likely need to pass a compacted identifier such as
    'GrammyAward' -- confirm against the ontology.

    Example
    -------
    >>> print(award_year_class_ttl('GrammyAward', 2012))  # doctest: +SKIP
    """
    return AWARD_YEAR_CLASS_TEMPLATE % (award_type, award_year, award_year, award_type)