In [109]:
import pdb

from decomp import UDSCorpus
from tqdm import tqdm

In [46]:
# Build the UDSCorpus for the 'train' split, version '1.0'.
# The %time line magic reports how long this single statement takes to run.
%time uds = UDSCorpus(split='train', version='1.0')

CPU times: user 12.1 s, sys: 2.57 s, total: 14.6 s
Wall time: 14.6 s


In [173]:
# Subset of the corpus: the first 1000 (graph id, graph) pairs in the
# iteration order of `uds.items()`. If the corpus holds fewer than 1000
# graphs, all of them are kept. The query cells in this notebook iterate
# over this subset rather than over `uds` itself.
uds_first1000 = {
    graph_id: graph
    for _, (graph_id, graph) in zip(range(1000), uds.items())
}

## Data exploration

In this homework we will build binary classifiers for five different semantic roles, using the UDS dataset:
* `AGENT`
* `PATIENT`
* `THEME`
* `INSTRUMENT`
* `BENEFICIARY`

Formally, we define these roles in terms of UDS properties in the following way:

`AGENT := ((volition > 0) ∨ (instigation > 0)) ∧ (existed_before > 0)`

`PATIENT := (volition < 0) ∧ (instigation < 0) ∧ (change_of_state > 0)`

`THEME := (volition < 0) ∧ (instigation < 0) ∧ (change_of_state < 0)`

`INSTRUMENT := (was_used > 0) ∧ (existed_during > 0) ∧ (volition < 0)`

`BENEFICIARY := (instigation < 0) ∧ (was_for_benefit > 0)`

In [175]:
# SPARQL edge query for the AGENT role:
#   (existed_before > 0) AND ((volition > 0) OR (instigation > 0))
# ?pred is restricted to <domain> <semantics> and <type> <predicate>; ?edge is
# the property linking ?pred to ?arg. Adjacent braced groups are joined
# (logical AND); the UNION between the last two groups supplies the OR.
agent_query_str = """
                   SELECT ?edge
                   WHERE { ?pred ?edge ?arg ;
                                 <domain> <semantics> ;
                                 <type>   <predicate> ;
                           { ?edge <existed_before> ?existed_before
                                   FILTER ( ?existed_before > 0 )
                           }
                           { ?edge <volition> ?volition
                                   FILTER ( ?volition > 0 )
                           } UNION
                           { ?edge <instigation> ?instigation
                                   FILTER ( ?instigation > 0 )
                           }
                         }
                   """

In [176]:
# SPARQL edge query for the PATIENT role:
#   (change_of_state > 0) AND (volition < 0) AND (instigation < 0)
# ?pred is restricted to <domain> <semantics> and <type> <predicate>; ?edge is
# the property linking ?pred to ?arg. The three braced groups are joined, so
# an edge must satisfy every FILTER to be selected.
patient_query_str = """
                   SELECT ?edge
                   WHERE { ?pred ?edge ?arg ;
                                 <domain> <semantics> ;
                                 <type>   <predicate> ;
                           { ?edge <change_of_state> ?change_of_state
                                   FILTER ( ?change_of_state > 0 )
                           }
                           { ?edge <volition> ?volition
                                   FILTER ( ?volition < 0 )
                           }
                           { ?edge <instigation> ?instigation
                                   FILTER ( ?instigation < 0 )
                           }
                         }
                   """

In [185]:
# SPARQL edge query for the THEME role:
#   (change_of_state < 0) AND (volition < 0) AND (instigation < 0)
# Identical to the PATIENT query except that change_of_state must be
# negative instead of positive. The three braced groups are joined, so an
# edge must satisfy every FILTER to be selected.
theme_query_str = """
                   SELECT ?edge
                   WHERE { ?pred ?edge ?arg ;
                                 <domain> <semantics> ;
                                 <type>   <predicate> ;
                           { ?edge <change_of_state> ?change_of_state
                                   FILTER ( ?change_of_state < 0 )
                           }
                           { ?edge <volition> ?volition
                                   FILTER ( ?volition < 0 )
                           }
                           { ?edge <instigation> ?instigation
                                   FILTER ( ?instigation < 0 )
                           }
                         }
                   """

In [189]:
# SPARQL edge query for the INSTRUMENT role:
#   (was_used > 0) AND (existed_during > 0) AND (volition < 0)
# ?pred is restricted to <domain> <semantics> and <type> <predicate>; ?edge is
# the property linking ?pred to ?arg. The three braced groups are joined, so
# an edge must satisfy every FILTER to be selected.
instrument_query_str = """
                   SELECT ?edge
                   WHERE { ?pred ?edge ?arg ;
                                 <domain> <semantics> ;
                                 <type>   <predicate> ;
                           { ?edge <was_used> ?was_used
                                   FILTER ( ?was_used > 0 )
                           }
                           { ?edge <existed_during> ?existed_during
                                   FILTER ( ?existed_during > 0 )
                           }
                           { ?edge <volition> ?volition
                                   FILTER ( ?volition < 0 )
                           }
                         }
                   """

In [190]:
# SPARQL edge query for the BENEFICIARY role:
#   (instigation < 0) AND (was_for_benefit > 0)
# ?pred is restricted to <domain> <semantics> and <type> <predicate>; ?edge is
# the property linking ?pred to ?arg. The two braced groups are joined, so an
# edge must satisfy both FILTERs to be selected.
beneficiary_query_str = """
                   SELECT ?edge
                   WHERE { ?pred ?edge ?arg ;
                                 <domain> <semantics> ;
                                 <type>   <predicate> ;
                           { ?edge <instigation> ?instigation
                                   FILTER ( ?instigation < 0 )
                           }
                           { ?edge <was_for_benefit> ?was_for_benefit
                                   FILTER ( ?was_for_benefit > 0 )
                           }
                         }
                   """

In [179]:
# SPARQL edge query selecting edges with (sentient < 0) AND (instigation > 0).
# ?pred is restricted to <domain> <semantics> and <type> <predicate>; ?edge is
# the property linking ?pred to ?arg.
# NOTE(review): a CAUSE role is not among the five roles defined in the
# markdown of this notebook, and this query is not executed in the cells
# visible here - confirm whether it is still needed.
cause_query_str = """
                   SELECT ?edge
                   WHERE { ?pred ?edge ?arg ;
                                 <domain> <semantics> ;
                                 <type>   <predicate> ;
                           { ?edge <sentient> ?sentient
                                   FILTER ( ?sentient < 0 )
                           }
                           { ?edge <instigation> ?instigation
                                   FILTER ( ?instigation > 0 )
                           }
                         }
                   """

In [180]:
%%time

# Run the AGENT edge query against every graph in the 1000-graph subset and
# collect the per-graph results under the graph's key. cache_rdf=False is
# passed so the query call does not keep the RDF it builds for each graph.
agent_results = {
    graph_id: sentence_graph.query(
        agent_query_str,
        query_type='edge',
        cache_rdf=False,
    )
    for graph_id, sentence_graph in uds_first1000.items()
}

# Sum of the per-graph result sizes, i.e. how many matches were found overall.
print(sum(map(len, agent_results.values())))

374
CPU times: user 46 s, sys: 0 ns, total: 46 s
Wall time: 46 s


In [181]:
%%time

# Run the PATIENT edge query against every graph in the 1000-graph subset and
# collect the per-graph results under the graph's key. cache_rdf=False is
# passed so the query call does not keep the RDF it builds for each graph.
patient_results = {
    graph_id: sentence_graph.query(
        patient_query_str,
        query_type='edge',
        cache_rdf=False,
    )
    for graph_id, sentence_graph in uds_first1000.items()
}

# Sum of the per-graph result sizes, i.e. how many matches were found overall.
print(sum(map(len, patient_results.values())))

64
CPU times: user 44.1 s, sys: 0 ns, total: 44.1 s
Wall time: 44.1 s


In [186]:
%%time

# Run the THEME edge query against every graph in the 1000-graph subset and
# collect the per-graph results under the graph's key. cache_rdf=False is
# passed so the query call does not keep the RDF it builds for each graph.
theme_results = {
    graph_id: sentence_graph.query(
        theme_query_str,
        query_type='edge',
        cache_rdf=False,
    )
    for graph_id, sentence_graph in uds_first1000.items()
}

# Sum of the per-graph result sizes, i.e. how many matches were found overall.
print(sum(map(len, theme_results.values())))

104
CPU times: user 46.4 s, sys: 10.1 ms, total: 46.4 s
Wall time: 46.4 s


In [191]:
%%time

# Run the INSTRUMENT edge query against every graph in the 1000-graph subset
# and collect the per-graph results under the graph's key. cache_rdf=False is
# passed so the query call does not keep the RDF it builds for each graph.
instrument_results = {
    graph_id: sentence_graph.query(
        instrument_query_str,
        query_type='edge',
        cache_rdf=False,
    )
    for graph_id, sentence_graph in uds_first1000.items()
}

# Sum of the per-graph result sizes, i.e. how many matches were found overall.
print(sum(map(len, instrument_results.values())))

164
CPU times: user 43.9 s, sys: 3.87 ms, total: 43.9 s
Wall time: 43.9 s


In [192]:
%%time

# Run the BENEFICIARY edge query against every graph in the 1000-graph subset
# and collect the per-graph results under the graph's key. cache_rdf=False is
# passed so the query call does not keep the RDF it builds for each graph.
beneficiary_results = {
    graph_id: sentence_graph.query(
        beneficiary_query_str,
        query_type='edge',
        cache_rdf=False,
    )
    for graph_id, sentence_graph in uds_first1000.items()
}

# Sum of the per-graph result sizes, i.e. how many matches were found overall.
print(sum(map(len, beneficiary_results.values())))

42
CPU times: user 45.7 s, sys: 24.6 ms, total: 45.7 s
Wall time: 45.7 s


---