# Description
- This notebook covers all five functionalities of the Time-Aware Knowledge Graphs (TAKG) project.

In [1]:
import pandas as pd
from constants import TABLES, STANDARD_TRIPLES, SOURCES,PREDICATES, PLAYER_COLUMNS, TEAM_COLUMNS, TEMPORAL_DATA_COLUMNS, \
    PLAYER_COLUMN_PREDICATE_MAPPING, TEAM_COLUMN_PREDICATE_MAPPING, AWARD_COLUMNS, AWARD_COLUMN_PREDICATE_MAPPING, \
     AWARD_TEMPORAL_COLUMNS, EVENT_COLUMNS, EVENT_COLUMN_PREDICATE_MAPPING, EVENT_TEMPORAL_COLUMNS
from util import get_data, get_all_data, get_row_count, insert_data_to_table, query_TAKG, entity_data_to_triples, \
      entity_to_parent, add_statement_id, transform_player_concept_results, get_player_award_insertion_data,  \
     get_player_event_participation_data, get_player_transfer_market_data, get_player_market_value_insertion_data, \
     get_player_transfer_market_data, process_player_transfer_market_data, transform_player_SPARQL_results, \
     player_played_for_data

## Functionality 1
### What awards did the player win, and when? e.g. European Golden Shoe
#### Source : Wikidata

#### 1.1 Existing Knowledge Search

In [2]:
# Existing-knowledge lookup: every 'wasAwarded' statement stored for player TAKG.10,
# joined to its Temporal record and retrieval Metadata. Player and award attribute
# triples are pivoted into columns with MAX(CASE ...).
# The temporal columns and source are part of the GROUP BY so that the same award won
# at different times comes back as separate rows, instead of being collapsed into a
# single row whose start/end/time_point is picked arbitrarily from the group.
q = """SELECT
  t.subject AS player_uri,t.object AS award_uri,start,end,time_point,source,insertion_time,
  MAX(CASE WHEN p.predicate = 'hasFullName' THEN p.object END) AS 'Player Name',
  MAX(CASE WHEN p.predicate = 'hasWikiID' THEN p.object END) AS 'player_wikiID',
  MAX(CASE WHEN p.predicate = 'hasTransferMarketID' THEN p.object END) AS 'TransferMarketID',
  MAX(CASE WHEN awd.predicate = 'RDFS.label' THEN awd.object END) AS 'award',
  MAX(CASE WHEN awd.predicate = 'hasWikiID' THEN awd.object END) AS 'award_wikiID'

FROM KG t
JOIN KG p
  ON t.subject = p.subject
JOIN KG awd
  ON t.object=awd.subject

INNER JOIN Temporal ON t.statement_id=Temporal.statement_id
INNER JOIN Metadata ON Temporal.retrieval_id = Metadata.retrieval_id


WHERE t.subject ='TAKG.10' AND t.predicate = 'wasAwarded'

GROUP BY t.subject,t.object,start,end,time_point,source,insertion_time

"""

In [3]:
# Run the award lookup held in `q`; as the cell's last expression, the result is displayed.
query_TAKG(q)

Unnamed: 0,player_uri,award_uri,start,end,time_point,source,insertion_time,Player Name,player_wikiID,TransferMarketID,award,award_wikiID
0,TAKG.10,TAKG.Q233454,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:14:34.491151,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,European Golden Shoe,http://www.wikidata.org/entity/Q233454
1,TAKG.10,TAKG.Q233454,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:16:23.941867,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,European Golden Shoe,http://www.wikidata.org/entity/Q233454
2,TAKG.10,TAKG.Q311830,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:14:34.491151,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,Footballer of the Year,http://www.wikidata.org/entity/Q311830
3,TAKG.10,TAKG.Q311830,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:16:23.941867,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,Footballer of the Year,http://www.wikidata.org/entity/Q311830


#### 1.2 Query External Source

In [4]:
# SPARQL text for the external source (Wikidata, per the section header).
# For the player whose ?transferMarketID is pinned to '10' by the VALUES clause, it selects
# each P166 statement that carries a P585 qualifier, together with the award's English label.
# The pattern also requires the player to have at least one P54 statement.
# The two '#FILTER' lines inside the string are SPARQL comments and are not applied.
player_award_query = """PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

SELECT DISTINCT  ?player ?transferMarketID ?award ?awardLabel ?time_point
WHERE {

  ?player p:P54 ?team_statement;
               p:P2446 ?transferMarket_statement;
               p:P166 ?award_statement.
  
  ?transferMarket_statement  ps:P2446 ?transferMarketID.
  ?award_statement pq:P585 ?time_point;
                   ps:P166 ?award.
  ?award rdfs:label ?awardLabel.

  #FILTER (?teamType = wd:Q476028) #football clubs
  #FILTER (((year(?startTime) < ?givenStart && year(?endTime) > ?givenStart)) || ((year(?startTime) < ?givenEnd && year(?endTime) > ?givenEnd)) ) #time filter
  FILTER (langMatches( lang(?awardLabel), "EN" ) )
  VALUES ?transferMarketID {'10'} #given player
  
  }
"""

In [5]:
#2. Get External Knowledge & Transform
# Run the award SPARQL query, then pass the raw results to the player/concept
# transformation helper with 'award' as its second argument.
player_awards_results = get_all_data(player_award_query)
transformed_player_award_data = transform_player_concept_results(player_awards_results,'award')
# Left as the last expression so the notebook displays the transformed frame.
transformed_player_award_data

Unnamed: 0,transferMarketID,player,award,time_point,awardLabel,player_uri,award_uri
0,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q233454,2006-01-01T00:00:00Z,European Golden Shoe,TAKG.10,TAKG.Q233454
1,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q311830,2006-01-01T00:00:00Z,Footballer of the Year,TAKG.10,TAKG.Q311830


#### 1.3 Construct Data

In [6]:
# 3. Construct data
# Award T-Box: class -> parent triples derived from the award columns.
award_data = get_data(transformed_player_award_data,AWARD_COLUMNS)
classes_to_parent_triples = entity_to_parent(award_data)

# Award A-Box: triples built from award_data with 'award_uri' as the key column.
award_triples = entity_data_to_triples(award_data,'award_uri',AWARD_COLUMN_PREDICATE_MAPPING)

# Attach statement ids so both sets of triples can go into the KG table.
classes_KG_data = add_statement_id(classes_to_parent_triples,STANDARD_TRIPLES)
award_KG_data = add_statement_id(award_triples,STANDARD_TRIPLES)

# Player -> award statements plus their temporal and metadata rows.
# The former `player_award_temporal_data = get_data(..., AWARD_TEMPORAL_COLUMNS)` line was
# removed: its result was overwritten by this unpacking before it was ever read.
player_award_kg_data,player_award_temporal_data,player_award_metadata = get_player_award_insertion_data(
    transformed_player_award_data,SOURCES['wiki_source'],PREDICATES['player_award'])

print('#####award KG Statements####\n')
print(player_award_kg_data.head())
print('#####award Temporal Statements####\n')
print(player_award_temporal_data.head())
print('#####award Metadata Statements####\n')
print(player_award_metadata.head())

#####award KG Statements####

   subject   predicate        object          statement_id
0  TAKG.10  wasAwarded  TAKG.Q233454  11273502741218492243
1  TAKG.10  wasAwarded  TAKG.Q311830  16254692140755045261
#####award Temporal Statements####

           statement_id start end            time_point         retrieval_id
0  11273502741218492243            2006-01-01T00:00:00Z  5615688190346538786
1  16254692140755045261            2006-01-01T00:00:00Z  5615688190346538786
#####award Metadata Statements####

          retrieval_id source             insertion_time
0  5615688190346538786   Wiki 2023-03-22 20:40:12.631751


#### 1.4 Insert Data

In [26]:
# Table sizes before the award insertion, for comparison with the counts printed afterwards.
print('#####Counts BEFORE Insertion#####\n')
for count_label, table_key in (
    ('Total KG row count: ', 'base_KG_table'),
    ('Total Temporal row count: ', 'temporal_meta_table'),
    ('Total Metadata row count: ', 'meta_table'),
):
    print(count_label, get_row_count(TABLES[table_key])['row_count'][0])

#####Counts BEFORE Insertion#####

Total KG row count:  50818
Total Temporal row count:  15647
Total Metadata row count:  2


In [27]:
# 4. Insert the player award data.
# Table names are taken from TABLES -- the same keys the row-count checks below use --
# rather than being repeated as string literals.
# NOTE: re-running this cell inserts the same statements again under a new insertion_time.

insert_data_to_table(classes_KG_data,TABLES['base_KG_table'])
insert_data_to_table(award_KG_data,TABLES['base_KG_table'])
insert_data_to_table(player_award_kg_data,TABLES['base_KG_table'])
insert_data_to_table(player_award_temporal_data,TABLES['temporal_meta_table'])
insert_data_to_table(player_award_metadata,TABLES['meta_table'])

print('\n#####Counts AFTER Insertion#####\n')
print('Total KG row count: ',get_row_count(TABLES['base_KG_table'])['row_count'][0])
print('Total Temporal row count: ',get_row_count(TABLES['temporal_meta_table'])['row_count'][0])
print('Total Metadata row count: ',get_row_count(TABLES['meta_table'])['row_count'][0])


#####Counts AFTER Insertion#####

Total KG row count:  50828
Total Temporal row count:  15649
Total Metadata row count:  3


In [7]:
# Re-run the award lookup held in `q` to inspect the store after the insertion step.
query_TAKG(q)

Unnamed: 0,player_uri,award_uri,start,end,time_point,source,insertion_time,Player Name,player_wikiID,TransferMarketID,award,award_wikiID
0,TAKG.10,TAKG.Q233454,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:14:34.491151,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,European Golden Shoe,http://www.wikidata.org/entity/Q233454
1,TAKG.10,TAKG.Q233454,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:16:23.941867,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,European Golden Shoe,http://www.wikidata.org/entity/Q233454
2,TAKG.10,TAKG.Q311830,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:14:34.491151,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,Footballer of the Year,http://www.wikidata.org/entity/Q311830
3,TAKG.10,TAKG.Q311830,,,2006-01-01T00:00:00Z,Wiki,2023-03-22 14:16:23.941867,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,Footballer of the Year,http://www.wikidata.org/entity/Q311830


## Functionality 2
### What major footballing events did the player participate in? e.g. World Cup
#### Source : Wikidata

#### 1.1 Existing Knowledge Search

In [8]:
# Existing-knowledge lookup: every 'participatedIn' statement stored for player TAKG.10,
# joined to its Temporal record and retrieval Metadata. Player and event attribute
# triples are pivoted into columns with MAX(CASE ...); `year` is derived from `start`.
# The temporal columns and source are part of the GROUP BY so that distinct temporal
# records for the same player/event pair stay as separate rows, rather than one row
# whose start/end/time_point is picked arbitrarily from the group.
q = """SELECT
  t.subject AS player_uri,t.object AS event_uri,strftime('%Y',start) AS year,start,end,time_point,source,insertion_time,
  MAX(CASE WHEN p.predicate = 'hasFullName' THEN p.object END) AS 'Player Name',
  MAX(CASE WHEN p.predicate = 'hasWikiID' THEN p.object END) AS 'player_wikiID',
  MAX(CASE WHEN p.predicate = 'hasTransferMarketID' THEN p.object END) AS 'TransferMarketID',
  MAX(CASE WHEN event.predicate = 'RDFS.label' THEN event.object END) AS 'event',
  MAX(CASE WHEN event.predicate = 'hasWikiID' THEN event.object END) AS 'event_wikiID'

FROM KG t
JOIN KG p
  ON t.subject = p.subject
JOIN KG event
  ON t.object=event.subject

INNER JOIN Temporal ON t.statement_id=Temporal.statement_id
INNER JOIN Metadata ON Temporal.retrieval_id = Metadata.retrieval_id


WHERE t.subject ='TAKG.10' AND t.predicate = 'participatedIn'

GROUP BY t.subject,t.object,start,end,time_point,source,insertion_time

"""

In [9]:
# Run the event-participation lookup held in `q`; the result is displayed as the cell output.
query_TAKG(q)

Unnamed: 0,player_uri,event_uri,year,start,end,time_point,source,insertion_time,Player Name,player_wikiID,TransferMarketID,event,event_wikiID
0,TAKG.10,TAKG.Q102920,2004,2004-06-12T00:00:00Z,2004-07-04T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2004,http://www.wikidata.org/entity/Q102920
1,TAKG.10,TAKG.Q102920,2004,2004-06-12T00:00:00Z,2004-07-04T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2004,http://www.wikidata.org/entity/Q102920
2,TAKG.10,TAKG.Q176883,2010,2010-06-11T00:00:00Z,2010-07-11T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2010 FIFA World Cup,http://www.wikidata.org/entity/Q176883
3,TAKG.10,TAKG.Q176883,2010,2010-06-11T00:00:00Z,2010-07-11T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2010 FIFA World Cup,http://www.wikidata.org/entity/Q176883
4,TAKG.10,TAKG.Q22669,2012,2012-06-08T00:00:00Z,2012-07-01T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2012,http://www.wikidata.org/entity/Q22669
5,TAKG.10,TAKG.Q22669,2012,2012-06-08T00:00:00Z,2012-07-01T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2012,http://www.wikidata.org/entity/Q22669
6,TAKG.10,TAKG.Q241864,2008,2008-06-08T00:00:00Z,2008-06-29T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2008,http://www.wikidata.org/entity/Q241864
7,TAKG.10,TAKG.Q241864,2008,2008-06-08T00:00:00Z,2008-06-29T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2008,http://www.wikidata.org/entity/Q241864
8,TAKG.10,TAKG.Q37285,2006,2006-06-09T00:00:00Z,2006-07-09T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2006 FIFA World Cup,http://www.wikidata.org/entity/Q37285
9,TAKG.10,TAKG.Q37285,2006,2006-06-09T00:00:00Z,2006-07-09T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2006 FIFA World Cup,http://www.wikidata.org/entity/Q37285


#### 1.2 Query External Source

In [10]:
# SPARQL text for the external source (Wikidata, per the section header).
# For the player whose ?transferMarketID is pinned to '10', it selects every P1344
# statement value (?event) with its English label and, where present, the event's
# P580 / P582 / P585 statement values (?startTime / ?endTime / ?time_point).
# No time window is applied. The former `VALUES ?givenStart {2008}` and
# `VALUES ?givenEnd {2012}` clauses were removed because no active pattern used those
# variables (the only FILTER that referenced them was commented out), so they
# suggested a 2008-2012 restriction that never took effect.
major_events_query = """
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT  ?player ?transferMarketID ?event ?eventLabel ?time_point ?startTime ?endTime
WHERE {

  ?player p:P54 ?team_statement;
               p:P2446 ?transferMarket_statement;
               p:P1344 ?particpant_statement.

  ?transferMarket_statement  ps:P2446 ?transferMarketID.
  ?particpant_statement ps:P1344 ?event.
  ?event rdfs:label ?eventLabel.
     OPTIONAL{?event p:P580 ?startTimeSTMT} .
     OPTIONAL{?event p:P582 ?endTimeSTMT} .
     OPTIONAL{?event p:P585 ?time_pointSTMT}.

     OPTIONAL{?startTimeSTMT ps:P580 ?startTime} .
     OPTIONAL{?endTimeSTMT ps:P582 ?endTime} .
     OPTIONAL{?time_pointSTMT ps:P585 ?time_point}.

  FILTER (langMatches( lang(?eventLabel), "EN" ) )
  VALUES ?transferMarketID {'10'} #given player

  }
"""

In [11]:
#2. Get External Knowledge & Transform
# Run the major-events SPARQL query, then pass the raw results to the player/concept
# transformation helper with 'event' as its second argument.
player_event_participation_results = get_all_data(major_events_query)
transformed_player_event_participation_data = transform_player_concept_results(player_event_participation_results,'event')

In [12]:
# Preview the transformed participation records that feed the construction step.
transformed_player_event_participation_data.head()

Unnamed: 0,transferMarketID,player,event,eventLabel,start,end,time_point,player_uri,event_uri
0,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q22669,UEFA Euro 2012,2012-06-08T00:00:00Z,2012-07-01T00:00:00Z,2012-01-01T00:00:00Z,TAKG.10,TAKG.Q22669
3,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q37285,2006 FIFA World Cup,2006-06-09T00:00:00Z,2006-07-09T00:00:00Z,2006-01-01T00:00:00Z,TAKG.10,TAKG.Q37285
6,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q47735,2002 FIFA World Cup,2002-05-30T00:00:00Z,2002-06-29T00:00:00Z,2002-01-01T00:00:00Z,TAKG.10,TAKG.Q47735
7,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q79859,2014 FIFA World Cup,2014-06-12T00:00:00Z,2014-07-13T00:00:00Z,2014-01-01T00:00:00Z,TAKG.10,TAKG.Q79859
9,10,http://www.wikidata.org/entity/Q80471,http://www.wikidata.org/entity/Q102920,UEFA Euro 2004,2004-06-12T00:00:00Z,2004-07-04T00:00:00Z,2004-01-01T00:00:00Z,TAKG.10,TAKG.Q102920


#### 1.3 Construct Data

In [13]:
# 3. Construct data
# Event T-Box: class -> parent triples from the event columns.
classes_to_parent_triples = entity_to_parent(transformed_player_event_participation_data[EVENT_COLUMNS])

# Event A-Box: triples built from event_data with 'event_uri' as the key column.
event_data = get_data(transformed_player_event_participation_data,EVENT_COLUMNS)
event_triples = entity_data_to_triples(event_data,'event_uri',EVENT_COLUMN_PREDICATE_MAPPING)

# Attach statement ids so both sets of triples can go into the KG table.
classes_KG_data = add_statement_id(classes_to_parent_triples,STANDARD_TRIPLES)
event_KG_data = add_statement_id(event_triples,STANDARD_TRIPLES)

# Player -> event participation statements with their temporal and metadata rows.
player_event_temporal_data = get_data(transformed_player_event_participation_data,EVENT_TEMPORAL_COLUMNS)
(
    player_event_participation_kg_data,
    player_event_participation_temporal_data,
    player_event_participation_metadata,
) = get_player_event_participation_data(
    player_event_temporal_data,
    SOURCES['wiki_source'],
    PREDICATES['player_participation'],
)

# Print the first rows of each frame that will be inserted.
for section_heading, section_frame in (
    ('#####Event KG Statements####\n', player_event_participation_kg_data),
    ('#####Event Temporal Statements####\n', player_event_participation_temporal_data),
    ('#####Event Metadata Statements####\n', player_event_participation_metadata),
):
    print(section_heading)
    print(section_frame.head())

#####Event KG Statements####

   subject       predicate        object          statement_id
0  TAKG.10  participatedIn   TAKG.Q22669  14040001430236160330
1  TAKG.10  participatedIn   TAKG.Q37285    781585610557066065
2  TAKG.10  participatedIn   TAKG.Q47735   8505195811759314565
3  TAKG.10  participatedIn   TAKG.Q79859  14646103263795665356
4  TAKG.10  participatedIn  TAKG.Q102920  11934355370868137415
#####Event Temporal Statements####

           statement_id                 start                   end  \
0  14040001430236160330  2012-06-08T00:00:00Z  2012-07-01T00:00:00Z   
1    781585610557066065  2006-06-09T00:00:00Z  2006-07-09T00:00:00Z   
2   8505195811759314565  2002-05-30T00:00:00Z  2002-06-29T00:00:00Z   
3  14646103263795665356  2014-06-12T00:00:00Z  2014-07-13T00:00:00Z   
4  11934355370868137415  2004-06-12T00:00:00Z  2004-07-04T00:00:00Z   

  time_point          retrieval_id  
0             18175236214666531753  
1             18175236214666531753  
2             1817

#### 1.4 Insert Data

In [14]:
# Table sizes before the event insertion, for comparison with the counts printed afterwards.
print('#####Counts BEFORE Insertion#####\n')
for count_label, table_key in (
    ('Total KG row count: ', 'base_KG_table'),
    ('Total Temporal row count: ', 'temporal_meta_table'),
    ('Total Metadata row count: ', 'meta_table'),
):
    print(count_label, get_row_count(TABLES[table_key])['row_count'][0])

#####Counts BEFORE Insertion#####

Total KG row count:  51327
Total Temporal row count:  15727
Total Metadata row count:  10


In [55]:
# 4. Insert the event participation data, frame by frame, in the same order as before.
for pending_frame, table_key in (
    (classes_KG_data, 'base_KG_table'),
    (event_KG_data, 'base_KG_table'),
    (player_event_participation_kg_data, 'base_KG_table'),
    (player_event_participation_temporal_data, 'temporal_meta_table'),
    (player_event_participation_metadata, 'meta_table'),
):
    insert_data_to_table(pending_frame, TABLES[table_key])

# Report the table sizes once everything has been written.
print('\n#####Counts AFTER Insertion#####\n')
for count_label, table_key in (
    ('Total KG row count: ', 'base_KG_table'),
    ('Total Temporal row count: ', 'temporal_meta_table'),
    ('Total Metadata row count: ', 'meta_table'),
):
    print(count_label, get_row_count(TABLES[table_key])['row_count'][0])


#####Counts AFTER Insertion#####

Total KG row count:  50898
Total Temporal row count:  15663
Total Metadata row count:  5


In [56]:
# Re-run the event-participation lookup held in `q` to inspect the store after the insertion step.
query_TAKG(q)

Unnamed: 0,player_uri,event_uri,year,start,end,time_point,source,insertion_time,Player Name,player_wikiID,TransferMarketID,event,event_wikiID
0,TAKG.10,TAKG.Q102920,2004,2004-06-12T00:00:00Z,2004-07-04T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2004,http://www.wikidata.org/entity/Q102920
1,TAKG.10,TAKG.Q102920,2004,2004-06-12T00:00:00Z,2004-07-04T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2004,http://www.wikidata.org/entity/Q102920
2,TAKG.10,TAKG.Q176883,2010,2010-06-11T00:00:00Z,2010-07-11T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2010 FIFA World Cup,http://www.wikidata.org/entity/Q176883
3,TAKG.10,TAKG.Q176883,2010,2010-06-11T00:00:00Z,2010-07-11T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2010 FIFA World Cup,http://www.wikidata.org/entity/Q176883
4,TAKG.10,TAKG.Q22669,2012,2012-06-08T00:00:00Z,2012-07-01T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2012,http://www.wikidata.org/entity/Q22669
5,TAKG.10,TAKG.Q22669,2012,2012-06-08T00:00:00Z,2012-07-01T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2012,http://www.wikidata.org/entity/Q22669
6,TAKG.10,TAKG.Q241864,2008,2008-06-08T00:00:00Z,2008-06-29T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2008,http://www.wikidata.org/entity/Q241864
7,TAKG.10,TAKG.Q241864,2008,2008-06-08T00:00:00Z,2008-06-29T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,UEFA Euro 2008,http://www.wikidata.org/entity/Q241864
8,TAKG.10,TAKG.Q37285,2006,2006-06-09T00:00:00Z,2006-07-09T00:00:00Z,,Wiki,2023-03-22 14:29:36.226771,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2006 FIFA World Cup,http://www.wikidata.org/entity/Q37285
9,TAKG.10,TAKG.Q37285,2006,2006-06-09T00:00:00Z,2006-07-09T00:00:00Z,,Wiki,2023-03-22 14:33:03.529452,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10,2006 FIFA World Cup,http://www.wikidata.org/entity/Q37285


## Functionality 3
### What is the current and maximum market value (& when) of a given player?
#### Source : Transfer Market

#### 1.1 Existing Knowledge Search

In [15]:
# Existing-knowledge lookup: the stored maximum and current market-value statements
# for player TAKG.10, joined to their Temporal record and retrieval Metadata, with the
# player's attribute triples pivoted into columns via MAX(CASE ...).
# - The predicate test uses IN (...) so the player filter applies to both predicates
#   without relying on implicit AND/OR precedence.
# - 'hasMaximunMarketValue' is kept with that exact spelling because it must match the
#   predicate string stored in the KG table.
# - The temporal columns and source are part of the GROUP BY so each distinct temporal
#   record is returned instead of one arbitrarily chosen per group.
q = """SELECT
  t.subject AS player_uri,t.predicate,t.object AS player_value,start,end,time_point,source,insertion_time,
  MAX(CASE WHEN p.predicate = 'hasFullName' THEN p.object END) AS 'Player Name',
  MAX(CASE WHEN p.predicate = 'hasWikiID' THEN p.object END) AS 'player_wikiID',
  MAX(CASE WHEN p.predicate = 'hasTransferMarketID' THEN p.object END) AS 'TransferMarketID'

FROM KG t
JOIN KG p
  ON t.subject = p.subject

INNER JOIN Temporal ON t.statement_id=Temporal.statement_id
INNER JOIN Metadata ON Temporal.retrieval_id = Metadata.retrieval_id


WHERE t.subject ='TAKG.10'
  AND t.predicate IN ('hasMaximunMarketValue', 'hasCurrentMarketValue')

GROUP BY t.subject,t.predicate,t.object,start,end,time_point,source,insertion_time

"""

In [16]:
# Run the market-value lookup held in `q`; the result is displayed as the cell output.
query_TAKG(q)

Unnamed: 0,player_uri,predicate,player_value,start,end,time_point,source,insertion_time,Player Name,player_wikiID,TransferMarketID
0,TAKG.10,hasCurrentMarketValue,-,,,2023-03-22T14:39:56Z,TFM,2023-03-22 14:39:56.188698,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10
1,TAKG.10,hasCurrentMarketValue,-,,,2023-03-22T14:43:44Z,TFM,2023-03-22 14:43:44.410469,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10
2,TAKG.10,hasMaximunMarketValue,€30.00m,,,2006-07-15T00:00:00Z,TFM,2023-03-22 14:39:56.188698,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10
3,TAKG.10,hasMaximunMarketValue,€30.00m,,,2006-07-15T00:00:00Z,TFM,2023-03-22 14:43:44.410469,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10


#### 1.2 Query External Source

In [17]:
#2. Get External Knowledge & Transform
# The helper returns a pair; only the second element is kept here (as mv_data) and the
# first is discarded.
_, mv_data = get_player_transfer_market_data('10')

getting player data from transfer market ...
https://www.transfermarkt.com/transfermarkt/profil/spieler/10


In [18]:
# Show the raw market-value record before it is turned into KG statements.
print(mv_data)

{'max_market_value': '€30.00m', 'max_value_date': 'Jul 15, 2006', 'current_market_value': '-', 'transferMarketID': '10'}


#### 1.3 Construct Data

In [19]:

# 3. Build the three frames (KG, temporal, metadata) for the market-value statements from mv_data.
mv_kg_data,mv_temporal_data,mv_metadata = get_player_market_value_insertion_data(mv_data)

In [75]:
# Print each frame built for the market-value insertion under its own heading.
for section_heading, section_frame in (
    ('\n###### KG Data #####\n', mv_kg_data),
    ('\n###### Temporal Data #####\n', mv_temporal_data),
    ('\n###### Metadata #####\n', mv_metadata),
):
    print(section_heading)
    print(section_frame)



###### KG Data #####

   subject              predicate   object          statement_id
0  TAKG.10  hasMaximunMarketValue  €30.00m  15906356528659298716
1  TAKG.10  hasCurrentMarketValue        -   7138252353276693471

###### Temporal Data #####

           statement_id start end            time_point         retrieval_id
0  15906356528659298716            2006-07-15T00:00:00Z  1215529535641683369
1   7138252353276693471            2023-03-22T14:43:44Z  1215529535641683369

###### Metadata #####

          retrieval_id source             insertion_time
0  1215529535641683369    TFM 2023-03-22 14:43:44.410469


#### 1.4 Insert Data

In [20]:
# Table sizes before the market-value insertion, for comparison with the counts printed afterwards.
print('#####Counts BEFORE Insertion#####\n')
for count_label, table_key in (
    ('Total KG row count: ', 'base_KG_table'),
    ('Total Temporal row count: ', 'temporal_meta_table'),
    ('Total Metadata row count: ', 'meta_table'),
):
    print(count_label, get_row_count(TABLES[table_key])['row_count'][0])

#####Counts BEFORE Insertion#####

Total KG row count:  51327
Total Temporal row count:  15727
Total Metadata row count:  10


In [76]:
# Write the market-value frames to their tables, in the same order as before.
for pending_frame, table_key in (
    (mv_kg_data, 'base_KG_table'),
    (mv_temporal_data, 'temporal_meta_table'),
    (mv_metadata, 'meta_table'),
):
    insert_data_to_table(pending_frame, TABLES[table_key])

# Report the table sizes once everything has been written.
print('\n#####Counts AFTER Insertion#####\n')
for count_label, table_key in (
    ('Total KG row count: ', 'base_KG_table'),
    ('Total Temporal row count: ', 'temporal_meta_table'),
    ('Total Metadata row count: ', 'meta_table'),
):
    print(count_label, get_row_count(TABLES[table_key])['row_count'][0])


#####Counts AFTER Insertion#####

Total KG row count:  50902
Total Temporal row count:  15667
Total Metadata row count:  7


In [77]:
# Re-run the market-value lookup held in `q` to inspect the store after the insertion step.
query_TAKG(q)

Unnamed: 0,player_uri,predicate,player_value,start,end,time_point,source,insertion_time,Player Name,player_wikiID,TransferMarketID
0,TAKG.10,hasCurrentMarketValue,-,,,2023-03-22T14:39:56Z,TFM,2023-03-22 14:39:56.188698,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10
1,TAKG.10,hasCurrentMarketValue,-,,,2023-03-22T14:43:44Z,TFM,2023-03-22 14:43:44.410469,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10
2,TAKG.10,hasMaximunMarketValue,€30.00m,,,2006-07-15T00:00:00Z,TFM,2023-03-22 14:39:56.188698,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10
3,TAKG.10,hasMaximunMarketValue,€30.00m,,,2006-07-15T00:00:00Z,TFM,2023-03-22 14:43:44.410469,Miroslav Klose,http://www.wikidata.org/entity/Q80471,10


## Functionality 4
### What is the football club transfer history of a given player?
#### External Source : Transfer Market

#### 1.1 Existing Knowledge Search

In [21]:
# Existing-knowledge lookup: every 'playedFor' statement stored for player TAKG.10,
# joined to its Temporal record and retrieval Metadata. Player and team attribute
# triples are pivoted into columns with MAX(CASE ...).
# Rows are grouped by player, team, start, end, source and insertion_time, so each
# distinct spell at a club is reported once per source and insertion.
q="""SELECT
  t.subject AS player_uri,t.object AS team_uri,start,end,time_point,source,insertion_time,
  MAX(CASE WHEN p.predicate = 'hasFullName' THEN p.object END) AS 'Player Name',
  MAX(CASE WHEN p.predicate = 'hasWikiID' THEN p.object END) AS 'player_wikiID',
  MAX(CASE WHEN p.predicate = 'hasTransferMarketID' THEN p.object END) AS 'TransferMarketID',
  MAX(CASE WHEN tm.predicate = 'RDFS.label' THEN tm.object END) AS 'Team Name',
  MAX(CASE WHEN tm.predicate = 'hasWikiID' THEN tm.object END) AS 'team_wikiID'

FROM KG t
JOIN KG p
  ON t.subject = p.subject
JOIN KG tm
    ON t.object=tm.subject
INNER JOIN Temporal ON t.statement_id=Temporal.statement_id
INNER JOIN Metadata ON Temporal.retrieval_id = Metadata.retrieval_id



WHERE t.predicate = 'playedFor' AND t.subject='TAKG.10'

GROUP BY t.subject,t.object,start,end,source,insertion_time

  """

In [22]:
# Run the transfer-history lookup and show its columns in a reader-friendly order.
transfer_history_columns = [
    'player_uri', 'Player Name', 'team_uri', 'Team Name',
    'start', 'end', 'time_point', 'source', 'insertion_time',
    'player_wikiID', 'TransferMarketID', 'team_wikiID',
]
query_TAKG(q)[transfer_history_columns]

Unnamed: 0,player_uri,Player Name,team_uri,Team Name,start,end,time_point,source,insertion_time,player_wikiID,TransferMarketID,team_wikiID
0,TAKG.10,Miroslav Klose,TAKG.11451,FC Homburg II,1998-07-01T00:00:00Z,1999-01-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,
1,TAKG.10,Miroslav Klose,TAKG.11451,FC Homburg II,1998-07-01T00:00:00Z,1999-01-01T00:00:00Z,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,
2,TAKG.10,Miroslav Klose,TAKG.123,Retired,2016-07-01T00:00:00Z,,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q152984
3,TAKG.10,Miroslav Klose,TAKG.123,Retired,2016-07-01T00:00:00Z,,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q152984
4,TAKG.10,Miroslav Klose,TAKG.151,K'lautern II,1999-07-01T00:00:00Z,2000-07-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,
5,TAKG.10,Miroslav Klose,TAKG.151,K'lautern II,1999-07-01T00:00:00Z,2000-07-01T00:00:00Z,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,
6,TAKG.10,Miroslav Klose,TAKG.1768,SG Blaubach-Diedelkopf,1997-07-01T00:00:00Z,1998-07-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,
7,TAKG.10,Miroslav Klose,TAKG.1768,SG Blaubach-Diedelkopf,1997-07-01T00:00:00Z,1998-07-01T00:00:00Z,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,
8,TAKG.10,Miroslav Klose,TAKG.2,1.FC K'lautern,1999-01-01T00:00:00Z,2001-01-01T00:00:00Z,,Wiki,2023-03-22 13:52:07.237994,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q8466
9,TAKG.10,Miroslav Klose,TAKG.2,1.FC K'lautern,2000-07-01T00:00:00Z,2004-07-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q8466


#### 1.2 Query External Source

In [23]:
#2. Get External Knowledge & Transform
# The helper returns a pair; only the first element is kept here (as player_tfm_data)
# and the second is discarded.
player_tfm_data, _ = get_player_transfer_market_data('10')

getting player data from transfer market ...
https://www.transfermarkt.com/transfermarkt/profil/spieler/10


In [24]:
# Display the fetched transfer records before they are turned into KG statements.
player_tfm_data

Unnamed: 0,transferMarketID,transferMarketTeamID,teamLabel,start,end
0,10,123,Retired,2016-07-01T00:00:00Z,
1,10,398,Lazio,2011-07-01T00:00:00Z,2016-07-01T00:00:00Z
2,10,27,Bayern Munich,2007-07-01T00:00:00Z,2011-07-01T00:00:00Z
3,10,86,Werder Bremen,2004-07-01T00:00:00Z,2007-07-01T00:00:00Z
4,10,2,1.FC K'lautern,2000-07-01T00:00:00Z,2004-07-01T00:00:00Z
5,10,151,K'lautern II,1999-07-01T00:00:00Z,2000-07-01T00:00:00Z
6,10,459,FC 08 Homburg,1999-01-01T00:00:00Z,1999-07-01T00:00:00Z
7,10,11451,FC Homburg II,1998-07-01T00:00:00Z,1999-01-01T00:00:00Z
8,10,1768,SG Blaubach-Diedelkopf,1997-07-01T00:00:00Z,1998-07-01T00:00:00Z
9,10,29091,SG Blaubach-Diedelkopf U19,1995-07-01T00:00:00Z,1997-07-01T00:00:00Z


#### 1.3 Construct Data

In [25]:
# 3. Build the team triples, the player -> team statements, and their temporal and
# metadata rows from the fetched transfer records.
(
    tm_team_KG_data,
    tm_player_played_for_kg_data,
    tm_player_temporal_data,
    tm_player_metadata,
) = process_player_transfer_market_data(player_tfm_data)

In [26]:
# Print a two-row preview of each frame (the metadata frame is printed in full).
for section_heading, section_preview in (
    ('\n###### Team KG Data #####\n', tm_team_KG_data.head(2)),
    ('\n###### Player KG Data #####\n', tm_player_played_for_kg_data.head(2)),
    ('\n###### Temporal Data #####\n', tm_player_temporal_data.head(2)),
    ('\n###### Metadata #####\n', tm_player_metadata),
):
    print(section_heading)
    print(section_preview)


###### Team KG Data #####

    subject            predicate   object          statement_id
0  TAKG.123  hasTransferMarketID      123     30452761877545301
1  TAKG.123           RDFS.label  Retired  16731588989183257177

###### Player KG Data #####

   subject  predicate    object          statement_id
0  TAKG.10  playedFor  TAKG.123  10761743852176244209
1  TAKG.10  playedFor  TAKG.398  17094178305767426341

###### Temporal Data #####

           statement_id                 start                   end  \
0  10761743852176244209  2016-07-01T00:00:00Z                         
1  17094178305767426341  2011-07-01T00:00:00Z  2016-07-01T00:00:00Z   

  time_point          retrieval_id  
0             10260752227526081031  
1             10260752227526081031  

###### Metadata #####

           retrieval_id source             insertion_time
0  10260752227526081031    TFM 2023-03-22 20:43:20.761873


#### 1.4 Insert Data

In [27]:
# Table sizes before the transfer-history insertion, for comparison with the counts printed afterwards.
print('#####Counts BEFORE Insertion#####\n')
for count_label, table_key in (
    ('Total KG row count: ', 'base_KG_table'),
    ('Total Temporal row count: ', 'temporal_meta_table'),
    ('Total Metadata row count: ', 'meta_table'),
):
    print(count_label, get_row_count(TABLES[table_key])['row_count'][0])

#####Counts BEFORE Insertion#####

Total KG row count:  51327
Total Temporal row count:  15727
Total Metadata row count:  10


In [108]:
# Insert the processed Transfermarkt player data into its target tables.
# The order is significant only in that it mirrors the cell's original sequence:
# team triples, playedFor triples, temporal rows, then metadata.
tm_insertions = (
    (tm_team_KG_data, 'base_KG_table'),
    (tm_player_played_for_kg_data, 'base_KG_table'),
    (tm_player_temporal_data, 'temporal_meta_table'),
    (tm_player_metadata, 'meta_table'),
)
for frame, table_key in tm_insertions:
    insert_data_to_table(frame, TABLES[table_key])

# Report the table sizes again so the effect of the insert is visible.
print('\n#####Counts AFTER Insertion#####\n')
for label, table_key in (
    ('KG', 'base_KG_table'),
    ('Temporal', 'temporal_meta_table'),
    ('Metadata', 'meta_table'),
):
    row_count = get_row_count(TABLES[table_key])['row_count'][0]
    print(f'Total {label} row count: ', row_count)


#####Counts AFTER Insertion#####

Total KG row count:  50968
Total Temporal row count:  15689
Total Metadata row count:  9


In [109]:
# Re-run the knowledge query against the database and display the result with
# the columns arranged in report order.
played_for_columns = [
    'player_uri', 'Player Name', 'team_uri', 'Team Name',
    'start', 'end', 'time_point', 'source', 'insertion_time',
    'player_wikiID', 'TransferMarketID', 'team_wikiID',
]
query_TAKG(q)[played_for_columns]

Unnamed: 0,player_uri,Player Name,team_uri,Team Name,start,end,time_point,source,insertion_time,player_wikiID,TransferMarketID,team_wikiID
0,TAKG.10,Miroslav Klose,TAKG.11451,FC Homburg II,1998-07-01T00:00:00Z,1999-01-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,
1,TAKG.10,Miroslav Klose,TAKG.11451,FC Homburg II,1998-07-01T00:00:00Z,1999-01-01T00:00:00Z,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,
2,TAKG.10,Miroslav Klose,TAKG.123,Retired,2016-07-01T00:00:00Z,,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q152984
3,TAKG.10,Miroslav Klose,TAKG.123,Retired,2016-07-01T00:00:00Z,,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q152984
4,TAKG.10,Miroslav Klose,TAKG.151,K'lautern II,1999-07-01T00:00:00Z,2000-07-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,
5,TAKG.10,Miroslav Klose,TAKG.151,K'lautern II,1999-07-01T00:00:00Z,2000-07-01T00:00:00Z,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,
6,TAKG.10,Miroslav Klose,TAKG.1768,SG Blaubach-Diedelkopf,1997-07-01T00:00:00Z,1998-07-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,
7,TAKG.10,Miroslav Klose,TAKG.1768,SG Blaubach-Diedelkopf,1997-07-01T00:00:00Z,1998-07-01T00:00:00Z,,TFM,2023-03-22 15:07:35.123990,http://www.wikidata.org/entity/Q80471,10,
8,TAKG.10,Miroslav Klose,TAKG.2,1.FC K'lautern,1999-01-01T00:00:00Z,2001-01-01T00:00:00Z,,Wiki,2023-03-22 13:52:07.237994,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q8466
9,TAKG.10,Miroslav Klose,TAKG.2,1.FC K'lautern,2000-07-01T00:00:00Z,2004-07-01T00:00:00Z,,TFM,2023-03-22 14:51:08.430856,http://www.wikidata.org/entity/Q80471,10,http://www.wikidata.org/entity/Q8466


## Functionality 5
### Which players played alongside a given player within a given time range?
#### Source : Wikidata

#### 1.1 Existing Knowledge Search

# Scratch lookup: every playedFor statement of one player joined to its temporal
# row, with start and end reduced to the year.
# The start and end years carry distinct aliases (given_player_start /
# given_player_end) so both columns can be addressed by name in the result.
q = """SELECT DISTINCT p1.subject,p1.object as given_player_team,strftime('%Y',t1.start) as given_player_start,strftime('%Y',t1.end) as given_player_end
        FROM KG p1
        INNER JOIN Temporal t1 
          ON p1.statement_id=t1.statement_id
        WHERE p1.predicate = 'playedFor' AND p1.subject = 'TAKG.6438'
    """
query_TAKG(q)

# Scratch lookup: playedFor statements of one player whose start year or end year
# falls strictly between 2008 and 2012.
# The start and end years carry distinct aliases (given_player_start /
# given_player_end) so both columns can be addressed by name in the result.
# NOTE(review): a stint that starts before 2008 and ends after 2012 matches neither
# branch of the year filter — confirm whether such stints should be listed here.
q = """SELECT DISTINCT p1.subject,p1.object as given_player_team,strftime('%Y',t1.start) as given_player_start,strftime('%Y',t1.end) as given_player_end
        FROM KG p1
        INNER JOIN Temporal t1 
          ON p1.statement_id=t1.statement_id
        WHERE p1.predicate = 'playedFor' AND 
        ((strftime('%Y',t1.start) > '2008' AND strftime('%Y',t1.start) < '2012') OR (strftime('%Y',t1.end) > '2008' AND strftime('%Y',t1.end) < '2012')) AND
        p1.subject = 'TAKG.55957'
    """
query_TAKG(q)

In [28]:
# Player and period whose teammates are looked up. The years are four-digit
# strings because the query compares them with strftime('%Y', ...) text values.
# These are notebook constants, not user input; they are interpolated directly
# into the SQL text below.
given_player_uri = 'TAKG.10'
range_start_year = '2008'
range_end_year = '2012'

# Teammate query:
# 1. The `given` CTE lists the teams of the given player whose stint touches the
#    requested year range, together with the stint's start and end year.
# 2. The outer query returns playedFor statements of other players on those teams
#    whose stint touches both the given player's stint and the requested range,
#    with player/team labels and IDs pivoted from the KG table.
# NOTE(review): the CTE tests `end < range_end` and `start <= range_start`, while the
# outer range filter tests `end <= range_end` and `start < range_start` — confirm
# which boundary handling is intended and align the two.
q = f"""WITH given AS (
        SELECT DISTINCT p1.object as given_player_team,strftime('%Y',t1.start) as given_player_start,strftime('%Y',t1.end) as given_player_end
        FROM KG p1
        INNER JOIN Temporal t1 
          ON p1.statement_id=t1.statement_id
        WHERE p1.predicate = 'playedFor' AND 
        ( (strftime('%Y',t1.start) >= '{range_start_year}' AND strftime('%Y',t1.start) < '{range_end_year}') OR 
          (strftime('%Y',t1.end) > '{range_start_year}' AND strftime('%Y',t1.end) < '{range_end_year}') OR
          (strftime('%Y',t1.start) <= '{range_start_year}' AND strftime('%Y',t1.end) > '{range_end_year}')
        ) AND
        p1.subject = '{given_player_uri}')

  SELECT DISTINCT
  t.subject AS player_uri,t.object AS team_uri,start,end,time_point,source,insertion_time,
  MAX(CASE WHEN p.predicate = 'hasFullName' THEN p.object END) AS 'PlayerName',
  MAX(CASE WHEN p.predicate = 'hasWikiID' THEN p.object END) AS 'player_wikiID',
  MAX(CASE WHEN p.predicate = 'hasTransferMarketID' THEN p.object END) AS 'TransferMarketID',
  MAX(CASE WHEN tm.predicate = 'RDFS.label' THEN tm.object END) AS 'Team Name',
  MAX(CASE WHEN tm.predicate = 'hasWikiID' THEN tm.object END) AS 'team_wikiID'

FROM KG t
JOIN KG p
  ON t.subject = p.subject
JOIN KG tm
    ON t.object=tm.subject
INNER JOIN Temporal ON t.statement_id=Temporal.statement_id
INNER JOIN Metadata ON Temporal.retrieval_id = Metadata.retrieval_id
JOIN given ON t.object = given.given_player_team

WHERE t.predicate = 'playedFor' AND 
(   (strftime('%Y',start) >= given.given_player_start AND strftime('%Y',start) < given.given_player_end) OR 
    (strftime('%Y',end) > given.given_player_start AND strftime('%Y',end) < given.given_player_end) OR
    (strftime('%Y',start) < given.given_player_start AND strftime('%Y',end) > given.given_player_end)
        ) AND
(   (strftime('%Y',start) >= '{range_start_year}' AND strftime('%Y',start) < '{range_end_year}') OR 
    (strftime('%Y',end) > '{range_start_year}' AND strftime('%Y',end) <= '{range_end_year}') OR
    (strftime('%Y',start) < '{range_start_year}' AND strftime('%Y',end) > '{range_end_year}')
    ) AND

t.subject != '{given_player_uri}'


GROUP BY t.subject,t.object,start,end,source,insertion_time
ORDER BY PlayerName
"""

In [29]:
# Teammates already stored in TAKG for the given player and period, with the
# columns arranged in report order.
team_mate_columns = [
    'player_uri', 'PlayerName', 'team_uri', 'Team Name',
    'start', 'end', 'time_point', 'source', 'insertion_time',
    'player_wikiID', 'TransferMarketID', 'team_wikiID',
]
existing_team_mates_knowledge = query_TAKG(q)[team_mate_columns]

#### 1.2 Query External Source

In [30]:
# SPARQL query for the external source (Wikidata, per the section header) that
# looks up the teammates of one player.
# - ?givenPlayer is pinned by its Transfermarkt ID through the VALUES clause ('10');
#   ?givenStart / ?givenEnd pin the year range (2008, 2012).
# - A teammate (?player) needs a P54 statement on the same ?team with a P580 start
#   qualifier; the P582 end qualifier is OPTIONAL for the teammate but required for
#   the given player.
# - FILTERs keep English team labels, teams whose ?teamType is wd:Q476028, stints of
#   the given player and of the teammate that touch the year range, stint pairs that
#   intersect each other, and rows where the teammate is not the given player.
# NOTE(review): every stint-vs-stint comparison is strict (<, >), so two stints with
# identical start and end dates satisfy none of the three branches — confirm that
# this is intended.
team_mate_history_SPARQL_query = """PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT distinct ?fullName ?player ?DOB ?sex ?transferMarketID ?team ?teamLabel ?teamType ?startTime ?endTime ?teamCountry ?transferMarketTeamID 

WHERE{
  
    ?givenPlayer wdt:P2446 ?givenTransferMarketID.
    ?givenPlayer p:P54 ?team_statement_given.
    ?team_statement_given ps:P54 ?team;
	  pq:P580 ?startTime_given;
	  pq:P582 ?endTime_given.
    #newPlayer
    ?player wdt:P54 ?team;
            wdt:P1559 ?fullName;
            p:P54 ?team_statement_new.
    ?team_statement_new ps:P54 ?team;
	  pq:P580 ?startTime.

    OPTIONAL {?team_statement_new  pq:P582 ?endTime.}
  
    ?player wdt:P569 ?DOB;
            wdt:P2446 ?transferMarketID;
            wdt:P1559 ?fullName.
    OPTIONAL{
      ?player wdt:P569 ?DOB;
              wdt:P21 ?sex.       
    }
    ?team rdfs:label ?teamLabel;
          wdt:P17 ?teamCountry;
          wdt:P7223 ?transferMarketTeamID;
          wdt:P31 ?teamType.
  
    FILTER (langMatches(lang(?teamLabel), "EN"))
    FILTER (?teamType = wd:Q476028)
    FILTER (((?givenStart <= year(?startTime_given) && year(?startTime_given) < ?givenEnd) || (?givenStart < year(?endTime_given) && year(?endTime_given) < ?givenEnd) || (?givenStart > year(?startTime_given) && ?givenEnd < year(?endTime_given))))
    FILTER (((?startTime_given < ?startTime && ?startTime < ?endTime_given) || (?startTime_given < ?endTime && ?endTime < ?endTime_given) || (?endTime_given > ?startTime && ?endTime_given < ?endTime)))
    FILTER (((?givenStart <= year(?startTime) && year(?startTime) < ?givenEnd) || (?givenStart < year(?endTime) && year(?endTime) < ?givenEnd) || (?givenStart > year(?startTime) && ?givenEnd < year(?endTime))))
    FILTER (?transferMarketID != ?givenTransferMarketID)
    
    VALUES ?givenTransferMarketID {'10'}
    VALUES ?givenStart {2008}
    VALUES ?givenEnd {2012}
}"""

In [31]:
#2. Get External Knowledge & Transform
# Send team_mate_history_SPARQL_query through get_all_data, then hand the raw
# results to transform_player_SPARQL_results. The transformed frame is the input
# of the difference check and of the construct-data cell.
team_mate_results = get_all_data(team_mate_history_SPARQL_query)
transformed_team_mate_data = transform_player_SPARQL_results(team_mate_results)

In [217]:
#transformed_team_mate_data

In [33]:
#Difference between existing knowledge and the external knowledge
# Outer-merge on the player URI with the merge indicator switched on, then keep the
# rows flagged "left_only": players returned by the external source that have no
# row in the existing TAKG result.
# The join key is given once; naming the same column twice in `on` adds nothing.
df = pd.merge(
    transformed_team_mate_data,
    existing_team_mates_knowledge,
    on='player_uri',
    how="outer",
    indicator=True,
).query('_merge=="left_only"')
df[['player_uri','fullName','player']]

Unnamed: 0,player_uri,fullName,player


#### 1.3 Construct Data

In [34]:
# Parent-class triples for the entities in the external result.
classes_to_parent_triples = entity_to_parent(transformed_team_mate_data)

# Player triples, keyed by player_uri.
player_data = get_data(transformed_team_mate_data, PLAYER_COLUMNS)
player_triples = entity_data_to_triples(
    player_data, 'player_uri', PLAYER_COLUMN_PREDICATE_MAPPING
)

# Team triples, keyed by team_uri.
team_data = get_data(transformed_team_mate_data, TEAM_COLUMNS)
team_triples = entity_data_to_triples(
    team_data, 'team_uri', TEAM_COLUMN_PREDICATE_MAPPING
)

# Make the rows for insertion into the KG table by passing each triple set
# through add_statement_id.
classes_KG_data = add_statement_id(classes_to_parent_triples, STANDARD_TRIPLES)
player_KG_data = add_statement_id(player_triples, STANDARD_TRIPLES)
team_KG_data = add_statement_id(team_triples, STANDARD_TRIPLES)

# Temporal columns of the external result, split into playedFor KG statements,
# temporal rows and retrieval metadata tagged with the wiki source.
player_played_for_temporal_data = get_data(
    transformed_team_mate_data, TEMPORAL_DATA_COLUMNS
)
(
    player_played_for_kg_data,
    played_for_temporal_data,
    played_for_metadata,
) = player_played_for_data(player_played_for_temporal_data, SOURCES['wiki_source'])

# Show the head of each of the three playedFor frames under its banner.
for banner, frame in (
    ('#####KG Statements####\n', player_played_for_kg_data),
    ('#####Temporal Statements####\n', played_for_temporal_data),
    ('#####Metadata Statements####\n', played_for_metadata),
):
    print(banner)
    print(frame.head())

#####KG Statements####

      subject  predicate    object          statement_id
0  TAKG.59016  playedFor   TAKG.27  17244051403606912211
1  TAKG.54659  playedFor   TAKG.27   9218861189949334610
2  TAKG.55957  playedFor  TAKG.398  16111782321306586420
3   TAKG.6438  playedFor  TAKG.398   6819139016539893105
4  TAKG.39728  playedFor   TAKG.27  12319064164994108788
#####Temporal Statements####

           statement_id                 start                   end  \
0  17244051403606912211  2010-07-01T00:00:00Z  2021-07-01T00:00:00Z   
1   9218861189949334610  2009-01-01T00:00:00Z  2017-01-01T00:00:00Z   
2  16111782321306586420  2010-01-01T00:00:00Z  2015-01-01T00:00:00Z   
3   6819139016539893105  2006-01-01T00:00:00Z  2015-01-01T00:00:00Z   
4  12319064164994108788  2007-01-01T00:00:00Z  2009-01-01T00:00:00Z   

  time_point          retrieval_id  
0             16672210175287255135  
1             16672210175287255135  
2             16672210175287255135  
3             166722101752872

#### 1.4 Insert Data

In [222]:
# Snapshot of the three table sizes before inserting the teammate data, for
# comparison with the counts printed after the insert.
print('#####Counts BEFORE Insertion#####\n')
for label, table_key in (
    ('KG', 'base_KG_table'),
    ('Temporal', 'temporal_meta_table'),
    ('Metadata', 'meta_table'),
):
    row_count = get_row_count(TABLES[table_key])['row_count'][0]
    print(f'Total {label} row count: ', row_count)

#####Counts BEFORE Insertion#####

Total KG row count:  50968
Total Temporal row count:  15689
Total Metadata row count:  9


In [225]:
# Insert the teammate data into its target tables, in the cell's original order:
# class, player and team rows, then the playedFor statements with their temporal
# rows and metadata.
team_mate_insertions = (
    (classes_KG_data, 'base_KG_table'),
    (player_KG_data, 'base_KG_table'),
    (team_KG_data, 'base_KG_table'),
    (player_played_for_kg_data, 'base_KG_table'),
    (played_for_temporal_data, 'temporal_meta_table'),
    (played_for_metadata, 'meta_table'),
)
for frame, table_key in team_mate_insertions:
    insert_data_to_table(frame, TABLES[table_key])

# Report the table sizes again so the effect of the insert is visible.
print('\n#####Counts AFTER Insertion#####\n')
for label, table_key in (
    ('KG', 'base_KG_table'),
    ('Temporal', 'temporal_meta_table'),
    ('Metadata', 'meta_table'),
):
    row_count = get_row_count(TABLES[table_key])['row_count'][0]
    print(f'Total {label} row count: ', row_count)


#####Counts AFTER Insertion#####

Total KG row count:  51327
Total Temporal row count:  15727
Total Metadata row count:  10


In [226]:
# Query the database again after the insert so the difference check runs against
# the updated knowledge; columns are arranged in report order.
team_mate_columns = [
    'player_uri', 'PlayerName', 'team_uri', 'Team Name',
    'start', 'end', 'time_point', 'source', 'insertion_time',
    'player_wikiID', 'TransferMarketID', 'team_wikiID',
]
existing_team_mates_knowledge = query_TAKG(q)[team_mate_columns]
#existing_team_mates_knowledge

In [227]:
#Difference
# Outer-merge on the player URI with the merge indicator switched on, then keep the
# rows flagged "left_only": players returned by the external source that still have
# no row in the TAKG result.
# The join key is given once; naming the same column twice in `on` adds nothing.
df = pd.merge(
    transformed_team_mate_data,
    existing_team_mates_knowledge,
    on='player_uri',
    how="outer",
    indicator=True,
).query('_merge=="left_only"')
df[['player_uri','fullName','player']]

Unnamed: 0,player_uri,fullName,player
