# Intro

> Generate [object_per_actor_nmvw.csv](object_per_actor_nmvw.csv)

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(DISTINCT ?nmvw_object) AS ?no_object) ?nmvw_actor
WHERE{
  GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
  {?nmvw_acq crm:P24_transferred_title_of ?nmvw_object .
   ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .}
  UNION
  {
    ?nmvw_prod crm:P14_carried_out_by ?nmvw_actor .
    ?nmvw_object crm:P108i_was_produced_by ?nmvw_prod .
  }
  ?nmvw_object a crm:E22_Human-Made_Object .
  }
}GROUP BY ?nmvw_actor
```

In [None]:
def cleanup(x: str):
    """Convert a (possibly quoted) numeric CSV cell to an ``int``.

    The value is passed through ``str()`` first, double quotes are removed
    and surrounding whitespace is stripped before the integer conversion.
    Raises ``ValueError`` when the remaining text is not an integer literal.
    """
    unquoted = str(x).replace('"', '')
    return int(unquoted.strip())

import pandas as pd

# Load the per-actor object counts for NMVW; the last CSV column is discarded
# (presumably an empty trailing column of the export -- verify against the file).
df1 = pd.read_csv("object_per_actor_nmvw.csv").iloc[:, :-1]
df1.columns = ['no_object', 'nmvw_actor']
# Parse the quoted counts into integers and rank actors by object count.
df1 = df1.assign(no_object=df1['no_object'].apply(cleanup))
df1 = df1.sort_values(by='no_object', ascending=False)
df1

> Generate [object_per_actor_bb.csv](object_per_actor_bb.csv)
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT (COUNT(DISTINCT ?bb_obj) AS ?no_object) ?bb_actor
WHERE{
    {?bb_acq crm:P23_transferred_title_from ?bb_actor .	
    ?bb_acq crm:P24_transferred_title_of ?bb_obj .}
    UNION
    {
          ?bb_obj crm:P51_has_former_or_current_owner ?bb_actor .
    }

GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_obj a crm:E22_Human-Made_Object. }
}GROUP BY ?bb_actor
```
> Note: https://pressingmatter.nl/Bronbeek/Constituents/1896 --> "Koninklijk Tehuis voor Oud-Militairen en Museum Bronbeek"^^xsd:string
> https://pressingmatter.nl/Bronbeek/Constituents/9806 --> Nijmeegs Volkenkundig Museum
> https://pressingmatter.nl/Bronbeek/Constituents/9804 --> "Gemeente Nijmegen"
> https://pressingmatter.nl/Bronbeek/Constituents/9805 --> Jean Louis Henri Beijens

In [None]:
def cleanup(x: str):
    """Return the integer held in a CSV cell, ignoring double quotes and padding."""
    text = str(x)
    text = text.replace('"', '')
    text = text.strip()
    return int(text)

import pandas as pd

# Load the per-actor object counts for Bronbeek; the last CSV column is discarded.
df1 = pd.read_csv("object_per_actor_bb.csv")
df1 = df1.iloc[:, :-1]
df1.columns = ['no_object', 'bb_actor']
# Parse the counts and list the actors with the most objects first.
df1 = df1.assign(no_object=df1['no_object'].apply(cleanup)).sort_values(
    by='no_object', ascending=False
)
df1

# CQ-1a

How many actors do we have that are connected to one or more objects and also have a military background?

SPARQL Query:
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(DISTINCT ?nmvw_actor) AS ?no_actor)
WHERE{
  GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
  {?nmvw_acq crm:P24_transferred_title_of ?nmvw_object .
   ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .}
  UNION
  {
    ?nmvw_prod crm:P14_carried_out_by ?nmvw_actor .
    ?nmvw_object crm:P108i_was_produced_by ?nmvw_prod .
  }
  ?nmvw_object a crm:E22_Human-Made_Object .
  }
  ?nmvw_actor owl:sameAs ?bb_actor .
}
```

> Note that, here we consider `production actor` too! 

1. **Initial+Surname Match**: 277 actors from the NMVW dataset are connected to the provenance of at least one NMVW object and also have an owl:sameAs link to a Bronbeek actor.
2. **Exact String Matching**: 117 actors from the NMVW dataset are connected to the provenance of at least one NMVW object and also have an owl:sameAs link to a Bronbeek actor.
3. **Surname Matching**: 2680 actors from the NMVW dataset are connected to the provenance of at least one NMVW object and also have an owl:sameAs link to a Bronbeek actor.
4. **Fuzzy String Matching**: 6779 actors from the NMVW dataset are connected to the provenance of at least one NMVW object and also have an owl:sameAs link to a Bronbeek actor.
5. **Deezy Match**: 87 actors from the NMVW dataset are connected to the provenance of at least one NMVW object and also have an owl:sameAs link to a Bronbeek actor.


# CQ 1b

Query to generate [CQ1b_nmvw_obj.csv](CQ1b_nmvw_obj.csv)
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?nmvw_object ?nmvw_actor
WHERE{
  GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
  {?nmvw_acq crm:P24_transferred_title_of ?nmvw_object .
   ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .}
  UNION
  {
    ?nmvw_prod crm:P14_carried_out_by ?nmvw_actor .
    ?nmvw_object crm:P108i_was_produced_by ?nmvw_prod .
  }
  ?nmvw_object a crm:E22_Human-Made_Object .
  }
  # removing "onbeked / unknown"
  FILTER (?nmvw_actor != <https://hdl.handle.net/20.500.11840/pi4332>)
}
```

In [None]:
def cleanup(x: str):
    """Parse a CSV cell as an integer after removing double quotes and whitespace."""
    without_quotes = "".join(str(x).split('"'))
    return int(without_quotes.strip())

import pandas as pd

# Load the (object, actor) rows, remove double quotes / padding from every
# string cell and discard the last CSV column.
df1 = (
    pd.read_csv("CQ1b_nmvw_obj.csv")
    .map(lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell)
    .iloc[:, :-1]
)
df1.columns = ['nmvw_object', 'nmvw_actor']
df1

In [None]:
# Number of distinct objects connected to each actor.
unique_counts = df1.groupby('nmvw_actor')['nmvw_object'].nunique()

# Keep only the rows whose actor is connected to more than one distinct object.
filtered_df = df1.loc[
    df1['nmvw_actor'].isin(unique_counts.loc[unique_counts.gt(1)].index)
]

# Distinct objects that share an actor with at least one other object.
print(filtered_df['nmvw_object'].nunique())

> Out of 446,310 unique objects that are connected to an actor, 444,517 can be linked with at least one other object.

Generate [CQ1b_bb_obj.csv](CQ1b_bb_obj.csv)

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?nmvw_actor (COUNT(DISTINCT ?bb_object) AS ?n_bb_object) 
WHERE{
    ?nmvw_actor owl:sameAs ?bb_actor .
    #acquisition event
    {
    	?bb_acq crm:P23_transferred_title_from ?bb_actor .   
    	?bb_acq crm:P24_transferred_title_of ?bb_object .
    }
  UNION
    {
      	?bb_object crm:P51_has_former_or_current_owner ?bb_actor .
    }
  GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_object a crm:E22_Human-Made_Object .
    }
} GROUP BY ?nmvw_actor
```

In [None]:
def cleanup(x: str):
    """Strip double quotes and whitespace from a CSV cell and return it as ``int``."""
    digits = str(x).translate({ord('"'): None}).strip()
    return int(digits)

import pandas as pd

# Bronbeek object count per linked NMVW actor; all CSV columns are kept here.
df2 = pd.read_csv("CQ1b_bb_obj.csv").map(
    lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell
)
df2.columns = ["nmvw_actor", "bb_object" ]
df2['bb_object'] = df2['bb_object'].apply(cleanup)

# Restrict to actors that also occur in the NMVW object table held in df1.
df2 = df2.loc[df2['nmvw_actor'].isin(df1['nmvw_actor'])]

# Total of the per-actor Bronbeek object counts for those actors.
df2["bb_object"].sum()

In [None]:
# Report how many distinct NMVW actors remain in df2.
print(
    "The number of actor contributing to this object count is: "
    f"{df2['nmvw_actor'].nunique()}"
)

> Do any of the actors who have only one object now have more than one connected object due to the connection with Bronbeek?

In [None]:
# Count the actors in df2 that are connected to exactly one distinct NMVW object.
df2.loc[
    df2['nmvw_actor'].isin(unique_counts[unique_counts == 1].index),
    'nmvw_actor',
].nunique()

In [None]:
# NOTE(review): hard-coded operands -- presumably totals copied from the
# outputs of earlier cells; confirm where 310 and 15 come from.
310 + 15

# CQ 1c

How many possible object-to-object connections do we have when objects are linked via a shared actor?

Generate [CQ1b_nmvw_obj.csv](CQ1b_nmvw_obj.csv)

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (COUNT(DISTINCT ?nmvw_object) AS ?n_nmvw_object) ?nmvw_actor
WHERE{
  GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
  {?nmvw_acq crm:P24_transferred_title_of ?nmvw_object .
   ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .}
  UNION
  {
    ?nmvw_prod crm:P14_carried_out_by ?nmvw_actor .
    ?nmvw_object crm:P108i_was_produced_by ?nmvw_prod .
  }
  ?nmvw_object a crm:E22_Human-Made_Object .
  }
  # removing "onbeked / unknown"
  FILTER (?nmvw_actor != <https://hdl.handle.net/20.500.11840/pi4332>)
}GROUP BY ?nmvw_actor
```

> Note: https://hdl.handle.net/20.500.11840/pi40292 / "Stichting Indisch Wetenschappelijk Instituut" generate 69382 object connection but keeping this for now. 

In [None]:
def cleanup(x: str):
    """Return the integer value of a CSV cell.

    Double quotes are dropped and leading/trailing whitespace is trimmed
    before conversion; non-string input is stringified first.
    """
    raw = str(x)
    return int(raw.replace('"', '').strip())

import math

def calculate_combinations(total_objects):
    """Return how many unordered pairs can be formed from ``total_objects`` items.

    This is the binomial coefficient C(n, 2). ``math.comb`` is used instead of
    a ratio of factorials so that counts below 2 yield 0 rather than an error
    and large counts do not require computing enormous factorials.

    Args:
        total_objects: Non-negative item count. Integral floats (e.g. ``5.0``)
            are accepted and treated as the corresponding integer.

    Returns:
        The number of distinct pairs as a Python ``int``.

    Raises:
        ValueError: If ``total_objects`` is negative, NaN or not integral.
    """
    count = int(total_objects)
    if count != total_objects:
        raise ValueError(f"total_objects must be an integer, got {total_objects!r}")
    return math.comb(count, 2)

import pandas as pd

# Per-actor NMVW object counts; the last CSV column is discarded.
df1 = pd.read_csv("CQ1b_nmvw_obj.csv").iloc[:, :-1]
df1.columns = ['n_nmvw_object', 'nmvw_actor']
df1['n_nmvw_object'] = df1['n_nmvw_object'].apply(cleanup)
# Actors with exactly one object are removed, then the rest are ranked by count.
df1 = df1.loc[df1['n_nmvw_object'].ne(1)].sort_values(
    by='n_nmvw_object', ascending=False
)
# Number of object pairs per actor, and the grand total over all actors.
df1['possible_connections'] = df1['n_nmvw_object'].apply(calculate_combinations)
df1['possible_connections'].sum()


In [None]:
df1

How many object-to-object connections do we have, by establishing links via actors, when we also add the objects from Bronbeek via the owl:sameAs connection?

Generate [CQ1b_bb_obj.csv](CQ1b_bb_obj.csv)
SPARQL QUERY:
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?nmvw_actor (COUNT(DISTINCT ?bb_object) AS ?n_bb_object) 
WHERE{
    ?nmvw_actor owl:sameAs ?bb_actor .
    #acquisition event
    {
    	?bb_acq crm:P23_transferred_title_from ?bb_actor .   
    	?bb_acq crm:P24_transferred_title_of ?bb_object .
    }
  UNION
    {
      	?bb_object crm:P51_has_former_or_current_owner ?bb_actor .
    }
  GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_object a crm:E22_Human-Made_Object .
    }
} GROUP BY ?nmvw_actor

```

In [None]:
import pandas as pd

def cleanup(x: str):
    """Turn a quoted count such as ``'"3"'`` into the integer ``3``."""
    stripped = str(x).replace('"', '').strip()
    return int(stripped)

import math

def calculate_combinations(total_objects):
    """Return the number of unordered pairs among ``total_objects`` items, C(n, 2).

    Uses ``math.comb`` rather than a ratio of factorials: counts of 0 or 1
    give 0 instead of raising, and large counts stay cheap to evaluate.

    Args:
        total_objects: Non-negative item count. Integral floats (which appear
            after NaN-filling a merged integer column) are accepted.

    Returns:
        The number of distinct pairs as a Python ``int``.

    Raises:
        ValueError: If ``total_objects`` is negative, NaN or not integral.
    """
    count = int(total_objects)
    if count != total_objects:
        raise ValueError(f"total_objects must be an integer, got {total_objects!r}")
    return math.comb(count, 2)

import pandas as pd

# Bronbeek object count per linked NMVW actor; the last CSV column is discarded.
df2 = pd.read_csv("CQ1b_bb_obj.csv").iloc[:, :-1]
df2.columns = ["nmvw_actor" , "n_bb_object"]
# Parse the counts and show the actors with the most Bronbeek objects first.
df2 = df2.assign(n_bb_object=df2['n_bb_object'].apply(cleanup)).sort_values(
    by='n_bb_object', ascending=False
)
df2


In [None]:
# Attach the Bronbeek object counts to every NMVW actor. Actors without a
# Bronbeek row get NaN from the left join, which is replaced by 0.
merged_df = pd.merge(df1, df2, on='nmvw_actor', how='left')
# The NaN introduced by the join turns the column into floats; cast back to int
# so the pair count below receives integers (integer-only functions such as
# math.factorial / math.comb reject floats on Python >= 3.10).
merged_df['n_bb_object'] = merged_df['n_bb_object'].fillna(0).astype(int)

# Total number of objects per actor across both datasets.
merged_df['combined_values'] = merged_df['n_nmvw_object'] + merged_df['n_bb_object']

# Object pairs per actor over the combined counts, and the grand total.
merged_df['possible_connections'] = merged_df['combined_values'].apply(calculate_combinations)
merged_df['possible_connections'].sum()

# CQ 2

SPARQL Query:
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT (COUNT(DISTINCT ?nmvw_object) AS ?n)
WHERE{
  Graph <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
    {
     ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .
     ?nmvw_acq crm:P24_transferred_title_of ?nmvw_object .
    }
    UNION
    {
      ?nmvw_object crm:P108i_was_produced_by ?nmvw_prod .
      ?nmvw_prod crm:P14_carried_out_by ?nmvw_actor .
    }
    ?nmvw_object  a crm:E22_Human-Made_Object .
  }
  ?nmvw_actor owl:sameAs ?bronbeek_actor .
}
```


# CQ 3

Generate [CQ3_nmvw_obj.csv](CQ3_nmvw_obj.csv)
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT *
WHERE{
  Graph <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
     ?nmvw_acq1 crm:P23_transferred_title_from ?nmvw_actor1 .
     ?nmvw_acq1 crm:P24_transferred_title_of ?nmvw_object .
    
    ?nmvw_acq2 crm:P24_transferred_title_of ?nmvw_object .
    ?nmvw_acq2 crm:P23_transferred_title_from ?nmvw_actor2 .
    
    Filter (?nmvw_actor1!= ?nmvw_actor2)
   }
}
```

In [None]:
import pandas as pd

# Load the NMVW co-acquisition rows, clean every string cell and discard the
# last CSV column.
df1 = (
    pd.read_csv("CQ3_nmvw_obj.csv")
    .map(lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell)
    .iloc[:, :-1]
)
df1.columns = ["nmvw_acq1" , "nmvw_actor1" , "nmvw_object" , "nmvw_acq2" , "nmvw_actor2"]

# A frozenset per pair makes (a, b) and (b, a) collapse into a single entry.
pairs = {frozenset(pair) for pair in zip(df1['nmvw_actor1'], df1['nmvw_actor2'])}
df1 = pd.DataFrame(pairs, columns=['nmvw_actor1', 'nmvw_actor2'])

print(f"Unique two person's pair in nmvw dataset is: {len(df1)}")

generate [CQ3_bb_obj.csv](CQ3_bb_obj.csv)

```
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

SELECT * WHERE{
     ?bb_acq1 crm:P23_transferred_title_from ?bb_actor1 .
     ?bb_acq1 crm:P24_transferred_title_of ?bb_object .
    
    ?bb_acq2 crm:P24_transferred_title_of ?bb_object .
    ?bb_acq2 crm:P23_transferred_title_from ?bb_actor2 .

    Filter (?bb_actor1!= ?bb_actor2)
Graph <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
        ?bb_object a crm:E22_Human-Made_Object.
   }
}
```

In [None]:
import pandas as pd

# Load the Bronbeek co-acquisition rows; all CSV columns are kept.
df2 = pd.read_csv("CQ3_bb_obj.csv")
df2.columns = [  "bb_acq1" , "bb_actor1" , "bb_object" , "bb_acq2" , "bb_actor2"]

# Order-insensitive, de-duplicated actor pairs.
pairs = {frozenset(pair) for pair in zip(df2['bb_actor1'], df2['bb_actor2'])}
df2 = pd.DataFrame(pairs, columns=['bb_actor1', 'bb_actor2'])

print(f"Unique two person's pair in bronbeek dataset is: {len(df2)}")


Download the match results in [sameAs.csv](sameAs.csv)

SPARQL QUERY:
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT * WHERE{
  ?nmwv_actor owl:sameAs ?bb_actor .
}
```

In [None]:
# owl:sameAs links between NMVW actors and Bronbeek actors.
match_df = pd.read_csv("sameAs.csv")
match_df.columns = [ "nmvw_actor" , "bb_actor"]

# NMVW pairs in which either member has a sameAs link; the linked member is
# recorded in 'nmvw_actor' and its Bronbeek counterpart in 'bb_actor'.
merged_df1 = pd.merge(df1, match_df, left_on='nmvw_actor1', right_on='nmvw_actor', how='inner')
merged_df2 = pd.merge(df1, match_df, left_on='nmvw_actor2', right_on='nmvw_actor', how='inner')
final_df = pd.concat([merged_df1, merged_df2]).drop_duplicates()

# Extend each linked NMVW pair with every Bronbeek pair that contains the
# linked Bronbeek actor, on either side of that pair.
result_df1 = pd.merge(final_df, df2, left_on='bb_actor', right_on='bb_actor1', how='inner')
result_df2 = pd.merge(final_df, df2, left_on='bb_actor', right_on='bb_actor2', how='inner')
result_df = pd.concat([result_df2, result_df1])

In [None]:
# Collect the new cross-dataset actor pairs. Every row of result_df holds an
# NMVW pair, the member of that pair that has an owl:sameAs link
# ('nmvw_actor' <-> 'bb_actor') and a Bronbeek pair containing 'bb_actor'.
pair_list = list()

for _, row in result_df.iterrows():
    # x1 is the linked NMVW actor, x the other member of the NMVW pair.
    # x1 is taken from the row itself: assigning it only when the linked actor
    # sits in 'nmvw_actor1' would silently reuse the value of a previous row
    # whenever the linked actor sits in 'nmvw_actor2'.
    x1 = row['nmvw_actor']
    if row['nmvw_actor1'] == x1:
        x = row['nmvw_actor2']
    else:
        x = row['nmvw_actor1']

    # y is the Bronbeek actor paired with the linked Bronbeek actor.
    if row['bb_actor1'] == row['bb_actor']:
        y = row['bb_actor2']
    else:
        y = row['bb_actor1']

    # Both NMVW actors of the pair become connected to the Bronbeek co-actor.
    pair_list.append(frozenset((x, y)))
    pair_list.append(frozenset((x1, y)))

# De-duplicate; frozensets make the pairs order-insensitive.
pair_set = set(pair_list)
pair_df = pd.DataFrame(pair_set, columns=['actor1', 'actor2'])

print(f"The number of new pairs of actor-to-actor connection added is: {len(pair_df)}")

In [None]:
# Pool both columns of the pair table and reduce them to distinct actor IRIs.
all_values = pd.concat([pair_df['actor1'], pair_df['actor2']])
unique_values = all_values.unique()

# Keep the IRIs that begin with the handle prefix; they are reported below as
# the NMVW actors.
prefix = 'https://hdl.handle.net/'
filtered_values = list(filter(lambda iri: iri.startswith(prefix), unique_values))

print(f"Unique NMVW actor: {len(filtered_values)}")

# CQ 4

##### These are the objects for which we do not know the production place, but do know the acquisition actor.
> For 49152 objects we do not know the production location

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT (COUNT(DISTINCT ?nmvw_obj) AS ?no_object) ?nmvw_actor
WHERE{
 GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
    ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor.
    ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
    ?nmvw_obj crm:P108i_was_produced_by ?nmvw_prod .
    FILTER NOT EXISTS {
      ?nmvw_prod crm:P7_took_place_at ?nmvw_prod_place.}
    }
} GROUP BY ?nmvw_actor
```

##### These are the objects for which we DO KNOW the production place and the acquisition actor.
> For 457722 objects, we know the production location
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT (COUNT(DISTINCT ?nmvw_obj) AS ?no_object) (COUNT(DISTINCT ?nmvw_prod_place) AS ?no_place) ?nmvw_actor
WHERE{
 GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
     ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor.
      ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
      ?nmvw_obj crm:P108i_was_produced_by ?nmvw_prod .
     ?nmvw_prod crm:P7_took_place_at ?nmvw_prod_place.
    }
} GROUP BY ?nmvw_actor 
```

> If we connect them via the acquisition actor, for 45756 objects we can prioritise possible locations.

In [None]:
import pandas as pd

# df1: per-actor counts of objects whose production place is unknown.
df1 = pd.read_csv("CQ4_nmvw_object_place_unknown.csv").map(
    lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell
)
# df2: per-actor counts of objects whose production place is known.
df2 = pd.read_csv("CQ4_nmvw_object_place_known.csv").map(
    lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell
)

In [None]:
# Actors present in both tables; 'no_object_x' is the count coming from df1.
merged_df = df1.merge(df2, on='nmvw_actor', how='inner')
print(
    f"From nmvw dataset, we have {merged_df['no_object_x'].sum()} objects where they are connected "
    "to actor whose at least one object has known production location."
)

##### Objects connected to Bronbeek actors that have sameAs links with NMVW

Query to generate [CQ4_bb_object.csv](CQ4_bb_object.csv):

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT (COUNT(DISTINCT ?bb_obj) AS ?no_object) ?bb_actor
WHERE{
    ?nmvw_actor owl:sameAs ?bb_actor .
    {?bb_acq crm:P23_transferred_title_from ?bb_actor .	
    ?bb_acq crm:P24_transferred_title_of ?bb_obj .}
    UNION{
          ?bb_obj crm:P51_has_former_or_current_owner ?bb_actor .
}

 GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_obj a crm:E22_Human-Made_Object. }
} GROUP BY ?bb_actor 
```

In [None]:
def cleanup(x: str):
    """Convert a CSV count cell to ``int`` after dropping quotes and whitespace."""
    value = str(x).replace('"', '')
    value = value.strip()
    return int(value)

# Bronbeek object count per Bronbeek actor; all CSV columns are kept.
df3 = pd.read_csv("CQ4_bb_object.csv").map(
    lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell
)
df3.columns = ["no_object" , "bb_actor"]
df3 = df3.assign(no_object=df3['no_object'].apply(cleanup))
print(f"From bronbeek dataset, we have {df3['no_object'].sum()} objects that are connected with an actor who have owl:sameAs link to nmvw dataset")

Generate query for [sameAs.csv](sameAs.csv)
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT * WHERE{
    ?nmvw_actor owl:sameAs ?bb_actor .
}
```

In [None]:
# owl:sameAs links (NMVW actor -> Bronbeek actor) with cleaned string cells;
# all CSV columns are kept.
df4 = pd.read_csv("sameAs.csv").map(
    lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell
)
df4.columns = ['nmvw_actor' , 'bb_actor']

In [None]:
# Link the Bronbeek counts to NMVW actors. When a Bronbeek actor matches
# several NMVW actors only the first match is retained.
merged_df = df3.merge(df4, on='bb_actor', how='inner').drop_duplicates(subset='bb_actor')
# Keep the actors that also occur in df2; 'no_object_x' is the Bronbeek count.
merged_df = merged_df.merge(df2, on='nmvw_actor', how='inner')
merged_df['no_object_x'].sum()

In [None]:
# Distinct NMVW actors left after the joins.
print(
    f"Total actor involved: {merged_df['nmvw_actor'].nunique()}"
)

In [None]:
# NOTE(review): hard-coded operands -- 45756 matches the NMVW figure quoted in
# the text of this section; 240 is presumably a Bronbeek total from a previous
# cell output. Confirm before reusing.
240 + 45756

- **Exact String match:** Now for 0 bronbeek object, we can potentially project possible location.
- **Initial+surname match:** Now for 1841 bronbeek objects, we can potentially project possible location.
- **Surname match:** Now for 12776 bronbeek objects, we can potentially project possible location.
- **Fuzzy String match:** Now for 16503 bronbeek objects, we can potentially project possible location.

# CQ-5

```
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

SELECT (AVG(?n) AS ?avg) WHERE
{
  SELECT (COUNT(DISTINCT ?nmvw_obj) AS ?n) 
  WHERE{
      GRAPH <https://pressingmatter.nl/NMVW/ccrdfhiseve.ttl>{
      ?histevet a crm:E7_Activity .
      ?histevet crm:P140i_was_attributed_by ?o .
      ?histevet  crm:P1_is_identified_by ?title .
      ?histevet crm:P4_has_time-span [crm:P82a_begin_of_the_begin ?histevet_btime;
                                       crm:P82b_end_of_the_end ?histevet_etime] .
      ?title crm:P190_has_symbolic_content ?histevet_name .
      ?o a crm:E13_Attribute_Assignment .
      ?o crm:P141_assigned ?nmvw_obj .
    } 
    ?nmvw_obj a crm:E22_Human-Made_Object .
}GROUP BY ?histevet
}
```
> Average object per event: 261.941 with 51 event

Query to download ([CQ5_nmvw.csv](CQ5_nmvw.csv))

```
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT ?histevet ?histevet_btime ?histevet_etime ?nmvw_obj ?nmvw_actor 
  WHERE{
      GRAPH <https://pressingmatter.nl/NMVW/ccrdfhiseve.ttl>{
      ?histevet a crm:E7_Activity .
      ?histevet crm:P140i_was_attributed_by ?o .
      ?histevet  crm:P1_is_identified_by ?title .
      ?histevet crm:P4_has_time-span [crm:P82a_begin_of_the_begin ?histevet_btime;
                                       crm:P82b_end_of_the_end ?histevet_etime] .
      ?title crm:P190_has_symbolic_content ?histevet_name .
      ?o a crm:E13_Attribute_Assignment .
      ?o crm:P141_assigned ?nmvw_obj .
    } 
    ?nmvw_obj a crm:E22_Human-Made_Object .
    ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
    ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .	
  	?nmvw_actor owl:sameAs ?bb_actor .
}
```

```
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT ?histevet ?histevet_btime ?histevet_etime ?nmvw_obj ?nmvw_actor 
  WHERE{
    ?nmvw_actor owl:sameAs ?bb_actor .
    ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor .	
    ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
    ?nmvw_obj a crm:E22_Human-Made_Object .
  GRAPH <https://pressingmatter.nl/NMVW/ccrdfhiseve.ttl>{
    ?o crm:P141_assigned ?nmvw_obj .
    ?o a crm:E13_Attribute_Assignment .
    ?histevet crm:P140i_was_attributed_by ?o .
    ?histevet a crm:E7_Activity .
    ?histevet  crm:P1_is_identified_by ?title .
    ?histevet crm:P4_has_time-span [crm:P82a_begin_of_the_begin ?histevet_btime;
                                       crm:P82b_end_of_the_end ?histevet_etime] .
    ?title crm:P190_has_symbolic_content ?histevet_name .      
    } 
}
```

In [None]:
import pandas as pd

def cleanup_time(x: str):
    """Return the first four characters of a timestamp cell as an ``int``.

    Double quotes and surrounding whitespace are removed first. Raises
    ``ValueError`` if those four characters are not an integer literal.
    """
    stamp = str(x).replace('"', '').strip()
    return int(stamp[:4])

# Load the event/object/actor rows, clean every string cell and discard the
# last CSV column.
df1 = (
    pd.read_csv("CQ5_nmvw.csv")
    .map(lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell)
    .iloc[:, :-1]
)
df1.columns = [ "histevet" , "histevet_btime" , "histevet_etime" , "nmvw_obj" , "nmvw_actor"]
# Reduce both event timestamps to their leading four-character integer.
df1 = df1.assign(
    histevet_btime=df1['histevet_btime'].apply(cleanup_time),
    histevet_etime=df1['histevet_etime'].apply(cleanup_time),
)

In [None]:
# Distinct objects, distinct actors and their ratio for the NMVW event table.
print(
    f"Total distinct obj:{df1['nmvw_obj'].nunique()} \n"
    f"Total distinct actor: {df1['nmvw_actor'].nunique()}\n"
    f"Avg object: {df1['nmvw_obj'].nunique()/df1['nmvw_actor'].nunique()}"
)

In [None]:
# Without the object column, identical rows are repeated actor-to-event
# connections; keep a single copy of each.
df1 = df1.drop(columns=['nmvw_obj']).drop_duplicates()
df1

Query to generate [CQ5_bb.csv](CQ5_bb.csv)

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?bb_obj ?bb_actor ?nmvw_actor ?b_time ?e_time
WHERE{
  	?nmvw_actor owl:sameAs ?bb_actor .
  	?bb_acq2 crm:P23_transferred_title_from ?bb_actor . 
  	?bb_acq2 crm:P24_transferred_title_of ?bb_obj .
    ?prov_activity  crm:P9_consists_of ?bb_acq2 .
    ?prov_activity crm:P2_has_type aat:300055863 .
  	
    ?prov_activity crm:P9_consists_of ?bb_acq .
  	?bb_acq crm:P24_transferred_title_of ?bb_obj .
	  ?bb_acq crm:P4_has_time-span ?time .
  	?time crm:P82a_begin_of_the_begin ?b_time .
    ?time crm:P82b_end_of_the_end ?e_time.
  FILTER (?bb_acq != ?bb_acq2) .

GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_obj a crm:E22_Human-Made_Object. }
}
```

In [None]:
def cleanup_time(x: str):
    """Return the leading four-character year of a timestamp cell, or 0 if unknown.

    Double quotes and surrounding whitespace are removed before parsing.
    Every kind of missing or unparseable value maps to the same sentinel 0:
    ``None``, empty or blank text, the literal ``"None"`` and anything whose
    first four characters are not an integer (e.g. ``nan``). Empty or falsy
    input used to fall through and return ``None`` implicitly.

    Args:
        x: Raw cell value; non-string values are stringified.

    Returns:
        The parsed integer, or 0 when no year can be extracted.
    """
    if x is None:
        return 0
    text = str(x).replace('"', '').strip()
    if not text or text == "None":
        return 0
    try:
        return int(text[:4])
    except ValueError:
        return 0

# Load the Bronbeek acquisition rows and clean every string cell; all CSV
# columns are kept.
df2 = pd.read_csv("CQ5_bb.csv").map(
    lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell
)
df2.columns = ["bb_obj" , "bb_actor" , "nmvw_actor" , "b_time" , "e_time"]
df2['b_time'] = df2['b_time'].apply(cleanup_time)
df2['e_time'] = df2['e_time'].apply(cleanup_time)

# Drop fully identical rows.
df2 = df2.drop_duplicates()

# NMVW actors that occur in df1 (the NMVW actor-to-event table).
unique_nmvw_actor = df1['nmvw_actor'].unique()

# Keep only the Bronbeek rows whose linked NMVW actor occurs in df1, ordered
# by Bronbeek actor.
df2 = df2.loc[df2['nmvw_actor'].isin(unique_nmvw_actor)].sort_values('bb_actor')

df2

In [None]:
# Distinct Bronbeek objects and distinct linked NMVW actors left in df2.
print(
    f"{df2['bb_obj'].nunique()} object has connection with actors where actor has "
    f"connection with historical event with {df2['nmvw_actor'].nunique()} unique actor"
)

In [None]:
# NOTE(review): hard-coded operands -- presumably object totals copied from
# earlier cell outputs; confirm where 35391 and 13359 come from.
35391 + 13359

# CQ-6a

Query to generate [CQ6a_nmvw.csv](CQ6a_nmvw.csv): 

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?nmvw_obj ?nmvw_actor ?b_time ?e_time
WHERE{
 GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
     ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor.
     ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
     ?prov_activity ?p ?nmvw_acq.
     ?prov_activity crm:P2_has_type aat:300055863 .
     ?prov_activity crm:P4_has_time-span ?time .
     ?time crm:P82a_begin_of_the_begin ?b_time .
     ?time crm:P82b_end_of_the_end ?e_time.
    }
}
```

In [None]:
def cleanup_time(x: str):
    """Return the last four characters of a timestamp cell as an ``int``.

    Double quotes and surrounding whitespace are removed first. Raises
    ``ValueError`` if those four characters are not an integer literal.
    """
    stamp = str(x).replace('"', '').strip()
    return int(stamp[-4:])

import pandas as pd

# Load the NMVW acquisition rows, clean every string cell and discard the last
# CSV column.
df1 = (
    pd.read_csv("CQ6a_nmvw.csv")
    .map(lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell)
    .iloc[:, :-1]
)

df1.columns = ['nmvw_obj', 'nmvw_actor', 'b_time', 'e_time']
# Reduce both time-span bounds to the integer in their last four characters.
df1 = df1.assign(
    b_time=df1['b_time'].apply(cleanup_time),
    e_time=df1['e_time'].apply(cleanup_time),
)
df1

In [None]:
# Distinct objects per (actor, begin, end) combination, largest groups first;
# groups holding a single object are dropped.
grouped_df = (
    df1.groupby(['nmvw_actor', 'b_time', 'e_time'])['nmvw_obj']
    .nunique()
    .reset_index()
    .sort_values(by='nmvw_obj', ascending=False)
)
grouped_df = grouped_df.loc[grouped_df['nmvw_obj'].ne(1)]
print(
    "Number of objects that can be connected with at least one other object \n"
    f"based on they are connected by same actor during similar time is: {grouped_df['nmvw_obj'].sum()}"
)

Query to generate: [CQ6a_bb.csv](CQ6a_bb.csv)

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?bb_obj ?bb_actor ?nmvw_actor ?b_time ?e_time
WHERE{
  	?nmvw_actor owl:sameAs ?bb_actor .
  	?bb_acq2 crm:P23_transferred_title_from ?bb_actor . 
  	?bb_acq2 crm:P24_transferred_title_of ?bb_obj .
    ?prov_activity  crm:P9_consists_of ?bb_acq2 .
    ?prov_activity crm:P2_has_type aat:300055863 .
  	
    ?prov_activity crm:P9_consists_of ?bb_acq .
  	?bb_acq crm:P24_transferred_title_of ?bb_obj .
	  ?bb_acq crm:P4_has_time-span ?time .
  	?time crm:P82a_begin_of_the_begin ?b_time .
    ?time crm:P82b_end_of_the_end ?e_time.
  FILTER (?bb_acq != ?bb_acq2) .

GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_obj a crm:E22_Human-Made_Object. }
}
```

In [None]:
def cleanup_time(x: str):
    """Extract the leading four-character year from a timestamp cell.

    Quotes and padding are stripped before parsing. Missing and unparseable
    values all map to the sentinel 0 -- ``None``, empty or blank text, the
    literal ``"None"``, and text whose first four characters are not an
    integer -- so that the result is always an ``int``. Empty or falsy input
    used to fall through and return ``None`` implicitly.

    Args:
        x: Raw cell value; non-string values are stringified.

    Returns:
        The parsed integer, or 0 when no year can be extracted.
    """
    if x is None:
        return 0
    text = str(x).replace('"', '').strip()
    if not text or text == "None":
        return 0
    try:
        return int(text[:4])
    except ValueError:
        return 0

import pandas as pd

# Load the Bronbeek acquisition rows; all CSV columns are kept.
df2 = pd.read_csv("CQ6a_bb.csv")
df2.columns = ['bb_obj', 'bb_actor', 'nmvw_actor', 'b_time', 'e_time']
# Reduce both time-span bounds to integers.
df2 = df2.assign(
    b_time=df2['b_time'].apply(cleanup_time),
    e_time=df2['e_time'].apply(cleanup_time),
)

In [None]:
def merge_with_condition(df1, df2, max_diff=20):
    """Join two tables on ``nmvw_actor`` and keep rows with similar time spans.

    Both inputs must provide the columns ``nmvw_actor``, ``b_time`` and
    ``e_time``. After the inner join the time columns of ``df1`` carry the
    suffix ``_x`` and those of ``df2`` the suffix ``_y`` (pandas defaults).
    Two helper columns are added: ``time_diff_b`` and ``time_diff_e``, the
    ``df2`` value minus the ``df1`` value for the begin and end bound.

    Args:
        df1: Left table.
        df2: Right table.
        max_diff: Largest absolute difference allowed for both the begin and
            the end bound. Defaults to 20, the previously hard-coded tolerance.

    Returns:
        The joined rows whose begin and end differences are both within
        ``max_diff`` (inclusive), including the two difference columns.
    """
    joined = df1.merge(df2, on='nmvw_actor', how='inner')
    joined['time_diff_b'] = joined['b_time_y'] - joined['b_time_x']
    joined['time_diff_e'] = joined['e_time_y'] - joined['e_time_x']
    begin_close = joined['time_diff_b'].abs() <= max_diff
    end_close = joined['time_diff_e'].abs() <= max_diff
    return joined[begin_close & end_close]

# Pair NMVW and Bronbeek acquisitions of the same actor whose time spans are
# close to each other, then dump the matches as CSV text and report their count.
result_df = merge_with_condition(df1, df2)
print(result_df.to_csv(index=False))
print("Number of rows in result_df:", result_df.shape[0])

In [None]:
# Distinct NMVW actors among the matched rows.
print(
    f"Unique NMVW actor: {result_df['nmvw_actor'].nunique()}"
)

# CQ-6b

Query to generate [CQ6b_nmvw.csv](CQ6b_nmvw.csv): 

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?nmvw_obj ?nmvw_actor ?b_time ?e_time
WHERE{
 GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
     ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor.
     ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
     ?prov_activity ?p ?nmvw_acq.
     ?prov_activity crm:P2_has_type aat:300055863 .
     OPTIONAL{
             ?prov_activity crm:P4_has_time-span ?time .
             ?time crm:P82a_begin_of_the_begin ?b_time .
             ?time crm:P82b_end_of_the_end ?e_time.
     }
    }
}
```

In [None]:
def cleanup_time(x: str):
    """Return the last four characters of a timestamp cell as an ``int``.

    Double quotes and surrounding whitespace are removed first. When those
    characters are not an integer literal (for example a missing value), the
    result is ``None``.
    """
    tail = str(x).replace('"', '').strip()[-4:]
    try:
        return int(tail)
    except ValueError:
        return None


import pandas as pd

# Load the NMVW acquisition rows (time span optional), clean every string cell
# and discard the last CSV column.
df1 = (
    pd.read_csv("CQ6b_nmvw.csv")
    .map(lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell)
    .iloc[:, :-1]
)
df1.columns = ['nmvw_obj', 'nmvw_actor', 'b_time', 'e_time']
# Unparseable time bounds become missing values.
df1['b_time'] = df1['b_time'].apply(cleanup_time)
df1['e_time'] = df1['e_time'].apply(cleanup_time)
df1

In [None]:
# Rows for which both time bounds are present.
df1_time_known = df1.dropna(subset=['b_time', 'e_time'])

# Distinct objects per actor over all rows ...
grouped_all_df1 = (
    df1.groupby(['nmvw_actor'])['nmvw_obj']
    .nunique()
    .reset_index()
    .sort_values(by='nmvw_obj', ascending=False)
)

# ... and over the rows with a known time span only.
grouped_known_df1 = (
    df1_time_known.groupby(['nmvw_actor'])['nmvw_obj']
    .nunique()
    .reset_index()
    .sort_values(by='nmvw_obj', ascending=False)
)

# For actors with at least one dated object: all objects ('_y') minus the
# dated ones ('_x').
merged_df1 = grouped_known_df1.merge(grouped_all_df1, on='nmvw_actor', how='inner')
merged_df1['obj_time_unknown'] = merged_df1['nmvw_obj_y'] - merged_df1['nmvw_obj_x']
print(
    "The number of objects we can possibly project the time of acquisition is: "
    f"{merged_df1['obj_time_unknown'].sum()}"
)


Query to generate: [CQ6b_bb.csv](CQ6b_bb.csv)

```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?bb_obj ?bb_actor ?nmvw_actor ?b_time ?e_time
WHERE{
  	?nmvw_actor owl:sameAs ?bb_actor .
  	?bb_acq2 crm:P23_transferred_title_from ?bb_actor . 
  	?bb_acq2 crm:P24_transferred_title_of ?bb_obj .
    ?prov_activity  crm:P9_consists_of ?bb_acq2 .
    ?prov_activity crm:P2_has_type aat:300055863 .
  	
    ?prov_activity crm:P9_consists_of ?bb_acq .
  	?bb_acq crm:P24_transferred_title_of ?bb_obj .
	  ?bb_acq crm:P4_has_time-span ?time .
  	?time crm:P82a_begin_of_the_begin ?b_time .
    ?time crm:P82b_end_of_the_end ?e_time.
  FILTER (?bb_acq != ?bb_acq2) .

GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_obj a crm:E22_Human-Made_Object. }
}
```

In [None]:
def cleanup_time(x: object) -> int:
    """Extract a leading four-character year from a raw time value.

    The value is converted to text, double quotes and surrounding whitespace
    are removed, and the first four characters are parsed as an integer.

    Args:
        x: Raw cell value. Usually a string such as ``'"1885-01-01"'``, but
            any object is accepted because it is passed through ``str()``
            (pandas hands over ``float('nan')`` for empty CSV cells).

    Returns:
        The parsed integer, or ``0`` when no integer can be parsed. This
        covers the literal ``"None"``, ``None``, NaN, empty and blank
        strings, and any other non-numeric text.
    """
    # Every "unknown" input ends up as text that int() rejects, so a single
    # ValueError handler maps all of them to the 0 sentinel. The function
    # never returns None, which callers filtering on ``== 0`` would miss.
    try:
        return int(str(x).replace('"', '').strip()[:4])
    except ValueError:
        return 0

import pandas as pd

# Load the Bronbeek result of the query above. Unlike the NMVW frames, no
# trailing column is dropped: the five columns are named after the five
# SELECT variables of the query.
df2 = pd.read_csv("CQ6b_bb.csv")
df2.columns = ['bb_obj', 'bb_actor', 'nmvw_actor', 'b_time', 'e_time']

# Normalise both ends of the time span with cleanup_time.
for time_col in ('b_time', 'e_time'):
    df2[time_col] = df2[time_col].apply(cleanup_time)

In [None]:
# Rows whose begin and end values are both 0, the value cleanup_time returns
# for "None" and for unparseable input.
begin_unknown = df2['b_time'] == 0
end_unknown = df2['e_time'] == 0
filtered_df2 = df2[begin_unknown & end_unknown]

# nunique() counts each object once per actor, so repeated rows for the same
# object need no explicit de-duplication.
grouped_unknown_df2 = (
    filtered_df2.groupby(['nmvw_actor'])['bb_obj']
    .nunique()
    .reset_index()
    .sort_values(by='bb_obj', ascending=False)
)

# Inner join with the per-actor counts of time-known NMVW objects
# (grouped_known_df1): keeps only actors present in both frames.
merged_df2 = grouped_known_df1.merge(grouped_unknown_df2, on='nmvw_actor', how='inner')
print(f"There is {merged_df2['bb_obj'].sum()} objects from bronbeek dataset where don't know the acquisition date \nand from the same collector, we know some acquisition date for nmvw object")

> Note: the same object can appear multiple times.

In [None]:
# Number of distinct NMVW actors left after the merge above.
unique_actor_count = merged_df2['nmvw_actor'].nunique()
print(f"Unique NMVW actor: {unique_actor_count}")

# CQ-6c

Query to generate [CQ6c_nmvw.csv](CQ6c_nmvw.csv): 
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?nmvw_obj ?nmvw_prod_place ?nmvw_actor ?b_time ?e_time
WHERE{
 GRAPH <https://pressingmatter.nl/NMVW/ccrdfobj.ttl>{
     ?nmvw_acq crm:P23_transferred_title_from ?nmvw_actor.
     ?nmvw_acq crm:P24_transferred_title_of ?nmvw_obj .
     ?nmvw_obj crm:P108i_was_produced_by ?nmvw_prod .
     ?nmvw_prod crm:P7_took_place_at ?nmvw_prod_place.
     ?prov_activity ?p ?nmvw_acq.
     ?prov_activity crm:P2_has_type aat:300055863 .
     ?prov_activity crm:P4_has_time-span ?time .
     ?time crm:P82a_begin_of_the_begin ?b_time .
     ?time crm:P82b_end_of_the_end ?e_time.
    }
}
```

In [None]:
def cleanup_time(x: str):
    """Parse the last four characters of a raw time value as an integer.

    The value is converted to text, double quotes are removed and surrounding
    whitespace is stripped before the trailing four characters are taken.
    ``int()`` raises ``ValueError`` when those characters are not a valid
    integer literal.
    """
    unquoted = str(x).replace('"', '')
    trailing_digits = unquoted.strip()[-4:]
    return int(trailing_digits)

import pandas as pd

# Load the NMVW result, strip double quotes and surrounding whitespace from
# every string cell, then drop the last column.
df1 = (
    pd.read_csv("CQ6c_nmvw.csv")
    .map(lambda cell: cell.replace('"', '').strip() if isinstance(cell, str) else cell)
    .iloc[:, :-1]
)
df1.columns = ['nmvw_obj', 'nmvw_prod_place', 'nmvw_actor', 'b_time', 'e_time']

# Convert both ends of the time span to integers with cleanup_time.
for time_col in ('b_time', 'e_time'):
    df1[time_col] = df1[time_col].apply(cleanup_time)

# Bare expression: shows the first 15 rows as the cell output.
df1.head(15)

In [None]:
# Distinct objects per (actor, production place, begin, end) combination,
# ordered from the largest group to the smallest.
grouped_df1 = (
    df1.groupby(['nmvw_actor', 'nmvw_prod_place', 'b_time', 'e_time'])['nmvw_obj']
    .nunique()
    .reset_index()
    .sort_values(by='nmvw_obj', ascending=False)
)

# Discard combinations that hold exactly one object: such an object has no
# companion sharing the same actor, place and time span.
is_single_object = grouped_df1['nmvw_obj'] == 1
grouped_df1 = grouped_df1[~is_single_object]

print(f"Number of objects that can be connected with at least one other object \nbased on they are connected by same actor during similar time and similar location is: {grouped_df1['nmvw_obj'].sum()}")

Query to generate: [CQ6c_bb.csv](CQ6c_bb.csv)
```
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aat: <http://vocab.getty.edu/aat/>

SELECT ?bb_obj ?bb_actor ?nmvw_actor ?b_time ?e_time
WHERE{
  	?nmvw_actor owl:sameAs ?bb_actor .
  	?bb_acq2 crm:P23_transferred_title_from ?bb_actor . 
  	?bb_acq2 crm:P24_transferred_title_of ?bb_obj .
    ?prov_activity  crm:P9_consists_of ?bb_acq2 .
    ?prov_activity crm:P2_has_type aat:300055863 .
  	
    ?prov_activity crm:P9_consists_of ?bb_acq .
  	?bb_acq crm:P24_transferred_title_of ?bb_obj .
	  ?bb_acq crm:P4_has_time-span ?time .
  	?time crm:P82a_begin_of_the_begin ?b_time .
    ?time crm:P82b_end_of_the_end ?e_time.
  FILTER (?bb_acq != ?bb_acq2) .

GRAPH <https://pressingmatter.nl/Bronbeek/Objects/Objects/assertion/bad21d24/2024-03-01T12:54>{
    ?bb_obj a crm:E22_Human-Made_Object. }
}
```

In [None]:
def cleanup_time(x: object) -> int:
    """Extract a leading four-character year from a raw time value.

    The value is converted to text, double quotes and surrounding whitespace
    are removed, and the first four characters are parsed as an integer.

    Args:
        x: Raw cell value. Usually a string such as ``'"1885-01-01"'``, but
            any object is accepted because it is passed through ``str()``
            (pandas hands over ``float('nan')`` for empty CSV cells).

    Returns:
        The parsed integer, or ``0`` when no integer can be parsed. This
        covers the literal ``"None"``, ``None``, NaN, empty and blank
        strings, and any other non-numeric text.
    """
    # Every "unknown" input ends up as text that int() rejects, so a single
    # ValueError handler maps all of them to the 0 sentinel. The function
    # never returns None, which would turn into NaN in the year arithmetic
    # done on these columns later.
    try:
        return int(str(x).replace('"', '').strip()[:4])
    except ValueError:
        return 0

import pandas as pd

# Load the Bronbeek result of the query above. Unlike the NMVW frames, no
# trailing column is dropped: the five columns are named after the five
# SELECT variables of the query.
df2 = pd.read_csv("CQ6c_bb.csv")
df2.columns = ['bb_obj', 'bb_actor', 'nmvw_actor', 'b_time', 'e_time']

# Normalise both ends of the time span with cleanup_time.
for time_col in ('b_time', 'e_time'):
    df2[time_col] = df2[time_col].apply(cleanup_time)

In [None]:
def merge_with_condition(df1, df2, max_diff=20):
    """Pair rows of two frames by actor and keep pairs with close time spans.

    The frames are inner-joined on ``nmvw_actor``. A joined row is kept only
    when both the begin values and the end values of the two sides differ by
    at most ``max_diff``.

    Args:
        df1: Frame with at least ``nmvw_actor``, ``b_time`` and ``e_time``
            columns; its time columns become ``b_time_x`` / ``e_time_x``.
        df2: Frame with the same three columns; its time columns become
            ``b_time_y`` / ``e_time_y``.
        max_diff: Largest allowed absolute difference (inclusive), in the
            unit of the time columns. Defaults to 20, the threshold that was
            previously hard-coded.

    Returns:
        The joined rows satisfying both conditions, with two extra columns
        ``time_diff_b`` and ``time_diff_e`` (``df2`` value minus ``df1``
        value).
    """
    merged_df = pd.merge(df1, df2, on='nmvw_actor', how='inner')
    # Columns present in both inputs get pandas' default suffixes:
    # _x for the left frame (df1), _y for the right frame (df2).
    merged_df['time_diff_b'] = merged_df['b_time_y'] - merged_df['b_time_x']
    merged_df['time_diff_e'] = merged_df['e_time_y'] - merged_df['e_time_x']
    begin_is_close = merged_df['time_diff_b'].abs() <= max_diff
    end_is_close = merged_df['time_diff_e'].abs() <= max_diff
    return merged_df[begin_is_close & end_is_close]

# Pair NMVW and Bronbeek rows of the same actor whose time spans are close.
result_df = merge_with_condition(df1, df2)

# Dump the matches as CSV text (without the index column), then the match count.
result_csv = result_df.to_csv(index=False)
print(result_csv)
print("Number of rows in result_df:", len(result_df))

In [None]:
# Number of distinct NMVW actors among the matched rows.
matched_actor_count = result_df['nmvw_actor'].nunique()
print(f"Unique NMVW actor: {matched_actor_count}")