In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the two inputs: the AFE coin table (CSV) and the domain-knowledge sheet (Excel).
COIN_TABLE_CSV = "issuer_image_id_v3.csv"
DOMAIN_KNOWLEDGE_XLSX = '2024_02_06_issuingFor_possibleIssuers.xlsx'

df = pd.read_csv(COIN_TABLE_CSV)
df_domain_knowledge = pd.read_excel(DOMAIN_KNOWLEDGE_XLSX)



KeyboardInterrupt



In [3]:
# Turn the domain-knowledge table into (s, o, p) triples:
#   s = 'ex:issuing_for' + id, o = 'ex:issuer' + issuer, p = 'ex:domain_knowledge'.
# The frame is built in a single step rather than by assigning into a column
# slice, so pandas does not emit a SettingWithCopyWarning. The original row
# index and the column order (s, o, p) are preserved.
df_domain_knowledge = pd.DataFrame({
    's': 'ex:issuing_for' + df_domain_knowledge['id'].astype(str),
    'o': 'ex:issuer' + df_domain_knowledge['issuer'].astype(str),
    'p': 'ex:domain_knowledge',
})

# Show the resulting triples.
df_domain_knowledge

Unnamed: 0,s,o,p
0,ex:issuing_for2,ex:issuer33,ex:domain_knowledge
1,ex:issuing_for2,ex:issuer48,ex:domain_knowledge
2,ex:issuing_for4,ex:issuer48,ex:domain_knowledge
3,ex:issuing_for4,ex:issuer52,ex:domain_knowledge
4,ex:issuing_for4,ex:issuer53,ex:domain_knowledge
...,...,...,...
188,ex:issuing_for195,ex:issuer281,ex:domain_knowledge
189,ex:issuing_for196,ex:issuer59,ex:domain_knowledge
190,ex:issuing_for198,ex:issuer106,ex:domain_knowledge
191,ex:issuing_for198,ex:issuer289,ex:domain_knowledge


In [4]:
# List every column of the loaded coin table.
df.columns

Index(['id', 'Issuer', 'IssuerID', 'IssuerUnvertainty', 'image_1_Name',
       'image_2_Name', 'CoinImage_Uncertainty', 'Function', 'Material', 'Mint',
       'Mint2', 'Period', 'Denomination', 'Denomination2', 'FindCategory',
       'CoinImage', 'CoinImage2', 'DateFrom', 'DateTo', 'DateWritten',
       'MintMark', 'Countermark', 'Weight', 'DiameterMin', 'DiameterMax',
       'DieAxis', 'ObverseReverse', 'YearFound', 'OriginalSeen',
       'InternalNotes', 'Cleaned', 'Remarks', 'Material_Uncertainty',
       'DateFrom_Uncertainty', 'DateTo_Uncertainty', 'Weight_Uncertainty',
       'DiameterMin_Uncertainty', 'DiameterMax_Uncertainty',
       'DieAxis_Uncertainty', 'Findcategory_Uncertainty',
       'Function_Uncertainty', 'Period_Uncertainty',
       'Denomination_Uncertainty', 'Mint_Uncertainty', 'YearFound_Uncertainty',
       'Wear_Fs', 'Wear_Fs_Uncertainty', 'Wear_Bs', 'Wear_Bs_Uncertainty',
       'Corrosion_Fs', 'Corrosion_Fs_Uncertainty', 'Corrosion_Bs',
       'Corrosion_Bs_Unc

In [5]:
# Keep only the coin id, the issuer columns and the depicted-person (CoinImage*) columns.
# 'IssuerUnvertainty' is spelled exactly as the column is named in the source data.
# .copy() makes the subset an independent frame, so assigning to its columns
# afterwards does not raise SettingWithCopyWarning.
df = df[['id', 'Issuer', 'IssuerID', 'IssuerUnvertainty', 'image_1_Name', 'image_2_Name',
         'CoinImage_Uncertainty', 'CoinImage', 'CoinImage2']].copy()

In [6]:
def to_iri(ids: pd.Series, prefix: str) -> pd.Series:
    """Prefix every id in `ids` with `prefix` to form an identifier string.

    Missing ids stay missing (NaN) instead of being rendered as the literal
    text 'nan', and float columns that only hold whole numbers are rendered
    without a trailing '.0' (a numeric column containing NaN is read as float).

    Parameters
    ----------
    ids : pd.Series
        Raw identifiers (numeric or text), possibly containing missing values.
    prefix : str
        Text to put in front of each identifier, e.g. 'ex:coin'.

    Returns
    -------
    pd.Series
        Same index as `ids`; prefixed strings where an id is present, NaN elsewhere.
    """
    if pd.api.types.is_float_dtype(ids) and (ids.dropna() % 1 == 0).all():
        # Nullable integers print as '17' rather than '17.0'.
        ids = ids.astype('Int64')
    # Blank out the positions whose id was missing ('nan' / '<NA>' after astype(str)).
    return (prefix + ids.astype(str)).where(ids.notna())


# Transform coin, issuer and depicted-person ids to prefixed identifiers.
# assign() returns a new frame, so nothing is written into a slice of the raw table.
df = df.assign(
    CoinImage=to_iri(df['CoinImage'], 'ex:issuing_for'),
    CoinImage2=to_iri(df['CoinImage2'], 'ex:issuing_for'),
    id=to_iri(df['id'], 'ex:coin'),
    IssuerID=to_iri(df['IssuerID'], 'ex:issuer'),
)

In [7]:
# Issuer master data: one (issuer id, 'ex:name', issuer name) row per distinct pair.
# The names could be exported as well, but the reasoning step does not use them.
df_issuer_masterdata = (
    df[['Issuer', 'IssuerID']]
    .drop_duplicates()
    .rename(columns={'IssuerID': 's', 'Issuer': 'o'})
    .assign(p='ex:name')
)

# Alternative issuers of a coin are stored as separate rows of the coin table.
issuer_rows = df[['id', 'IssuerID', 'IssuerUnvertainty']]

# Coins whose issuer attribution is flagged uncertain get one extra
# (coin, 'ex:uncertain') row each.
uncertain_flags = ['uncertain', 'uncertain and alternative']
is_issuer_uncertain = issuer_rows['IssuerUnvertainty'].isin(uncertain_flags)
df_issuer_uncertain = (
    issuer_rows.loc[is_issuer_uncertain, ['id']]
    .drop_duplicates()
    .rename(columns={'id': 's'})
    .assign(o='ex:uncertain')
)

# Regular issuer rows followed by the uncertainty markers, all with predicate 'ex:issuer'.
df_issuer = pd.concat([
    issuer_rows[['id', 'IssuerID']].rename(columns={'id': 's', 'IssuerID': 'o'}),
    df_issuer_uncertain,
]).assign(p='ex:issuer')

# Master data for the "issuing for" entities (identifier -> name); not used during reasoning.
# Both (name column, id column) pairs are reshaped the same way: id -> 's', name -> 'o'.
df_issuing_for_masterdata_1, df_issuing_for_masterdata_2 = (
    df[[name_col, id_col]].rename(columns={id_col: 's', name_col: 'o'})
    for name_col, id_col in (('image_1_Name', 'CoinImage'), ('image_2_Name', 'CoinImage2'))
)
df_issuing_for_masterdata = pd.concat(
    [df_issuing_for_masterdata_1, df_issuing_for_masterdata_2]
).drop_duplicates()

# Alternative depicted persons of a coin are stored in separate columns
# (CoinImage and CoinImage2); each column contributes its own (coin, person) rows.
# Rows without an object are dropped so that no triple with an empty object is emitted.
df_issuing_for_1 = (
    df[['id', 'CoinImage']]
    .drop_duplicates()
    .rename(columns={'CoinImage': 'o', 'id': 's'})
    .dropna(subset=['o'])
)
df_issuing_for_2 = (
    df[['id', 'CoinImage2']]
    .drop_duplicates()
    .rename(columns={'CoinImage2': 'o', 'id': 's'})
    .dropna(subset=['o'])
)

# Only coins whose depicted person is flagged uncertain receive the 'ex:uncertain'
# marker. Without this filter every coin in the table would be marked uncertain.
# NOTE(review): assumes CoinImage_Uncertainty uses the same labels as
# IssuerUnvertainty ('uncertain', 'uncertain and alternative') - confirm against the data.
is_image_uncertain = df['CoinImage_Uncertainty'].isin(['uncertain', 'uncertain and alternative'])
df_issuing_for_uncertain = (
    df.loc[is_image_uncertain, ['id']]
    .drop_duplicates()
    .rename(columns={'id': 's'})
)
df_issuing_for_uncertain['o'] = 'ex:uncertain'

df_issuing_for = pd.concat([df_issuing_for_1, df_issuing_for_2, df_issuing_for_uncertain])
df_issuing_for['p'] = "ex:issuing_for"

In [8]:
# Display the issuer triples (columns s, o, p) for a visual check.
df_issuer

Unnamed: 0,s,o,p
0,ex:coin3122,ex:issuer83,ex:issuer
1,ex:coin3122,ex:issuer88,ex:issuer
2,ex:coin3131,ex:issuer83,ex:issuer
3,ex:coin3131,ex:issuer88,ex:issuer
4,ex:coin4644,ex:issuer83,ex:issuer
5,ex:coin4646,ex:issuer88,ex:issuer
6,ex:coin4733,ex:issuer83,ex:issuer
7,ex:coin4733,ex:issuer88,ex:issuer
8,ex:coin4734,ex:issuer83,ex:issuer
9,ex:coin4734,ex:issuer88,ex:issuer


In [9]:
# Display the issuing-for triples (columns s, o, p) for a visual check.
df_issuing_for

Unnamed: 0,s,o,p
0,ex:coin3122,ex:issuing_for17,ex:issuing_for
2,ex:coin3131,ex:issuing_for15,ex:issuing_for
4,ex:coin4644,ex:issuing_for15,ex:issuing_for
5,ex:coin4646,ex:issuing_for17,ex:issuing_for
6,ex:coin4733,ex:issuing_for15,ex:issuing_for
...,...,...,...
32,ex:coin15663,ex:uncertain,ex:issuing_for
33,ex:coin16646,ex:uncertain,ex:issuing_for
34,ex:coin17206,ex:uncertain,ex:issuing_for
36,ex:coin17818,ex:uncertain,ex:issuing_for


In [10]:
# Display the domain-knowledge triples (columns s, o, p) for a visual check.
df_domain_knowledge

Unnamed: 0,s,o,p
0,ex:issuing_for2,ex:issuer33,ex:domain_knowledge
1,ex:issuing_for2,ex:issuer48,ex:domain_knowledge
2,ex:issuing_for4,ex:issuer48,ex:domain_knowledge
3,ex:issuing_for4,ex:issuer52,ex:domain_knowledge
4,ex:issuing_for4,ex:issuer53,ex:domain_knowledge
...,...,...,...
188,ex:issuing_for195,ex:issuer281,ex:domain_knowledge
189,ex:issuing_for196,ex:issuer59,ex:domain_knowledge
190,ex:issuing_for198,ex:issuer106,ex:domain_knowledge
191,ex:issuing_for198,ex:issuer289,ex:domain_knowledge


In [11]:
# Stack the three triple tables into one frame.
# From here on `df` holds the combined triples, no longer the coin table.
triple_frames = [df_domain_knowledge, df_issuing_for, df_issuer]
df = pd.concat(triple_frames)

In [12]:
# Append two constant columns to every triple: an empty 'model' and a 'certainty' of 1.0.
df = df.assign(model=np.nan, certainty=1.0)

In [13]:
# Write the combined triples to disk; the row index is not exported.
OUTPUT_CSV = 'afe_input.csv'
df.to_csv(OUTPUT_CSV, index=False)