### Import Dependencies

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json

### Load Output csv file of the second Experiment

In [2]:
new_column_names = ['Table Name', 'Column Index', 'Row Index', 'Retrieved Annotation']
df_output = pd.read_csv("Dataset/output/cea annotation/cea_tfood_first_experiment.csv", header=None, names=new_column_names)
df_output.head()

Unnamed: 0,Table Name,Column Index,Row Index,Retrieved Annotation
0,AWP193T195U1O0,1,2,http://www.wikidata.org/entity/Q17
1,PFPX10U29,1,1,http://www.wikidata.org/entity/Q11148917
2,PFPX10U41,1,1,http://www.wikidata.org/entity/Q584469
3,PFPX10U44,1,1,http://www.wikidata.org/entity/Q3680646
4,PFPX10U44,1,2,http://www.wikidata.org/entity/Q112605150


In [34]:
df_output.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2265 entries, 0 to 2264
Data columns (total 4 columns):
Table Name              2265 non-null object
Column Index            2265 non-null int64
Row Index               2265 non-null int64
Retrieved Annotation    2264 non-null object
dtypes: int64(2), object(2)
memory usage: 70.9+ KB


### Retrieve and Analyse returned NaN by the API 

In [35]:
# Retrieve all rows with NaN values in a specific column
nan_rows = df_output[df_output["Retrieved Annotation"] == ""]

# Print the resulting dataframe
print(nan_rows)

Empty DataFrame
Columns: [Table Name, Column Index, Row Index, Retrieved Annotation]
Index: []


### Retrieve and Analyse incorrect annotations 

In [36]:
# Mapping with the value
new_column_names = ['Table Name', 'Column Index', 'Row Index', 'Target Annotation']
cea_gt = pd.read_csv(f"Dataset/val/gt/cea_gt.csv", header=None, names=new_column_names)

In [37]:
# join prediction and target dataframes
merged_df = pd.merge(cea_gt, df_output, on=['Table Name', 'Column Index', 'Row Index'])

In [38]:
merged_df.head()

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation
0,AWP193T195U1O0,1,2,http://www.wikidata.org/entity/Q17,http://www.wikidata.org/entity/Q17
1,PFPX10U29,1,1,http://www.wikidata.org/entity/Q11148917,http://www.wikidata.org/entity/Q11148917
2,PFPX10U41,1,1,http://www.wikidata.org/entity/Q584469,http://www.wikidata.org/entity/Q584469
3,PFPX10U44,1,1,http://www.wikidata.org/entity/Q3680646,http://www.wikidata.org/entity/Q3680646
4,PFPX10U44,1,2,http://www.wikidata.org/entity/Q112605150,http://www.wikidata.org/entity/Q112605150


In [44]:
merged_df[merged_df["Table Name"] == "AVMN45V10"]

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
36,AVMN45V10,1,1,"http://www.wikidata.org/entity/Q281,http://www...","http://www.wikidata.org/entity/Q281,http://www...","whisky, brandy, carbonated water, Selters, Q12..."
37,AVMN45V10,1,2,http://www.wikidata.org/entity/Q20,http://www.wikidata.org/entity/Q20,Norway


In [48]:
# Add Cell Content
def retrieve_cell_value(row):
    df = pd.read_csv(f"Dataset/val/tables/{row['Table Name']}.csv")
    cell_value = df.iloc[row["Row Index"] - 1, row["Column Index"]]
    return cell_value
    
merged_df["Cell Value"] = merged_df.apply(lambda row: retrieve_cell_value(row), axis=1)

In [49]:
### Retrieve Incorrect annotations
incorrect_rows = merged_df[merged_df["Retrieved Annotation"] != merged_df["Target Annotation"]]
incorrect_rows

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
27,WXFE22C3,1,4,http://www.wikidata.org/entity/Q61864890,http://www.wikidata.org/entity/Q16996794,lobster
36,AVMN45V10,1,1,"http://www.wikidata.org/entity/Q281,http://www...","http://www.wikidata.org/entity/Q281,http://www...","whisky, brandy, carbonated water, Selters, Q12..."
44,AVMN45V48,1,1,http://www.wikidata.org/entity/Q13179,http://www.wikidata.org/entity/Q12252383,raspberry
50,AVMN45V55,1,2,"http://www.wikidata.org/entity/Q10210,http://w...","http://www.wikidata.org/entity/Q10210,http://w...","white wine, sugar, egg"
52,AVMN45V59,1,3,"http://www.wikidata.org/entity/Q11002,http://w...","http://www.wikidata.org/entity/Q23118,http://w...","sugar, rum, Stroh, arrack, fruit"
...,...,...,...,...,...,...
2224,TPJS166A453,1,3,"http://www.wikidata.org/entity/Q1166136,http:/...","http://www.wikidata.org/entity/Q1166136,http:/...","Phaseolus lunatus, potato, fried cheese, maize"
2227,TPJS166A457,1,4,"http://www.wikidata.org/entity/Q205762,http://...","http://www.wikidata.org/entity/Q205762,http://...","goulash, fricassee, azu"
2233,TPJS166A489,1,1,"http://www.wikidata.org/entity/Q148,http://www...","http://www.wikidata.org/entity/Q148,http://www...","People's Republic of China, China"
2245,IYNT176I00,1,4,http://www.wikidata.org/entity/Q20638126,http://www.wikidata.org/entity/Q23501,tomato


In [51]:
incorrect_rows["Cell Value"].values

array(['lobster', 'whisky, brandy, carbonated water, Selters, Q12646666',
       'raspberry', 'white wine, sugar, egg',
       'sugar, rum, Stroh, arrack, fruit', 'Pisco, bitters, ginger ale',
       'cinnamon, egg, walnut', 'rice, green bean, torikatsu',
       'table salt, butter, sour cream, panko, jasmine rice, green bean, peppercorn, corn starch, vegetable oil, garlic powder, cutlet, sesame seed, ginger, tonkatsu sauce',
       'rice, Glycine max, Welsh onion, Zingiber officinale, sesame',
       'rice, pork chop', 'sugar, peanut, sesame seed', 'snipe', 'rose',
       'northern pike', 'mutton', 'Huso', 'deer', 'coral', 'pear', 'frog',
       'fat',
       'Brockhaus and Efron Encyclopedic Dictionary, Meyers Konversations-Lexikon, 4th edition (1885–1890)',
       'farina',
       'Fanta, Sprite, Angostura bitters, lemon, grenadine, cucumber',
       'rice, offal, pork meat', 'BLT, avocado', 'pink, ochre', 'Baisu',
       'rice', 'Coca-Cola', 'Pisco, Algarrobina', 'buttermilk, curd'

In [52]:
incorrect_rows[incorrect_rows["Cell Value"] == "margarine, sugar, table salt, flour, egg, cooking oil, coconut milk, hot water, fermentation starter, coconut, turmeric rhizome, sago flour, kaffir lime leaf"]

Unnamed: 0,Table Name,Column Index,Row Index,Target Annotation,Retrieved Annotation,Cell Value
805,CQYG166I1328,1,6,"http://www.wikidata.org/entity/Q4287,http://ww...","http://www.wikidata.org/entity/Q4287,http://ww...","margarine, sugar, table salt, flour, egg, cook..."
