# Detecting pairs of f-terms with supposed similarity

In [1]:
import pandas as pd
# Load the f-term table; the first CSV column is used as the row index.
df = pd.read_csv("data/f-terms.csv", index_col=0)

In [2]:
# Keep only the rows whose viewpoint label mentions "material"
# (case-insensitive; rows with a missing label are treated as non-matching),
# then attach two composite keys:
#   vp    = "<theme>/<viewpoint>"
#   fterm = "<theme>/<number>"
df2 = (
    df.loc[df["viewpoint_label"].str.contains("material", case=False, na=False)]
    .assign(
        vp=lambda frame: frame["theme"] + "/" + frame["viewpoint"],
        fterm=lambda frame: frame["theme"] + "/" + frame["number"],
    )
)

In [3]:
# Materials whose names are searched for in the f-term labels.
# NOTE: str.contains does a case-insensitive *substring* search, so a short
# name such as "lead" also matches labels containing e.g. "leading".
materials_list = [
    "metal", "wood", "concrete", "resin", "fiber", "wool", "sand", "cord",
    "copper", "iron", "silver", "gold", "lead", "glass", "stone", "titanium",
    "steel", "cement", "silicon", "polymer", "ceramics",
]

# materials_f_terms[material] = [unique viewpoint keys, f-term ids] of all rows
# in df2 whose label mentions the material.
materials_f_terms = {}
for material in materials_list:
    # Evaluate the label filter once and reuse it for both columns.
    rows_with_material = df2[df2["label"].str.contains(material, case=False, na=False)]
    materials_f_terms[material] = [
        rows_with_material["vp"].unique(),
        rows_with_material["fterm"].values,
    ]

In [21]:
# For every ordered pair of materials, collect the f-terms that share a
# viewpoint but mention only one of the two materials.
#
#   material_combinations["<mat1>_<mat2>"] =
#       [[viewpoint, [f-terms only mat1], [f-terms only mat2]], ...]
#
# Both orderings are stored ("metal_glass" and "glass_metal"). A former guard
# `mat2+"_"+mat1 in materials_f_terms` could never fire because that dict is
# keyed by bare material names, so it was dropped; the "glass_metal" lookup in
# the next cell relies on the reversed key being present.

# Length of the viewpoint prefix of an f-term id, e.g. "2E174/AA" in "2E174/AA05".
# assumes 5-character theme codes and 2-character viewpoint codes -- TODO confirm
VP_PREFIX_LEN = 8

# One set of f-term ids per material, built once instead of inside the pair loop.
fterm_sets = {name: set(materials_f_terms[name][1]) for name in materials_list}

material_combinations = {}
for mat1 in materials_list:
    for mat2 in materials_list:
        if mat1 == mat2:
            continue

        # Discard f-terms whose label mentions both materials.
        unique_fterms_mat1 = list(fterm_sets[mat1] - fterm_sets[mat2])
        unique_fterms_mat2 = list(fterm_sets[mat2] - fterm_sets[mat1])

        # Viewpoints in which each material still has at least one f-term.
        vp_mat1 = {x[:VP_PREFIX_LEN] for x in unique_fterms_mat1}
        vp_mat2 = {x[:VP_PREFIX_LEN] for x in unique_fterms_mat2}
        shared_vp = [x for x in vp_mat1 if x in vp_mat2]

        fterm_pairs = [
            [vp,
             [fterm for fterm in unique_fterms_mat1 if fterm.startswith(vp)],
             [fterm for fterm in unique_fterms_mat2 if fterm.startswith(vp)]]
            for vp in shared_vp
        ]

        material_combinations[mat1 + "_" + mat2] = fterm_pairs
        

In [22]:
# Show the result stored under the key "glass_metal".
# Each entry is [viewpoint, [f-terms for material 1], [f-terms for material 2]];
# for this key material 1 is glass and material 2 is metal.
material_combinations["glass_metal"]

[['2E174/AA', ['2E174/AA05'], ['2E174/AA01']],
 ['3B107/AA', ['3B107/AA02'], ['3B107/AA04']],
 ['4C047/BB', ['4C047/BB01'], ['4C047/BB03']],
 ['2E002/NB', ['2E002/NB02'], ['2E002/NB03']],
 ['2E108/CC', ['2E108/CC18'], ['2E108/CC01']],
 ['5D029/KA', ['5D029/KA24'], ['5D029/KA22']],
 ['3J024/BA', ['3J024/BA04'], ['3J024/BA01', '3J024/BA05']],
 ['5H029/EJ', ['5H029/EJ06'], ['5H029/EJ01']],
 ['4G072/FF', ['4G072/FF01'], ['4G072/FF04']],
 ['3F057/AB', ['3F057/AB07'], ['3F057/AB08']],
 ['5G206/CS', ['5G206/CS07'], ['5G206/CS11', '5G206/CS12', '5G206/CS13']],
 ['4G052/GA', ['4G052/GA12', '4G052/GA09'], ['4G052/GA15']],
 ['4F207/AJ', ['4F207/AJ06'], ['4F207/AJ02']],
 ['4C059/SS', ['4C059/SS02'], ['4C059/SS03']],
 ['5D112/BA', ['5D112/BA03'], ['5D112/BA05', '5D112/BA10']],
 ['4G061/CB', ['4G061/CB06', '4G061/CB13'], ['4G061/CB14']],
 ['5D016/EB', ['5D016/EB07'], ['5D016/EB05']],
 ['2C036/BF', ['2C036/BF02'], ['2C036/BF01']],
 ['5H604/DA', ['5H604/DA06'], ['5H604/DA04', '5H604/DA02']],
 ['4F210/

In [27]:
# Ad-hoc check that a "<mat1>_iron" key exists. `mat1` is not defined in this
# cell: it is the loop variable left behind by the pairing loop above.
f"{mat1}_iron" in material_combinations

True

# Material–process similarity pairs at theme level

In [16]:
# No viewpoint filter in this section: the composite keys are added to the
# full table itself, and df2 is then a copy of the whole of df.
#   vp    = "<theme>/<viewpoint>"
#   fterm = "<theme>/<number>"
df["vp"] = df["theme"] + "/" + df["viewpoint"]
df["fterm"] = df["theme"] + "/" + df["number"]
df2 = df.copy()

In [34]:
# Label prefixes to look for. Despite the variable name, the list mixes
# materials (Metal, Wood, Polymer) with processes (Cutting, Bleaching, Adhes...).
# Only labels that *start* with the prefix match.
# presumably the leading ". " marks first-level entries of the label hierarchy -- TODO confirm
materials_list = [". Metal", ". Wood", ". Polymer", ". Cutting", ". Bleaching", ". Adhes"]

# materials_f_terms[prefix] = [unique themes, f-term ids] of all rows in df2
# whose label starts with the prefix (rows with a missing label never match).
materials_f_terms = {}
for material in materials_list:
    # Evaluate the label filter once and reuse it for both columns.
    rows_with_prefix = df2[df2["label"].str.startswith(material, na=False)]
    materials_f_terms[material] = [
        rows_with_prefix["theme"].unique(),
        rows_with_prefix["fterm"].values,
    ]

In [35]:
# For every ordered pair of search terms, collect the f-terms that share a
# theme but match only one of the two terms.
#
#   material_combinations["<A>_<B>"] =
#       [[theme, [f-terms only A], [f-terms only B]], ...]
#
# Keys use the search term without its first two characters (". Metal" -> "Metal").
# Both orderings are stored ("Metal_Wood" and "Wood_Metal"). A former guard
# `mat2[2:]+"_"+mat1[2:] in materials_f_terms` could never fire because that
# dict is keyed by the full search terms, so it was dropped.

# Length of the theme prefix of an f-term id, e.g. "2D064" in "2D064/CA04".
# assumes 5-character theme codes -- TODO confirm
THEME_PREFIX_LEN = 5

# One set of f-term ids per search term, built once instead of inside the pair loop.
fterm_sets = {name: set(materials_f_terms[name][1]) for name in materials_list}

material_combinations = {}
for mat1 in materials_list:
    for mat2 in materials_list:
        if mat1 == mat2:
            continue

        # Discard f-terms that match both search terms.
        unique_fterms_mat1 = list(fterm_sets[mat1] - fterm_sets[mat2])
        unique_fterms_mat2 = list(fterm_sets[mat2] - fterm_sets[mat1])

        # Themes in which each search term still has at least one f-term.
        vp_mat1 = {x[:THEME_PREFIX_LEN] for x in unique_fterms_mat1}
        vp_mat2 = {x[:THEME_PREFIX_LEN] for x in unique_fterms_mat2}
        shared_vp = [x for x in vp_mat1 if x in vp_mat2]

        fterm_pairs = [
            [vp,
             [fterm for fterm in unique_fterms_mat1 if fterm.startswith(vp)],
             [fterm for fterm in unique_fterms_mat2 if fterm.startswith(vp)]]
            for vp in shared_vp
        ]

        material_combinations[mat1[2:] + "_" + mat2[2:]] = fterm_pairs
        

In [36]:
# Display every combination: key "<A>_<B>" -> list of
# [theme, [f-terms for A], [f-terms for B]].
material_combinations

{'Metal_Wood': [['2D064', ['2D064/CA04'], ['2D064/CA08']],
  ['2E038', ['2E038/BA01'], ['2E038/BA05']],
  ['3F105', ['3F105/AA08'], ['3F105/AA09']],
  ['5D002', ['5D002/DD06'], ['5D002/DD03']],
  ['3E066', ['3E066/CA11'], ['3E066/CA06']],
  ['4E093',
   ['4E093/QA04', '4E093/KB04', '4E093/HA02', '4E093/JA02'],
   ['4E093/HA01', '4E093/JA01']],
  ['2B150', ['2B150/ED09', '2B150/DH01'], ['2B150/CA31']],
  ['3F102', ['3F102/AA20'], ['3F102/AB07']],
  ['2E036', ['2E036/CA05', '2E036/KB01'], ['2E036/KB04']],
  ['2E001',
   ['2E001/HB01', '2E001/JB07', '2E001/JB01'],
   ['2E001/HC01', '2E001/HC11', '2E001/EA08', '2E001/JC01']],
  ['3E070', ['3E070/SA10', '3E070/MA01', '3E070/DA01'], ['3E070/DA06']],
  ['3F023', ['3F023/AB06'], ['3F023/AB08']],
  ['3E096', ['3E096/EA06'], ['3E096/EA09']],
  ['2D032', ['2D032/AB04'], ['2D032/AB01']],
  ['3F048', ['3F048/AB07'], ['3F048/AB08']],
  ['3K065', ['3K065/FB11'], ['3K065/AC17']],
  ['2E150', ['2E150/MA01'], ['2E150/MA46']],
  ['3F103', ['3F103/AA05'],