# Stats for eHRAF Scraper
The current stats merely:
- Clean and reorganize the dataframe
- Find the most common OCM codes
- Find association rules (when one OCM appears, which other OCMs are likely to appear with it)

More work will be done.


## Clean the Dataframe


In [1]:
import pandas as pd                 # dataframe storing
import numpy as np
import re                           # regex for searching through strings

# Name of the scrape folder (inside ../Data/) that holds the combined dataset
folder = r'subjects-(sickness)_FILTERS-culture_level_samples(PSF)'

dataset_path = '../Data/' + folder + '/_Altogether_Dataset.xlsx'
df = pd.read_excel(dataset_path)

# The OCM column comes back as one string per row. Rebuild a list of code
# strings: drop the spaces, trim the first and last character (presumably
# the list brackets) and split on commas.
ocm_without_spaces = df['OCM'].apply(lambda raw: re.sub(" ", '', raw))
df['OCM'] = ocm_without_spaces.apply(lambda raw: raw[1:-1].split(','))

# Sanity check: the first code of the first row should be a single OCM string
df['OCM'][0][0]

'752'

In [2]:
# Discard every row whose passage text is missing, reporting the row count on either side
rows_before = len(df)
print(f'Before: {rows_before}')
df = df.dropna(subset=["Passage"])
print(f'After: {len(df)}')

Before: 42555
After: 42458


In [3]:
# If you search with a higher-order code (750), eHRAF attempts to acquire ALL OCMs related to it.
# Keep only the rows tagged with at least one of the OCMs we originally wished to search for.
lst = ["750","751","752","753"]
# lst = ["751", "752"]
wanted_codes = set(lst)
msk = df['OCM'].apply(lambda codes: bool(wanted_codes.intersection(codes)))
df = df[msk]
len(df)

6440

### Remove Duplicates
Currently, a passage is removed only if it duplicates another passage that has the same OCMs. Duplicate passages with different OCMs will remain.

In [4]:
# (exploratory)
# Check whether any duplicated passage text comes from more than one document.
dup1 = df["Passage"].duplicated(keep=False)  # True for every row whose passage text occurs more than once
dup_rows = df[dup1]
# Among those rows, True where both the passage and the document title are repeated
dup2 = dup_rows.duplicated(subset=["Passage", "DocTitle"], keep=False)
# What remains are duplicated passages that do not share a document (only top 4 shown)
cross_doc = dup_rows[~dup2].sort_values(by="Passage")
print(f'Number of passages whose duplicates come from different documents: {len(cross_doc)}')
cross_doc.head(4)

Number of passages whose duplicates come from different documents: 0


Unnamed: 0,Passage Number,Region,SubRegion,Culture,DocTitle,Section,Author,Page,Year,OCM,OWC,Passage,run_Info


In [5]:
# (exploratory)
# Look for duplicated passages whose copies carry different OCM codes
df = df.copy()
# OCM lists become tuples so that duplicated() can compare them
df["OCM"] = df["OCM"].apply(tuple)

# Among the rows with a repeated passage, flag those whose OCM codes are repeated as well
dup3 = df[dup1].duplicated(subset=["Passage", "OCM"], keep=False)
# Count the repeated passages that do NOT have a copy with matching OCMs
ocm_mismatch_count = len(df[dup1][~dup3])
print(f'Number of passages whose duplicates do not share OCMs:  {ocm_mismatch_count}')
# Restore list-valued OCMs before displaying the mismatching rows (only top 4 shown)
df["OCM"] = df["OCM"].apply(list)
df[dup1][~dup3].sort_values(by="Passage").head(4)

Number of passages whose duplicates do not share OCMs:  10


Unnamed: 0,Passage Number,Region,SubRegion,Culture,DocTitle,Section,Author,Page,Year,OCM,OWC,Passage,run_Info
13713,13714,Asia,Southeast Asia,Ifugao,The Mayawyaw ritual: VI. illness and its ritual,6. THE SACRIFICE FOR THE BA’NIG or Phantoms,"Lambrecht, Francis",65,1955,"[539, 751, 755, 776, 782, 787, 793]",oa19,"1. Are living Bugan and Wigan at Dukligan,/ we...",
14546,14547,Asia,Southeast Asia,Ifugao,The Mayawyaw ritual: VI. illness and its ritual,b. The Lu’tap Rites,"Lambrecht, Francis",135,1955,"[539, 752, 755, 775, 776, 782, 787, 793]",oa19,"1. Are living Bugan and Wigan at Dukligan,/ we...",
13709,13710,Asia,Southeast Asia,Ifugao,The Mayawyaw ritual: VI. illness and its ritual,6. THE SACRIFICE FOR THE BA’NIG or Phantoms,"Lambrecht, Francis",65,1955,"[751, 755, 776, 782, 787]",oa19,"1. Mata’gu cha Bu’gan ya Wi’gan ad Chu-li’gan,...",
14330,14331,Asia,Southeast Asia,Ifugao,The Mayawyaw ritual: VI. illness and its ritual,a. Rites of the first Day .,"Lambrecht, Francis",121,1955,"[539, 752, 755, 775, 776, 782, 793]",oa19,"1. Mata’gu cha Bu’gan ya Wi’gan ad Chu-li’gan,...",


In [6]:
# Remove duplicated passages that also share the same OCM codes (the first copy is kept)

df["OCM"] = df['OCM'].apply(tuple) # OCM lists become tuples so that drop_duplicates can compare them

# Drop the duplicates in place, reporting the row count before and after
print(f'Before {len(df)}')
df.drop_duplicates(subset=["Passage", "OCM"], keep='first', inplace=True)
print(f'After {len(df)}')

df["OCM"] = df['OCM'].apply(list) # turn the OCM tuples back into lists

Before 6440
After 6408


### Shave OCMs
And make an exploded OCM dataframe

In [7]:
# Long-format copy of df: one row per (passage, OCM) pair, with the index renumbered from 0
df_OCM = df.explode('OCM').reset_index(drop=True)
# Look for OCMs that do not follow the usual three-digit (100-900) scheme
# NOTE: 0 reportedly marks material as not relevant; it is unclear why it sometimes appears alongside other OCMs in the same passage
# NOTE: 5310 and 5311 look like finer specifications of 531, and 1710 may be a more specific (singular) subset of 171; the same does not seem to hold for 77 and 1787
list_OCM = df_OCM['OCM'].value_counts().index.tolist()
small_OCM = [code for code in list_OCM if len(code) != 3]
small_OCM

['0', '5310', '5311']

In [8]:
# Remove unwanted OCM codes from the exploded frame and "shave" over-long codes
# Add to this list any codes that should be removed entirely
remove_list = ['1787','77']
print(f'starting list {len(df_OCM)}')
# Filter once with isin, and take an explicit copy so the column assignment
# below writes to an independent frame rather than to a filtered slice
# (this avoids pandas' SettingWithCopyWarning)
df_OCM = df_OCM[~df_OCM["OCM"].isin(remove_list)].copy()
# "Shave" the OCM codes that seem to have a parent (5310 and 5311 become 531).
# Slicing leaves codes of three or fewer characters unchanged.
df_OCM['OCM'] = df_OCM['OCM'].apply(lambda code: code[:3])
print(f'Ending list {len(df_OCM)}')

starting list 25617
Ending list 25617


In [9]:
# Apply the same removals and shaving to the passage-level dataframe
# (easier than imploding df_OCM again, because duplicates get in the way of that)

# For each row's OCM list, in a single pass:
#   - skip every code listed in remove_list
#   - cut codes found in small_OCM down to their first 3 characters (5310 becomes 531)
#   - keep every other code as it is
# apply() hands each row's list to the lambda and stores the rebuilt list back in the column
df["OCM"] = df["OCM"].apply(
    lambda codes: [
        code[:3] if code in small_OCM else code
        for code in codes
        if code not in remove_list
    ]
)
print(len(df))


6408


### Create Dictionary for later count comparisons

In [10]:
# Map every culture to the number of passages (rows of df) it has
culture_set = set(df["Culture"])
culture_dict = {name: len(df.loc[df["Culture"] == name]) for name in culture_set}
# Total across all cultures; compare it with len(df) as a sanity check
it_count = sum(culture_dict.values())
print(f'Passages: {it_count}')


Passages: 6408


### Clean passage text

In [11]:
# Display one Somali passage (the 22nd by position) to inspect its text
somali_rows = df.loc[df["Culture"] == "Somali"]
somali_rows.iloc[21]["Passage"]

'When in 1929 a serpent bit a Migiurtina woman and her daughter, the daughter of the Isl[unknown]an of the ‘Umar Ma[unknown]hm[unknown]ud, Dahab[unknown]o Isl[unknown]an, intervened and pronounced this formula, addressed to the serpent: hadd[unknown]ad ab[unknown]oug[unknown]ay tah[unknown]ay dadk[unknown]an wa[unknown]h g[unknown]ari m[unknown]ayo[unknown] ab[unknown]ou ad n[unknown]o ah[unknown]ayde[unknown] hilibk[unknown]ay[unknown] harân b[unknown]u k[unknown]a ahây[unknown] nô ma dili[unknown] girín[unknown] annáguná k[unknown]u ma dill[unknown]an[unknown] Ma[unknown]hm[unknown]ud Sal[unknown]emân haddád taháy dadkán wah gâri mâyo[unknown] haddád tĝ taháy wa[unknown]h hún b[unknown]a l[unknown]o g[unknown]u gú arkáya[unknown] ‘If you are my grandfather, nothing (bad) will happen to them. You were grandfather for us. My flesh was illicit for you. You did not bite us and we did not strike you. If you are a Ma[unknown]hm[unknown]ud Sal[unknown]em[unknown]an, nothing (bad) will happe

In [12]:
# Strip every literal "[unknown]" marker from the passage text
unknown_marker = re.compile(r"\[unknown\]")
df["Passage"] = df["Passage"].apply(lambda text: unknown_marker.sub('', text))

In [13]:

# Display the 22nd Somali passage (by position) to inspect its current text
somali_rows = df.loc[df["Culture"] == "Somali"]
somali_rows.iloc[21]["Passage"]


'When in 1929 a serpent bit a Migiurtina woman and her daughter, the daughter of the Islan of the ‘Umar Mahmud, Dahabo Islan, intervened and pronounced this formula, addressed to the serpent: haddad abougay tahay dadkan wah gari mayo abou ad no ahayde hilibkay harân bu ka ahây nô ma dili girín annáguná ku ma dillan Mahmud Salemân haddád taháy dadkán wah gâri mâyo haddád tĝ taháy wah hún ba lo gu gú arkáya ‘If you are my grandfather, nothing (bad) will happen to them. You were grandfather for us. My flesh was illicit for you. You did not bite us and we did not strike you. If you are a Mahmud Saleman, nothing (bad) will happen to them. If you are a thief, ugly things will be seen in you.’'

### Optional Exploration

In [14]:
# (OPTIONAL)
# Quick search for passages carrying any of the given OCMs, regardless of culture
# NOTE: sometimes a higher-order code like 750 appears without lower-order codes
lst = ["159"]  # enter your OCM strings here separated by a comma
search_codes = set(lst)
msk = df['OCM'].apply(lambda codes: bool(search_codes.intersection(codes)))
out = df[msk]
out

Unnamed: 0,Passage Number,Region,SubRegion,Culture,DocTitle,Section,Author,Page,Year,OCM,OWC,Passage,run_Info
192,193,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,§ 6. ORGANIC ILLNESS,"Field, M. J. (Margaret Joyce)",117,1970,"[159, 164, 743, 753, 778]",fe12,A few examples will illuminate the attitude of...,
193,194,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,§ 6. ORGANIC ILLNESS,"Field, M. J. (Margaret Joyce)",117,1970,"[159, 164, 743, 753, 776, 778]",fe12,"A very sensible and hard-working woman farmer,...",
194,195,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,§ 6. ORGANIC ILLNESS,"Field, M. J. (Margaret Joyce)",118,1970,"[159, 164, 743, 753, 778]",fe12,Again I found at another shrine an exceedingly...,
1337,1338,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,Family History,"Field, M. J. (Margaret Joyce)",374,1970,"[158, 159, 753, 754, 886]",fe12,Family mental history: Patient says many of he...,
1727,1728,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,Shrine,"Field, M. J. (Margaret Joyce)",440,1970,"[159, 753]",fe12,Mframa at Mframaso.,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36492,36493,Oceania,Polynesia,Tikopia,Rank and religion in Tikopia: a study in pagan...,CONCOMITANTS OF CONVERSION,"Firth, Raymond, 1901-2002",391,1970,"[154, 159, 177, 178, 593, 613, 614, 753, 771, ...",ot11,But clearly the mental processes involved were...,
36939,36940,Africa,Western Africa,Tiv,Tiv song,ANEKE TIRE,"Keil, Charles",[p. 140],1979,"[159, 525, 533, 752]",ff57,Aneke Tire was born on the night of a supernov...,
38740,38741,North-America,Northwest Coast and California,Tlingit,Social structure and social life of the Tlingi...,DANCE SOCIETIES,"Olson, Ronald L. (Ronald LeRoy), 1895-",[p.120-a],1967,"[159, 535, 567, 751, 787]",na12,I was a girl when I used to dance for Nawan. M...,
42498,42499,South-America,Amazon and Orinoco,Yanoama,The Sanema,Shirishana Family,"Wilbert, Johannes",93,1963,"[159, 753, 763, 764, 825]",sq18,He had gone out to the forest. There he had fo...,


In [15]:
# Quick OCM search restricted to the rows selected by another column (here Culture == "Akan")
lst = ["159"]  # enter your OCM strings here separated by a comma
akan_rows = df.loc[df["Culture"] == "Akan"]
# True for each Akan row whose OCM list shares at least one code with lst
msk = akan_rows['OCM'].apply(lambda codes: not set(lst).isdisjoint(codes))
out = akan_rows[msk]
out.head(4)

Unnamed: 0,Passage Number,Region,SubRegion,Culture,DocTitle,Section,Author,Page,Year,OCM,OWC,Passage,run_Info
192,193,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,§ 6. ORGANIC ILLNESS,"Field, M. J. (Margaret Joyce)",117,1970,"[159, 164, 743, 753, 778]",fe12,A few examples will illuminate the attitude of...,
193,194,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,§ 6. ORGANIC ILLNESS,"Field, M. J. (Margaret Joyce)",117,1970,"[159, 164, 743, 753, 776, 778]",fe12,"A very sensible and hard-working woman farmer,...",
194,195,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,§ 6. ORGANIC ILLNESS,"Field, M. J. (Margaret Joyce)",118,1970,"[159, 164, 743, 753, 778]",fe12,Again I found at another shrine an exceedingly...,
1337,1338,Africa,Western Africa,Akan,Search for security: an ethno-psychiatric stud...,Family History,"Field, M. J. (Margaret Joyce)",374,1970,"[158, 159, 753, 754, 886]",fe12,Family mental history: Patient says many of he...,


In [16]:
# (OPTIONAL)
# Some passages only label or describe other passages and hold no real content, e.g.
# "Notes", "End" or "Log".
# This cell reports (but does not remove) how many passages are that short. They may
# disrupt the OCM stats because they carry OCMs without any text that refers to them.
shortPass_list = [text for text in df['Passage'] if len(text) <= 10]
print(f'Number of passages with text with 10 of fewer characters: {len(shortPass_list)}')

Number of passages with text with 10 of fewer characters: 15


In [17]:
# Save the cleaned dataframe as an Excel workbook in the current working directory (row index omitted)
df.to_excel("_Cleaned_Altogether.xlsx", index=False)
# NOTE: Some programs, such as Excel, do not automatically load CSVs as UTF-8. There are ways to remedy this, but for simplicity's sake we export an Excel file.
# Uncomment the line below if you want a CSV instead
# df.to_csv("_Cleaned_Altogether.csv", index=False)

## OCM Code Counting
Count every OCM within each culture. Do not count the OCMs that were specified by the search (for example, if you searched for 750-755, do not count those codes).
<!-- - REMOVE all passages which are blank since we can't very well do lexical searches on them -->

In [18]:
# Work on a copy of df_OCM so that other analyses are not affected
df_OCM_freq = df_OCM.copy()
# Turn the OCM strings into integers so they can be compared numerically
df_OCM_freq['OCM'] = df_OCM_freq['OCM'].apply(int)
# Only keep OCMs outside the searched range.
# NOTE: the 750-754 bounds are hard-coded; adjust them when the search codes change.
df_sub_ex = df_OCM_freq.loc[(df_OCM_freq["OCM"] < 750) | (df_OCM_freq["OCM"] > 754)]

# Build one small frame per culture and concatenate them once at the end.
# Concatenating onto an initially empty frame inside the loop re-copies all
# earlier rows on every iteration and relies on pandas' deprecated handling
# of empty entries.
freq_columns = ["Culture", "OCM", "Frequency", "Proportion_of_Passages"]
culture_frames = []
# culture_dict maps each culture to its number of passages
for culture, passage_total in culture_dict.items():
    value_count = df_sub_ex.loc[df_sub_ex["Culture"] == culture, "OCM"].value_counts()
    if value_count.empty:
        # nothing to report for this culture
        continue
    culture_frames.append(
        pd.DataFrame(
            {
                # repeat the culture name once per OCM row
                "Culture": [culture] * len(value_count),
                "OCM": value_count.index,
                "Frequency": value_count.values,
                # share of the culture's passages that carry this OCM
                "Proportion_of_Passages": value_count.values / passage_total,
            }
        )
    )
if culture_frames:
    df_OCM_freq = pd.concat(culture_frames, ignore_index=True)
else:
    # no culture produced any counts: fall back to an empty table with the expected columns
    df_OCM_freq = pd.DataFrame(columns=freq_columns)
df_OCM_freq = df_OCM_freq.sort_values(by=["Culture", "Frequency"], ascending=[True, False])
df_OCM_freq

Unnamed: 0,Culture,OCM,Frequency,Proportion_of_Passages
3333,Akan,159,38,0.306452
3334,Akan,778,25,0.201613
3335,Akan,784,13,0.104839
3336,Akan,755,11,0.088710
3337,Akan,793,9,0.072581
...,...,...,...,...
3098,Yanoama,595,1,0.018182
3099,Yanoama,606,1,0.018182
3100,Yanoama,784,1,0.018182
3101,Yanoama,781,1,0.018182


In [19]:
# Average number of counted OCM occurrences per culture:
# the total of the Frequency column divided by the number of distinct cultures
total_frequency = sum(df_OCM_freq["Frequency"])
culture_count = len(set(df_OCM_freq["Culture"]))
print(f'OCMs per culture: {total_frequency / culture_count}')

OCMs per culture: 311.6


In [20]:
# Save the per-culture OCM frequency table as a CSV in the current working directory (row index omitted)
df_OCM_freq.to_csv("Culture_Frequency.csv", index=False)

## Association Rules for OCMs
Using machine learning, we will attempt to determine the co-occurrence of OCMs. For example, if the OCM code 262 is present, what is the likelihood that both 751 and 752 are also present?

In [21]:
# Load resources
from mlxtend.preprocessing import TransactionEncoder

# We will use the apriori module to generate a dataframe that
# we can use for association rule finding
from mlxtend.frequent_patterns import apriori

# We will use the association_rules module to generate
# our association rules from the apriori output data frame
from mlxtend.frequent_patterns import association_rules





In [22]:
# Keep only the columns needed to build the transactions, and display them
df_smaller = df_OCM.loc[:, ['Culture', 'OCM', 'Passage']]
df_smaller

Unnamed: 0,Culture,OCM,Passage
0,Akan,752,"The fowl was then dissected, legs, wings, brea..."
1,Akan,782,"The fowl was then dissected, legs, wings, brea..."
2,Akan,796,"The fowl was then dissected, legs, wings, brea..."
3,Akan,753,"A priest will sometimes say to one whom he, th..."
4,Akan,769,"A priest will sometimes say to one whom he, th..."
...,...,...,...
25612,Yanoama,177,Younger people are distinguished by a gift for...
25613,Yanoama,191,Younger people are distinguished by a gift for...
25614,Yanoama,734,Younger people are distinguished by a gift for...
25615,Yanoama,752,Younger people are distinguished by a gift for...


In [23]:
# Group the rows so that every (Culture, Passage) pair forms one group
df_group = df_smaller.groupby(['Culture', 'Passage'])
df_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fc478aba680>

In [24]:
def make_OCM_list(x):
    """Return the distinct values of one group.

    Meant to be passed to ``agg`` on a grouped dataframe. The result is
    whatever ``x.unique()`` returns (an array for a pandas Series, with
    values in order of first appearance), not a plain Python list.
    """
    distinct_values = x.unique()
    return distinct_values

In [25]:
# Aggregate every (Culture, Passage) group with make_OCM_list so that each group
# ends up with the unique OCMs found in it.
# Depending on the filtering, duplicate passages with different OCMs may fall into the
# same group. Their OCMs are then combined and de-duplicated, so this may not be a problem.
df_unique = df_group.aggregate(make_OCM_list)

In [26]:
# One transaction per (Culture, Passage) group: the collection of its unique OCM codes
ocm_per_group = df_unique['OCM']
list_trans = list(ocm_per_group)
len(list_trans)

6400

In [27]:
# Encode the transactions as a boolean table: one row per transaction, one column per item
te = TransactionEncoder()
te.fit(list_trans)
encoded_itemset = te.transform(list_trans)
print(encoded_itemset.shape)  # (number of transactions, number of distinct items)

# Wrap the encoded array in a dataframe whose columns are the item (OCM) names
df_encoded = pd.DataFrame(encoded_itemset, columns=te.columns_)
df_encoded.head()

(6400, 504)


Unnamed: 0,0,101,102,103,104,105,113,114,115,121,...,885,886,887,888,890,900,901,902,903,984
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [28]:
# A small amount of cleanup before mining: drop every column (item) whose name
# has fewer than 1 character, since that is just blank space.
# More data cleaning may be required later if errors become evident in the scraped dataset.
OCM_items = [item for item in te.columns_ if len(item) < 1]
print("removed: ",  OCM_items)
df_encoded = df_encoded.drop(columns=OCM_items)  # blank names do not seem to be real items
print('How many unique items are left?', len(df_encoded.columns))

removed:  []
How many unique items are left? 504


In [29]:
# Run apriori to get a dataframe of itemsets together with their support.
# If you have many items compared to your sample (few rows but many columns), a higher
# min_support is recommended, because many more combinations may reach a spuriously high
# support. Selecting too many itemsets can also crash the program.
df_support = apriori(df_encoded, min_support=0.01, use_colnames=True)
# Highest support first
df_support = df_support.sort_values('support', ascending=False)
df_support

Unnamed: 0,support,itemsets
30,0.565937,(753)
44,0.217656,(776)
28,0.213594,(751)
32,0.187656,(755)
29,0.178437,(752)
...,...,...
127,0.010156,"(753, 826)"
69,0.010000,"(753, 171)"
140,0.010000,"(757, 824)"
64,0.010000,(846)


## Use association_rules to find the rules

Using the dataframe generated by `apriori`, find the association rules with the greatest lift.  See the [association_rules documentation](https://rasbt.github.io/mlxtend/api_modules/mlxtend.frequent_patterns/association_rules/) for how to do this.

Sort the resulting DataFrame by lift in descending order.  A lift > 1 indicates that X and Y appear together more often than chance (in the original market-basket terms: the items are often purchased together, and buying X increases the purchase of Y).  A lift < 1 indicates that X and Y appear together less often than chance (in market-basket terms: the items are often substituted, i.e. X is bought instead of Y).

Examine the resulting DataFrame.  For the association rule X -> Y, X is the column `antecedents` and Y is the column `consequents`.  If sorted you can see the metrics for each rule based upon the lift.

In [30]:
# Derive association rules from the frequent itemsets, using lift as the metric with a minimum threshold of 1.0
rules = association_rules(df_support, metric = 'lift', min_threshold=1.0)
# lift > 1: Y appears together with X more often than chance
# lift = 1: Y appears together with X as often as chance
# lift < 1 (substitution): Y appears together with X less often than chance


In [31]:
# Order the rules from highest to lowest lift, then display them
# to see which rules were discovered
rules = rules.sort_values('lift', ascending=False)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
4236,"(787, 755, 793, 775)","(752, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
3834,"(787, 793, 775, 776)","(752, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
5777,"(755, 787, 752, 782, 776)","(793, 539, 775)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
5826,"(787, 755, 752, 776)","(793, 539, 775, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
3964,"(787, 755, 752, 776)","(793, 539, 775)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
...,...,...,...,...,...,...,...,...,...
6167,(121),(753),0.019219,0.565937,0.010937,0.569106,1.005598,0.000061,1.007353
6086,(787),(751),0.094062,0.213594,0.020156,0.214286,1.003240,0.000065,1.000881
6087,(751),(787),0.213594,0.094062,0.020156,0.094367,1.003240,0.000065,1.000336
6168,(755),(754),0.187656,0.057344,0.010781,0.057452,1.001890,0.000020,1.000115


In [32]:
# Show the rules whose antecedents contain at least one of these OCM codes
lst = frozenset(["793","226"])
msk = rules['antecedents'].apply(lambda antecedent: not lst.isdisjoint(antecedent))
out = rules[msk]
out

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
4236,"(787, 755, 793, 775)","(752, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
3834,"(787, 793, 775, 776)","(752, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
5851,"(787, 793, 775, 776)","(752, 755, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
5810,"(787, 755, 793, 775)","(776, 752, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
5760,"(755, 793, 787, 775, 776)","(752, 539, 782)",0.028281,0.030156,0.024063,0.850829,28.214010,0.023210,6.501545
...,...,...,...,...,...,...,...,...,...
131,"(755, 793)",(752),0.050937,0.178437,0.039688,0.779141,4.366465,0.030598,3.719852
800,"(787, 793)",(776),0.035625,0.217656,0.033281,0.934211,4.292137,0.025527,11.891625
42,"(755, 793)",(776),0.050937,0.217656,0.046094,0.904908,4.157510,0.035007,8.227228
99,(793),(752),0.060469,0.178437,0.040469,0.669251,3.750617,0.029679,2.483943
