In [1]:
import sklearn as sk
import nltk
import pandas as pd
import numpy as np
import os
from nltk.tokenize import sent_tokenize
import re
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 

### Building an inverted index for maintaining any kind of queries we perform a series of steps while parsing the  documents. Let's have a walkthrough while constructing our own information retrival.

![alt text](image.jpg "Title")


I want to create a search engine for all the documents in twitter text. I know what I seek. So I'll run a program which will go through the whole tree in text and collect the pages I want.  I will ask my program to retrieve the txt. So, once I get a document, I proceed to the next step.

1. Fetching the Document
The job is really simple if I get a text file (.txt). But if it was a doc or pdf, I'll need to parse them using some libraries to retrieve their text. Let's say I'm successful in reading the text. What next?

2. Removing the Stop Words
Consider the last paragraph. What were the important words we may be looking for? "text", "libraries", "doc", "pdf", "retrieve", "successful". But most of the other words are just a waste. We denote the most occurring words as "stop words" and remove them so that I don't get indexes for words like "I", "the", "we", "is", "an". In regular use, we have a list of 500-1000 words. But it may differ depending on use.

3. Stem to the Root Word
Then comes Stemming. Now whenever I want to search for "retrieval", I want to see a document that has information about it. But the word present in the document is called "retrieve" instead of "retrieval". To relate the both words, I'll chop some part of each and every word I read so that I could get the "root word". Retrieve may become "retriev". So will "retrieval". We have to be sure about the rules we use to chop the words. There are standard tools for performing this like "Porter's Stemmer". You can play around with a porter stemmer here : Porter Stemmer Online

4. Record Document IDs
Now get ready for the main task - Indexing.
Every document I have has got an unique document id. As I encounter a non-stop word that is stemmed now, I save it in my memory in the form :
retriev ==> docID104007

If I get same word in some other document, I may write
retriev ==> docID104007
retriev ==> docID154033

But very soon I've to combine them in a single list
retriev ==> docID104007&docID154033

I can further improve by writing how many timed did the word occur in the document so that we can rank the more important documents while retrieving.
retriev ==> docID104007|5|&docID154033|2|

5. Merge and Store the Terms
Finally, we save all of them in disk files. It's great if we sort the index based on the words for quick and easy retrieval.

This all obviously needs some specific data structures that simplify your job.

We can build further secondary indexes to improve the retrieval. There are lot of issues related to ranking too.

In [2]:
filename = 'tweets_corpus.txt'
stoplist = stopwords.words('english')
punctuations = '''!!!!!()-[]{};:'"\,!...<>./?@#$%^&*_~'''
DocID = {}
with open(filename) as fh:
    for line in fh:
        key, text = line.strip().split('\t', 1)
        DocID[key] = [word for word in text.strip().split() if word not in punctuations] # 
        #DocID[key]=[ word for word in  DocID[key] if word not in punctuations]
              

 

In [3]:
DocID

{'81499877556760576': ['Bandaging',
  'up',
  'my',
  'paper-cuts',
  'having',
  'cheesecake',
  'for',
  'dinner',
  'and',
  'calling',
  'it',
  'a',
  'night',
  "We're",
  'doin',
  'it',
  'big',
  'here',
  'in',
  'NYC'],
 '81500716438523904': ['I',
  "haven't",
  'had',
  'any',
  'krispy',
  'kremes',
  'or',
  'strawberry',
  'trifles',
  'since',
  'I',
  'started',
  'gym',
  'cries'],
 '81503002321616896': ['Bacon/cheddar',
  'slider',
  'topped',
  'w/fried',
  'egg',
  'Blue',
  'cheese',
  'slider',
  'topped',
  'w/avocado',
  'purple',
  'cherokee',
  'tomato'],
 '81507775422791680': ['Nacho',
  'w/',
  'cheese',
  'on',
  'my',
  'shirt',
  'Uggghhh'],
 '81534165975171072': ['you',
  'aint',
  'nuffin',
  'but',
  'a',
  'piece',
  'of',
  'cheese',
  'without',
  'the',
  'corners',
  'in',
  'other',
  'words',
  'you',
  'will',
  'never',
  'be',
  'a',
  'slice',
  'BITCH'],
 '81535634019323904': ['TAG_USERNAME',
  'TAG_USERNAME',
  'Mmmm',
  'cheese',
  'drea

In [4]:
inverted_index = dict()
for key in DocID:
    print (key)
    for text in DocID[key]:
        if text not in inverted_index:
            inverted_index[text]=[key]
        else:
            inverted_index[text].append(key)

81499877556760576
81500716438523904
81503002321616896
81507775422791680
81534165975171072
81535634019323904
81577509950459904
81582996083326976
81587643376336896
81600113016971264
81623945064890368
81644157432643584
81656304107651072
81673244926685184
81715158593966080
81716618236928000
81736742478155778
81842384404623360
81844590625304576
82461950231064576
82650970722533376
85032815321825280
85094773555335168
86441828815089664


In [5]:
inverted_index

{'Bandaging': ['81499877556760576'],
 'up': ['81499877556760576', '81644157432643584'],
 'my': ['81499877556760576',
  '81507775422791680',
  '81644157432643584',
  '81656304107651072',
  '81673244926685184'],
 'paper-cuts': ['81499877556760576'],
 'having': ['81499877556760576'],
 'cheesecake': ['81499877556760576', '81600113016971264', '81716618236928000'],
 'for': ['81499877556760576', '81715158593966080', '81716618236928000'],
 'dinner': ['81499877556760576'],
 'and': ['81499877556760576',
  '81587643376336896',
  '81656304107651072',
  '81673244926685184',
  '81715158593966080',
  '81716618236928000',
  '81736742478155778'],
 'calling': ['81499877556760576'],
 'it': ['81499877556760576',
  '81499877556760576',
  '81600113016971264',
  '81715158593966080',
  '81842384404623360',
  '82650970722533376'],
 'a': ['81499877556760576',
  '81534165975171072',
  '81534165975171072',
  '81535634019323904',
  '81582996083326976',
  '81587643376336896',
  '81600113016971264',
  '8164415743264

In [6]:
table_format=pd.DataFrame.from_dict(inverted_index, orient='index',columns=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                                                                           'I', 'K', 'L', 'M','N'])

In [7]:
table_format['DOC ID'] = table_format[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                                       'I', 'K', 'L', 'M','N']].apply(lambda x: ', '.join(x[x.notnull()]), axis = 1)

In [8]:
Invert_table=table_format.drop(columns=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                                       'I', 'K', 'L', 'M','N'])

In [9]:
Invert_table

Unnamed: 0,DOC ID
Bandaging,81499877556760576
up,"81499877556760576, 81644157432643584"
my,"81499877556760576, 81507775422791680, 81644157..."
paper-cuts,81499877556760576
having,81499877556760576
cheesecake,"81499877556760576, 81600113016971264, 81716618..."
for,"81499877556760576, 81715158593966080, 81716618..."
dinner,81499877556760576
and,"81499877556760576, 81587643376336896, 81656304..."
calling,81499877556760576


### PROBLEM 2
#### This Function will generate intersecting the postings of single term, as well as process simple Boolean queries

In [10]:
DATA = [
    {
        'title': '81499877556760576',
        'description': 'Bandaging up my paper-cuts , having cheesecake for dinner , and calling it a night'
                        'We are doin it big here in NYC'
                        
    },
    {
        'title': '81500716438523904',
        'description': 'I havent had any krispy kremes or strawberry trifles since '
            'I started gym * cries *'
    },
    {
        'title': '81503002321616896',
        'description': 'Bacon/cheddar slider topped w/fried egg & Blue cheese slider topped w/avocado & purple cherokee tomato '
            
    },
    
     {'title': '81507775422791680',
        'description': 'Nacho w/ cheese on my shirt ! Uggghhh'},
     {'title': '81534165975171072',
        'description': 'you aint nuffin but a piece of cheese without the corners .. in other words , you will never be a slice . BITCH .'},

{'title': '81535634019323904',
        'description': 'TAG_USERNAME TAG_USERNAME Mmmm ... cheese ... dreaming of a squirrel burger with cheese !'},


{'title': '81577509950459904',
        'description': 'Mmmm cheese'},


{'title': '81582996083326976',
        'description': 'TAG_USERNAME 1st off Im like 1 year younger than u , 2nd age is just a number , 3rd ima cater ur wedding wit patty n cheese'},

{'title': '81587643376336896',
        'description': 'RT TAG_USERNAME : I want a steak and cheese egg roll right now .'},

{'title': '81600113016971264',
        'description': 'think imma eat some cheesecake befor i lay down ... Havent had it in a while'},
    
{'title': '81623945064890368',
        'description': 'A mixed one mostly strawberry peach little white cherry dr pepper or coke etc'},
{'title': '81644157432643584',
        'description': 'My stomach was yelling at me telling me to get my ass up nd get somethin to eat . So I went nd got cheesesticks nd a waterbottle .'},
{'title': '81656304107651072',
        'description': 'chocolate mint , cookies & cream , very berry strawberry , and chocolate caramel~ all blend perfectly in my mouth'},
{'title': '81673244926685184',
        'description': 'I think I want some cheese eggs and pancakes ... but will I cook ? Wheres my gf . This aint right to make such a hard decision'},
{'title': '81715158593966080',
        'description': 'TAG_USERNAME Totally . It is also good on fish , chicken , veggies . Oh , and desserts . Drizzling it over strawberry tarts for a party tonight ! -C'},
{'title': '81716618236928000',
        'description': 'TAG_USERNAME I want to get the strawberry cheesecake and candy bear . When does the 2 for $ 38 expire ?'},
{'title': '81736742478155778',
        'description': 'Made myself some scrambled eggs with cheese and bacon bits'},
    
{'title': '81842384404623360',
        'description': 'TAG_USERNAME you could try it but its not great haha if i could recommend , get the strawberry one thats ma fave !'},
{'title': '81844590625304576',
        'description': 'TAG_USERNAME then im 100% pure strawberry flavoured hmmm tasty'},
{'title': '82461950231064576',
        'description': 'just had the most beautiful pink coloured raspberry , strawberry & banana smoothie'},
{'title': '82650970722533376',
        'description': 'I went swimming , then ate asparagus bacon egg cheese biscuit goodness , then watched Date Night . It was ... it was good . TAG_FINAL_HASHTAGS'},
{'title': '85032815321825280',
        'description': 'RT TAG_USERNAME : RT TAG_USERNAME : Pancakes , bacon , eggs w/ cheese , & hashbrown casserole on deck TAG_HASHTAGS  ~i want sum !!!!! Ok its good too'},
    
{'title': '85094773555335168',
        'description': 'Cheese hashbrowns , turkey bacon , veggie tofu scramble , rice , french toast , scrambled eggs , strawberries & cantaloupe . TAG_FINAL_HASHTAGS'},
{'title': '86441828815089664',
        'description': 'TAG_HASHTAGS using cream cheese icing on chocolate cake .. just use vanilla or strawberry'},

 ]

In [11]:
import re
from collections import defaultdict, Counter

def bold(txt):
    return '\x1b[1m%s\x1b[0m' % txt



SPLIT_RE = re.compile(r'[^a-zA-Z0-9]')
def tokenize(text):
    yield from SPLIT_RE.split(text)

def text_only(tokens):
    for t in tokens:
        if t.isalnum():
            yield t

def lowercase(tokens):
    for t in tokens:
        yield t.lower()

def stem(tokens):
    for t in tokens:
        if t.endswith('ly'):
            t = t[:-2]
        yield t

SYNONYMS = {
    'rapid': 'quick',
}
def synonyms(tokens):
    for t in tokens:
        yield SYNONYMS.get(t, t)

def analyze(text):
    tokens = tokenize(text)
    for token_filter in (text_only, lowercase, stem, synonyms):
        tokens = token_filter(tokens)
    yield from tokens

def index_docs(docs, *fields):
    index = defaultdict(lambda: defaultdict(Counter))
    for id, doc in enumerate(docs):
        for field in fields:
            for token in analyze(doc[field]):
                index[field][token][id] += 1
    return index

def combine_and(*args):
    if not args:
        return Counter()
    out = args[0].copy()
    for c in args[1:]:
        for doc_id in list(out):
            if doc_id not in c:
                del out[doc_id]
            else:
                out[doc_id] += c[doc_id]
    return out

def combine_or(*args):
    if not args:
        return Counter()
    out = args[0].copy()
    for c in args[1:]:
        out.update(c)
    return out

COMBINE = {
    'OR': combine_or,
    'AND': combine_and,
}

def search_in_fields(index, query, fields):
    for t in analyze(query):
        yield COMBINE['OR'](*(index[f][t] for f in fields))

def search(index, query, operator='AND', fields=None):
    combine = COMBINE[operator]
    return combine(*(search_in_fields(index, query, fields or index.keys())))

def query(index, query, operator='AND', fields=None):
    print('Search for "%s" using %s in %s' % (bold(query), bold(operator), fields or 'all fields'))
    print('-'*80)
    ids = search(index, query, operator, fields)
    for doc_id, score in ids.most_common():
        print('%s found with score of %s' % (bold(DATA[doc_id]['title']), bold(score)))
    print('\n')
    


In [12]:
#SAMPLE
index = index_docs(DATA, 'title', 'description')

query(index, 'egg', 'OR')
query(index, 'I' )
query(index, 'NYC')
query(index, 'NYC', 'OR')
query(index, 'straberry', 'OR')
query(index, 'cheesecake')
query(index, 'gym')
query(index, 'of')


Search for "[1megg[0m" using [1mOR[0m in all fields
--------------------------------------------------------------------------------
[1m81503002321616896[0m found with score of [1m1[0m
[1m81587643376336896[0m found with score of [1m1[0m
[1m82650970722533376[0m found with score of [1m1[0m


Search for "[1mI[0m" using [1mAND[0m in all fields
--------------------------------------------------------------------------------
[1m81673244926685184[0m found with score of [1m3[0m
[1m81500716438523904[0m found with score of [1m2[0m
[1m81587643376336896[0m found with score of [1m1[0m
[1m81600113016971264[0m found with score of [1m1[0m
[1m81644157432643584[0m found with score of [1m1[0m
[1m81716618236928000[0m found with score of [1m1[0m
[1m81842384404623360[0m found with score of [1m1[0m
[1m82650970722533376[0m found with score of [1m1[0m
[1m85032815321825280[0m found with score of [1m1[0m


Search for "[1mNYC[0m" using [1mAND[0m in all f

#### Now we can do the following procedure to obtain results for two or more queries using below function
When you Shift+Enter You will see the following input box where we can put our query and Shift+enter once again to see the result. I have provided couple examples below too
![alt text](insert.jpg "Title")

In [13]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.tokenize import sent_tokenize , word_tokenize
import glob
import re
import os
import numpy as np
import sys
Stopwords = set(stopwords.words('english'))

def finding_all_unique_words_and_freq(words):
    words_unique = []
    word_freq = {}
    for word in words:
        if word not in words_unique:
            words_unique.append(word)
    for word in words_unique:
        word_freq[word] = words.count(word)
    return word_freq
def finding_freq_of_word_in_doc(word,words):
    freq = words.count(word)
        
def remove_special_characters(text):
    regex = re.compile('[^a-zA-Z0-9\s]')
    text_returned = re.sub(regex,'',text)
    return text_returned

class Node:
    def __init__(self ,docId, freq = None):
        self.freq = freq
        self.doc = docId
        self.nextval = None
    
class SlinkedList:
    def __init__(self ,head = None):
        self.head = head

        
all_words = []
dict_global = {}
file_folder = 'tweets_corpus.txt'
idx = 1
files_with_index = {}
for file in glob.glob(file_folder):
    print(file)
    fname = file
    file = open(file , "r")
    text = file.read()
    text = remove_special_characters(text)
    text = re.sub(re.compile('\d'),'',text)
    sentences = sent_tokenize(text)
    words = word_tokenize(text)
    words = [word for word in words if len(words)>1]
    words = [word.lower() for word in words]
    words = [word for word in words if word not in Stopwords]
    dict_global.update(finding_all_unique_words_and_freq(words))
    files_with_index[idx] = os.path.basename(fname)
    idx = idx + 1
    
unique_words_all = set(dict_global.keys())

tweets_corpus.txt


In [14]:
linked_list_data = {}
for word in unique_words_all:
    linked_list_data[word] = SlinkedList()
    linked_list_data[word].head = Node(1,Node)
word_freq_in_doc = {}
idx = 1
for file in glob.glob(file_folder):
    file = open(file, "r")
    text = file.read()
    text = remove_special_characters(text)
    text = re.sub(re.compile('\d'),'',text)
    sentences = sent_tokenize(text)
    words = word_tokenize(text)
    words = [word for word in words if len(words)>1]
    words = [word.lower() for word in words]
    words = [word for word in words if word not in Stopwords]
    word_freq_in_doc = finding_all_unique_words_and_freq(words)
    for word in word_freq_in_doc.keys():
        linked_list = linked_list_data[word].head
        while linked_list.nextval is not None:
            linked_list = linked_list.nextval
        linked_list.nextval = Node(idx ,word_freq_in_doc[word])
    idx = idx + 1

In [15]:
query = input('Enter your query:')
query = word_tokenize(query)
connecting_words = []
cnt = 1
different_words = []
for word in query:
    if word.lower() != "and" and word.lower() != "or" and word.lower() != "not":
        different_words.append(word.lower())
    else:
        connecting_words.append(word.lower())
print(connecting_words)
total_files = len(files_with_index)
zeroes_and_ones = []
zeroes_and_ones_of_all_words = []
for word in (different_words):
    if word.lower() in unique_words_all:
        zeroes_and_ones = [0] * total_files
        linkedlist = linked_list_data[word].head
        print(word)
        while linkedlist.nextval is not None:
            zeroes_and_ones[linkedlist.nextval.doc - 1] = 1
            linkedlist = linkedlist.nextval
        zeroes_and_ones_of_all_words.append(zeroes_and_ones)
    else:
        print(word," not found")
        sys.exit()
print(zeroes_and_ones_of_all_words)
for word in connecting_words:
    word_list1 = zeroes_and_ones_of_all_words[0]
    word_list2 = zeroes_and_ones_of_all_words[1]
    if word == "and":
        bitwise_op = [w1 & w2 for (w1,w2) in zip(word_list1,word_list2)]
        zeroes_and_ones_of_all_words.remove(word_list1)
        zeroes_and_ones_of_all_words.remove(word_list2)
        zeroes_and_ones_of_all_words.insert(0, bitwise_op);
    elif word == "or":
        bitwise_op = [w1 | w2 for (w1,w2) in zip(word_list1,word_list2)]
        zeroes_and_ones_of_all_words.remove(word_list1)
        zeroes_and_ones_of_all_words.remove(word_list2)
        zeroes_and_ones_of_all_words.insert(0, bitwise_op);
    elif word == "not":
        bitwise_op = [not w1 for w1 in word_list2]
        bitwise_op = [int(b == True) for b in bitwise_op]
        zeroes_and_ones_of_all_words.remove(word_list2)
        zeroes_and_ones_of_all_words.remove(word_list1)
        bitwise_op = [w1 & w2 for (w1,w2) in zip(word_list1,bitwise_op)]
zeroes_and_ones_of_all_words.insert(0, bitwise_op);
        
files = []    
print(zeroes_and_ones_of_all_words)
lis = zeroes_and_ones_of_all_words[0]
cnt = 1
for index in lis:
    if index == 1:
        files.append(files_with_index[cnt])
    cnt = cnt+1
    
print(files)

Enter your query:Cheesecake AND Eggs
['and']
cheesecake
eggs
[[1], [1]]
[[1], [1]]
['tweets_corpus.txt']


####  EXAMPLE
Enter your query: egg AND cheese
['and']
egg
cheese
[[1], [1]]
[[1], [1]]

Enter your query: chocolate AND strawberry
['and']
chocolate
strawberry
[[1], [1]]
[[1], [1]]
['tweets_corpus.txt'

Enter your query: eggs AND cheese AND bacon
['and', 'and']
eggs
cheese
bacon
[[1], [1], [1]]
[[1], [1]]

Enter your query:chocolate OR strawberry
['or']
chocolate
strawberry
[[1], [1]]
[[1], [1]]

Enter your query:eggs AND cheese OR eggs AND bacon
['and', 'or', 'and']
eggs
cheese
eggs
bacon
[[1], [1], [1], [1]]
[[1], [1]]

#### Calculating TF_IDF scores for retrived indexes

In [17]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

In [42]:
file ='tweets_corpus.txt'
file = open(file , 'r')
text = file.read()

In [43]:
corpus = nltk.sent_tokenize(text)

for i in range(len(corpus )):
    corpus [i] = corpus [i].lower()
    corpus [i] = re.sub(r'\W',' ',corpus [i])
    corpus [i] = re.sub(r'\s+',' ',corpus [i])

wordfreq = {}
for sentence in corpus:
    tokens = nltk.word_tokenize(sentence)
    for token in tokens:
        if token not in wordfreq.keys():
            wordfreq[token] = 1
        else:
            wordfreq[token] += 1

import heapq
most_freq = heapq.nlargest(200, wordfreq, key=wordfreq.get)

In [44]:
word_idf_values = {}
for token in most_freq:
    doc_containing_word = 0
    for document in corpus:
        if token in nltk.word_tokenize(document):
            doc_containing_word += 1
    word_idf_values[token] = np.log(len(corpus)/(1 + doc_containing_word))


In [45]:
word_tf_values = {}
for token in most_freq:
    sent_tf_vector = []
    for document in corpus:
        doc_freq = 0
        for word in nltk.word_tokenize(document):
            if token == word:
                  doc_freq += 1
        word_tf = doc_freq/len(nltk.word_tokenize(document))
        sent_tf_vector.append(word_tf)
    word_tf_values[token] = sent_tf_vector

In [48]:
tfidf_values = []
for token in word_tf_values.keys():
    tfidf_sentences = []
    for tf_sentence in word_tf_values[token]:
        tf_idf_score = tf_sentence * word_idf_values[token]
        tfidf_sentences.append(tf_idf_score)
    tfidf_values.append(tfidf_sentences)

In [49]:
tf_idf_model = np.asarray(tfidf_values)

In [51]:
tf_idf_model=pd.DataFrame(tf_idf_model)

In [52]:
tf_idf_model

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,0.000000,0.000000,0.041645,0.037860,0.0,0.138818,0.058110,0.000000,0.000000,0.026868,...,0.000000,0.000000,0.000000,0.000000,0.025240,0.021919,0.000000,0.052057,0.050479,0.000000
1,0.000000,0.000000,0.041645,0.000000,0.0,0.000000,0.038740,0.016658,0.092545,0.080604,...,0.000000,0.000000,0.064070,0.000000,0.025240,0.021919,0.000000,0.000000,0.025240,0.000000
2,0.062551,0.000000,0.000000,0.085297,0.0,0.078189,0.043640,0.037531,0.104252,0.000000,...,0.000000,0.104252,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.0,0.176009,0.049119,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.081235,0.000000,0.032002,0.027791,0.000000,0.000000,0.064003,0.000000
4,0.062551,0.117284,0.000000,0.000000,0.0,0.000000,0.000000,0.018765,0.000000,0.000000,...,0.000000,0.104252,0.000000,0.000000,0.028432,0.000000,0.375308,0.000000,0.000000,0.187654
5,0.000000,0.000000,0.023457,0.000000,0.0,0.000000,0.000000,0.018765,0.000000,0.030267,...,0.000000,0.104252,0.072175,0.000000,0.028432,0.049383,0.000000,0.000000,0.028432,0.000000
6,0.079306,0.000000,0.000000,0.000000,0.0,0.000000,0.027665,0.000000,0.000000,0.076747,...,0.396528,0.000000,0.091506,0.000000,0.036048,0.000000,0.000000,0.000000,0.000000,0.000000
7,0.089582,0.000000,0.033593,0.000000,0.0,0.000000,0.000000,0.053749,0.000000,0.043346,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,0.000000,0.000000,0.033593,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.040719,0.035361,0.000000,0.083983,0.040719,0.000000
9,0.000000,0.000000,0.000000,0.061079,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.103364,0.191962,0.040719,0.035361,0.000000,0.000000,0.000000,0.000000
