In [1]:
import json
from tqdm import tqdm
from class_declarations import Conversation, Email

from collections import Counter

# Load Conversation objects and construct Triples
---

In [2]:
mailinglist = "ietf-http-wg"
with open(f"email_data/{mailinglist}/all.json") as handle:
    mail_dicts = json.load(handle)

short = 100

conv_iter = zip(range(short), ((subj_str, mail_ls) for period, subj_d in mail_dicts.items() 
                                            for subj_str, mail_ls in subj_d.items()))

convos = [Conversation(subj_str, mail_ls) for _, (subj_str, mail_ls) in tqdm(conv_iter, total=short)]

100%|██████████| 100/100 [00:59<00:00,  1.69it/s]


In [None]:
talked_to = [(str(e.sender), "talked_to", str(e.receiver)) for c in convos for e in c.emails]
involved_in = [(str(c), "involved_in", str(p)) for c in convos for p in c.interlocutors]

In [None]:
len(involved_in)

In [None]:
for e in convos[29]:
    print(e.body)
    print("\n\n\n\n\n")

In [None]:
for i, c in enumerate(convos[:100]):
    
    if str(hash(c)).startswith("624"):
        print(i)
        print(hash(c))
        print(c)

# NER and Outside Entity Linking

In [None]:
import stanza
stanza.download('en') # download English model
nlp = stanza.Pipeline('en', processors="tokenize,ner") # initialize English neural pipeline

In [None]:
doc = nlp("hello, the W3C has done")

print(doc.entities)

# Neo4j
---

In [None]:
from neo4j import GraphDatabase
from neo4j_defs import *
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "pwd"), encrypted=False)

In [None]:
def clear(tx):
    tx.run("""MATCH (n)
            DETACH DELETE n""")


with driver.session() as session:
    session.write_transaction(clear)
    for c in tqdm(convos[:100]):
        session.write_transaction(add_conversation, c)
        session.write_transaction(add_documents, c)
        session.write_transaction(add_mentions, c)
            
        for email in c:
            session.write_transaction(add_person,email.sender)
            session.write_transaction(add_person,email.receiver)
            session.write_transaction(add_talked_to,email.sender,email.receiver)
            
#             session.write_transaction(add_named_entities, email)
            
        for i in c.interlocutors:
            session.write_transaction(connect_conversation, i.name, c)
            
            
#     sorted_convos = sorted(convos, key=lambda (c1, c2): c1 <)
#     pairs = zip(sorted_convos[:-1], sorted_convos[1:])
    
#     for c1, c2 in pairs:
#         session.write_transaction(add_earlier_than, email)

---
# Identify Quoted Texts


In [32]:
from Levenshtein import distance as levenshtein

In [17]:
for e in convos[1]:
    print(e.body)
    print("____________________\n\n\n\n\n____________________")


<https://github.com/httpwg/http-extensions/issues/69>

Julian points out that we should say how extension parameters are handled in Alt-Svc.

Straw-man proposal:

* must-ignore
* register with IANA, Specification Required (implies expert review)

Thoughts?

--
Mark Nottingham   https://www.mnot.net/

____________________




____________________

On 27/05/2015 6:40 p.m., Mark Nottingham wrote:
> <https://github.com/httpwg/http-extensions/issues/69>
> 
> Julian points out that we should say how extension parameters are handled in Alt-Svc.
> 
> Straw-man proposal:
> 
> * must-ignore
> * register with IANA, Specification Required (implies expert review)
> 
> Thoughts?

Another straw-man:

 * must-ignore the service (not just extension)

Amos

____________________




____________________

On 27 May 2015 at 02:06, Amos Jeffries <squid3@treenet.co.nz> wrote:
>
> Another straw-man:
>
>  * must-ignore the service (not just extension)

That's a stronger statement, but I think that it's a good

In [45]:
i = 3
latest = convos[i][-1]

for l in latest.body.split("\n"):
    if not l.strip():
        continue
    print(":", l)
    
    for e_ in convos[1][:-1]:
        quoted = [l_ for l_ in e_.body.split("\n") if levenshtein(l, l_) < (min(len(l), len(l_))/2)]
        
        print(quoted)
        
    print("\n---")


: +1
[]
[]
[]

---
: > On 28 May 2015, at 2:25 am, Martin Thomson <martin.thomson@gmail.com> wrote:
[]
[]
[]

---
: > 
[]
['> ', '> ', '> ', '> ']
[]

---
: > On 27 May 2015 at 02:06, Amos Jeffries <squid3@treenet.co.nz> wrote:
[]
[]
['On 27 May 2015 at 02:06, Amos Jeffries <squid3@treenet.co.nz> wrote:']

---
: >> 
[]
[]
[]

---
: >> Another straw-man:
[]
['Another straw-man:']
['> Another straw-man:']

---
: >> 
[]
[]
[]

---
: >> * must-ignore the service (not just extension)
[]
[' * must-ignore the service (not just extension)']
['>  * must-ignore the service (not just extension)']

---
: > 
[]
['> ', '> ', '> ', '> ']
[]

---
: > That's a stronger statement, but I think that it's a good tweak.
[]
[]
["That's a stronger statement, but I think that it's a good tweak."]

---
: > 
[]
['> ', '> ', '> ', '> ']
[]

---
: > It means that I can require support of an extension in order to use the service.
[]
[]
['It means that I can require support of an extension in order to use the servic

In [50]:
for e in convos[1]:
    print(e.sender, e.receiver)
    print(e.body)
    
    print("__________________\n\n\n__________________")

<Mark Nottingham (mnot@mnot.net)> <HTTP Working Group (ietf-http-wg@w3.org)>

<https://github.com/httpwg/http-extensions/issues/69>

Julian points out that we should say how extension parameters are handled in Alt-Svc.

Straw-man proposal:

* must-ignore
* register with IANA, Specification Required (implies expert review)

Thoughts?

--
Mark Nottingham   https://www.mnot.net/

__________________


__________________
<Amos Jeffries (squid3@treenet.co.nz)> <NO_NAME (ietf-http-wg@w3.org)>

On 27/05/2015 6:40 p.m., Mark Nottingham wrote:
> <https://github.com/httpwg/http-extensions/issues/69>
> 
> Julian points out that we should say how extension parameters are handled in Alt-Svc.
> 
> Straw-man proposal:
> 
> * must-ignore
> * register with IANA, Specification Required (implies expert review)
> 
> Thoughts?

Another straw-man:

 * must-ignore the service (not just extension)

Amos

__________________


__________________
<Martin Thomson (martin.thomson@gmail.com)> <Amos Jeffries (squid3@