###########################################################################################################################
# This file provides the read/write functionality for the input/output files. It reads and loads the claims file. It     #
# writes the articles collected for a certain claim (retrieved by searching Google) to a JSON-formatted file as an       #
# initial dump. It also writes the JSON file that is used by D3 in the visualization stage of the claim-map.              #
# _______________________________________________________________________________________________________________________ #
# Author: Israa Qasim Jaradat, IDIR lab, University of Texas at Arlington                                                 #
###########################################################################################################################
import codecs
import json
from data_structures import *  # provides the Claim class used below
from datetime import datetime

# Called by google_search's do_research() function.
# Expects one tab-separated claim per line: text, claimer, date (YYYY-MM-DD).
def read_claims(claims_file):
    claims = []
    with codecs.open(claims_file, 'r', encoding='utf8') as f:
        lines = f.readlines()
    i = 0
    for line in lines:
        line = line.strip()
        if not line:  # skip blank lines (e.g. a trailing newline)
            continue
        fields = line.split('\t')
        # fields = [text, claimer, date]; the year is parsed out of the date
        dt = datetime.strptime(fields[2], '%Y-%m-%d')
        claims.append(Claim(i, fields[0], fields[1], fields[2], dt.year))
        i += 1
    return claims
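
# Illustrative usage sketch (the file path below is hypothetical; each input
# line is assumed to be "<text>\t<claimer>\t<YYYY-MM-DD>", e.g.
#   The earth is flat.\tJohn Doe\t2019-05-01 ):
#
#   claims = read_claims('../input_data/claims.txt')
#   for c in claims:
#       print(c.id, c.text, c.claimer, c.year)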

# Claim's constructor signature: Claim(id, text, claimer, date, year)
def read_claim(line):
    # Wraps a bare claim string; claimer, date, and year are unknown here.
    return Claim(0, line, 'unknown', 'unknown', 'unknown')
    # Alternative parsing for a tab-separated line, kept for reference:
    # fields = line.strip().split('\t')
    # if len(fields) >= 3:
    #     dt = datetime.strptime(fields[2], '%Y-%m-%d')
    #     return Claim(0, fields[0], fields[1], fields[2], dt.year)

# Called by do_research once per claim, inside its loop over claims.
# Writes a claim's articles to param['output'] + str(claim.id) + '.json'.
def dump_data(param, claim):
    data = {'articles': []}
    for article in claim.articles:
        # Collect this article's subarticles; the list is reset per article so
        # earlier articles' subarticles do not leak into later entries.
        subarticles = []
        for subarticle in article.articleurls:
            subarticles.append({
                'claim': claim.text,
                'source': article.text,
                'most_relevant_sent': subarticle.most_relevant_sent,
                'publish_date': subarticle.publish_date,
                'authors': subarticle.author,
                'image': subarticle.image,
                'keywords': subarticle.keywords,
                'summary': subarticle.summary,
                'stance': subarticle.stance,
                'depth': subarticle.depth
            })
        data['articles'].append({
            'claim': claim.text,
            'depth': article.depth,
            'most_relevant_sent': article.most_relevant_sent,
            'publish_date': article.publish_date,
            'authors': article.author,
            'image': article.image,
            'keywords': article.keywords,
            'summary': article.summary,
            'stance': article.stance,
            'articles': subarticles
        })
    with open(param['output'] + str(claim.id) + ".json", 'w', encoding='utf-8') as out:
        json.dump(data, out, ensure_ascii=False, indent=4)
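
# Shape of the dump written above (illustrative sketch; field values are
# hypothetical, and one such file is produced per claim):
# {
#     "articles": [
#         {
#             "claim": "...claim text...",
#             "depth": 0,
#             "most_relevant_sent": "...",
#             ...,
#             "articles": [ { ...one entry per subarticle... } ]
#         }
#     ]
# }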

# Appends a human-readable sample of a claim and its articles for manual testing.
def write_test_file(param, claim):
    with codecs.open("../output_data/testing_sample.txt", 'a', encoding='utf8') as out:
        # claim is a Claim object, so write claim.text (concatenating the
        # object itself would raise a TypeError).
        out.write("Claim: " + claim.text + '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n')
        for article in claim.articles:
            out.write(article.text + '\n' +
                      'most relevant sentence 1: ' + article.most_relevant_sent[0] + '\n' +
                      'most relevant sentence 2: ' + article.most_relevant_sent[1] + '\n' +
                      '========================================================' + '\n')

# Called for every claim in do_research's loop over claims.
# Builds the node/link graph for one claim and returns it as a JSON string for D3.
def write_json_visualization(param, claim):
    data = {'links': [], 'nodes': []}

    def add_article_node(article, parent):
        # Append a link from the article to its parent node
        data['links'].append({
            'source': parent.id,
            'value': "1",
            'target': article.id
        })
        data['nodes'].append({
            'claimer': "NA",
            'year': [article.year],
            'id': article.id,
            'type': 'evidence',
            'date': str(article.publish_date),
            'snippet': article.most_relevant_sent[1],
            'source': article.url,
            'truth_value': "NA"
        })
        # Recursively add subarticles as nodes linked to the current article
        for subarticle in article.articleurls:
            add_article_node(subarticle, article)

    # Append the claim itself as the root claim node
    data['nodes'].append({
        'claimer': claim.claimer,
        'year': [claim.year],
        'id': 0,
        'type': 'claim',
        'date': claim.date,
        'snippet': claim.text,
        'source': claim.claimer,
        'truth_value': "NA"
    })
    # Append the relevant documents as evidence nodes
    for article in claim.articles:
        add_article_node(article, claim)
    # Alternatively, write the graph to a file instead of returning it:
    # with open(param['json_visualization'], 'w', encoding='utf-8') as out:
    #     json.dump(data, out, ensure_ascii=False, indent=4)
    return json.dumps(data)
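
# Illustrative sketch of the JSON string returned above (values hypothetical;
# D3 consumes "nodes" and "links" keyed by node ids):
# {
#     "nodes": [
#         {"claimer": "John Doe", "year": [2019], "id": 0, "type": "claim", ...},
#         {"claimer": "NA", "year": [2020], "id": 3, "type": "evidence", ...}
#     ],
#     "links": [{"source": 0, "value": "1", "target": 3}]
# }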