In [1]:
import os
%cd /content
!git clone --recursive https://github.com/BiteKirby3/FakeNewsChallenge
root_dir = "/content/FakeNewsChallenge/fnc-1-baseline"
os.chdir(root_dir)

/content
Cloning into 'FakeNewsChallenge'...
remote: Enumerating objects: 36, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 36 (delta 7), reused 31 (delta 4), pack-reused 0[K
Unpacking objects: 100% (36/36), 4.30 MiB | 3.45 MiB/s, done.


In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import sklearn
import tensorflow as tf
import tqdm
import scipy 
import nltk
from datetime import date
import csv

#Data preprocessing

##Data Loading

We load the stances and article bodies into two separate containers.

In [3]:
from utils.dataset import DataSet

In [4]:
dataset_train = DataSet("train")
dataset_test = DataSet("test")

Reading dataset
Total stances: 49972
Total bodies: 1683
Reading dataset
Total stances: 25413
Total bodies: 904


We can access it through the *.stances* and *.articles* variables. Moreover, *.articles* is a dictionary of articles, indexed by the body id.

In [13]:
dataset_test.stances[0]

{'Headline': 'Ferguson riots: Pregnant woman loses eye after cops fire BEAN BAG round through car window',
 'Body ID': 2008,
 'Stance': 'unrelated'}

In [14]:
print(dataset_test.articles[dataset_test.stances[0]['Body ID']])

A RESPECTED senior French police officer investigating the Charlie Hebdo magazine massacre took his own life mere hours after the horrific attacks stunned the world.

Commissioner Helric Fredou, 45, turned a gun on himself in his police office in Limoges last Wednesday night, reported France 3.

A colleague found his body at 1am on Thursday, the day after three gunmen fired at the satirical magazine's office and left 12 people dead.

Speaking to our sister publication Mirror Online, the Union of Commissioners of the National Police confirmed Mr Fredou had taken his own life.


In a statement released after his death, a union spokesman said: "It is with great sadness that we were informed this morning of the death of our colleague Helric Fredou, assigned as Deputy Director of the Regional Service Judicial Police in Limoges.

"On this particular day of national mourning, police commissioners are hit hard by the tragic death of one of their own.

"The Union of Commissioners of the Nationa

#Pre-train, Prompt and Predict

The idea is rather intuitive and simple to apply, in our case, news stance classification can be transformed into asking chatGPT a question, for example,

**What's the stance of the news body:**
$<Body>$

**to the news headline:**
$<Headline>$? 

**Choose a stance from "unrelated, discuss, agree, disagree". The stance is**

Then wait for the completion of the sentence answered by chatGPT.

This can be implemented through OpenAI's officiel API. 
We define a prompt template that includes the news headline as well as the news body and asks for the stance of the news body towards it. Then we filter the generated stances to select the relevant stance. 

See OpenAI's [Chat Completion API](https://platform.openai.com/docs/api-reference/chat) for the detailed usages. 

In [8]:
!pip install --upgrade openai

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openai
  Downloading openai-0.27.4-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.3/70.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
Collecting async-timeout<5.0,>=4.0.0a3
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.2/114.2 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting yarl<2.0,>=1.0
  Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)
[2K    

In [9]:
import os
import openai

In [10]:
API_KEY = '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'
openai.api_key = API_KEY

In [11]:
def classify_stance(news_body, news_headline):
    # Define prompt template
    headline_template = "Given the following news headline: '{}'."
    body_template = "Given the following news body: '{}'."
    question_template = "What is the stance of this news body towards this news headline? Please choose one of the following stances: unrelated, discuss, agree, disagree. The stance is "

    # Generate prompt
    news_body = news_body.replace("\n\n","\n")
    news_body = news_body.replace("\n\n","\n")
    headline_template = headline_template.format(news_headline)
    body_template = body_template.format(news_body)

    # Generate possible stances using ChatGPT
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": headline_template},
                  {"role": "user", "content": body_template},
                  {"role": "user", "content": question_template}
                  ],
        temperature = 0.5
    )

    # Filter stances to select the most relevant one
    stance = response.choices[0].message.content
    if stance[-1]=="." :
      stance = stance.rstrip(stance[-1])
    #if stance in ["unrelated", "discuss", "agree", "disagree"]:
    #    return stance
    #else:
    #    return None
    return stance

In [11]:
#Test 
print(dataset_test.stances[248])
print("The stance given by GPT3.5 is: "+classify_stance(dataset_test.articles[dataset_test.stances[248]['Body ID']], dataset_test.stances[248]['Headline']))

{'Headline': '‘Crabzilla’ spotted off the coast of Britain', 'Body ID': 893, 'Stance': 'disagree'}
The stance given by GPT3.5 is: disagree


In [None]:
#Classify the test dataset, we write the GPT responses to a csv file.\
import time
filename = "/content/FakeNewsChallenge/result/"+"prompt_prediction"+str(date.today())+".csv"
with open(filename, 'w', newline='') as csvfile:
    fieldnames = ["STANCE_INDEX","ACTUAL_STANCE","PREDICT_STANCE"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for i in range(len(dataset_test.stances)):
      print(i)
      headline = dataset_test.stances[i]['Headline']
      body = dataset_test.articles[dataset_test.stances[i]['Body ID']]
      actual_stance = dataset_test.stances[i]['Stance']
      predict_stance = classify_stance(dataset_test.articles[dataset_test.stances[i]['Body ID']], dataset_test.stances[i]['Headline'])
      writer.writerow({'STANCE_INDEX': str(i), 'ACTUAL_STANCE': str(actual_stance), 'PREDICT_STANCE': str(predict_stance)})
      csvfile.flush()
      time.sleep(21)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [24]:
len(dataset_test.stances)

25413

Note that the quality of classification depends on the quality of the prompt, thus it may require some fine-tuning to obtain accurate results.

#Scoring classifier

In [8]:
from utils.score import report_score

In [34]:
predict_stances = []
for stance in dataset_test.stances:
  predict_stances.append(stance.get('Stance'))

In [35]:
report_score(predict_stances, predict_stances)

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |   1903    |     0     |     0     |     0     |
-------------------------------------------------------------
| disagree  |     0     |    697    |     0     |     0     |
-------------------------------------------------------------
|  discuss  |     0     |     0     |   4464    |     0     |
-------------------------------------------------------------
| unrelated |     0     |     0     |     0     |   18349   |
-------------------------------------------------------------
Score: 11651.25 out of 11651.25	(100.0%)


100.0