##### Loading the dataset

In [10]:
from datasets import load_dataset
# Login using `huggingface-cli login` to access this dataset
ds = load_dataset("newsmediabias/FAKE-NEWS-BIASES-LABELLED")

In [11]:
print(ds["train"])

Dataset({
    features: ['text', 'source', 'date_published', 'keyword_category', 'outlet', 'label_bias', 'dimension', 'aspect', 'biased_phrases', 'debias_text'],
    num_rows: 4099
})


##### Converting Dataset to Dataframe

In [12]:
import pandas as pd
df = pd.DataFrame(ds['train'])
df

Unnamed: 0,text,source,date_published,keyword_category,outlet,label_bias,dimension,aspect,biased_phrases,debias_text
0,After three very public failures in balloting ...,https://news.google.com/rss/articles/CBMiMWh0d...,,Political Dimension,BBC.com,biased,Political Dimension,,"['martyr for', 'party establishment'],","'After three failures in public voting, his en..."
1,"WASHINGTON, Oct. 20 (Xinhua) -- The U.S. House...",https://news.google.com/rss/articles/CBMiSGh0d...,45219,Political Dimension,Xinhua,biased,Political Dimension,,"['right-wing Republican', 'political polarizat...",'The U.S. House of Representatives remains wit...
2,They marched on the White House to make their ...,https://news.google.com/rss/articles/CBMiZWh0d...,45219,Political Dimension,Vox.com,non-biased,Political Dimension,,"['progressive vision', 'mainstream progressive...",'They marched on the White House to make their...
3,Recent editorials in leading liberal US newspa...,https://news.google.com/rss/articles/CBMibWh0d...,,Political Dimension,Middle East Eye,biased,Political Dimension,,"['liberal US newspapers', 'left-wing members'],",'Recent editorials in major US newspapers have...
4,Following the Labour Party governments histor...,https://news.google.com/rss/articles/CBMiOWh0d...,,Political Dimension,WSWS,non-biased,Political Dimension,,"['moved sharply to the right', 'a surge in ...",'Following the Labour Party governments defea...
...,...,...,...,...,...,...,...,...,...,...
4094,Editors Note: Patrick T. Brown is a fellow at...,https://news.google.com/rss/articles/CBMidmh0d...,45188,Political Dimension,CNN,biased,Political Dimension,,"['conservative critiques', 'K-12 education'],",'Glenn Youngkin won Virginia's gubernatorial e...
4095,Who are the Liberal Democrats? Illustration b...,https://news.google.com/rss/articles/CBMiYGh0d...,45180,Political Dimension,PoliticsHome,biased,Political Dimension,,"['Liberal Democrats', 'third largest party'],",'Who are the Liberal Democrats? Buoyed by a su...
4096,"Donald Trump attends a campaign rally in Waco,...",https://news.google.com/rss/articles/CBMiXGh0d...,,Political Dimension,Gulf Today,biased,Political Dimension,,"['right-wing Republican', 'Democratic Hillary ...",'Donald Trump attends a campaign rally in Waco...
4097,Joe Biden lied repeatedly when he claimed he k...,https://news.google.com/rss/articles/CBMikwFod...,45172,Political Dimension,Las Vegas Review-Journal,biased,Political Dimension,,"['Joe Biden lied', 'influence-peddling busines...",'Joe Biden made false statements about his kno...


##### Remove unnecessary column


In [13]:
df = df.drop(columns='date_published', axis=1)
df = df.drop(columns='aspect', axis=1)
df = df.drop(columns='keyword_category', axis=1)
df = df.drop(columns='outlet', axis=1)
df = df.drop(columns='dimension', axis=1)
df = df.drop(columns='biased_phrases', axis=1)
df = df.drop(columns='debias_text', axis=1)
df = df.drop(columns='source', axis=1)


In [14]:
df.head()

Unnamed: 0,text,source,label_bias
0,After three very public failures in balloting ...,https://news.google.com/rss/articles/CBMiMWh0d...,biased
1,"WASHINGTON, Oct. 20 (Xinhua) -- The U.S. House...",https://news.google.com/rss/articles/CBMiSGh0d...,biased
2,They marched on the White House to make their ...,https://news.google.com/rss/articles/CBMiZWh0d...,non-biased
3,Recent editorials in leading liberal US newspa...,https://news.google.com/rss/articles/CBMibWh0d...,biased
4,Following the Labour Party governments histor...,https://news.google.com/rss/articles/CBMiOWh0d...,non-biased


In [15]:
df.to_csv('Dataset.csv')

In [16]:
###Drop Nan Values
df=df.dropna()

##### Splitting features and labels

In [17]:
X=df.drop('label_bias', axis=1)
y=df['label_bias']

In [18]:
y.value_counts()

label_bias
non-biased    2361
biased        1738
Name: count, dtype: int64

In [19]:
print(X.shape)
print(y.shape)

(4099, 2)
(4099,)


In [20]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dropout
vocab_size = 5000

In [21]:
messages=X.copy()
messages.reset_index(inplace=True)

In [22]:
import nltk
import re
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\parik\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

##### Dataset Preprocessing


In [23]:
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
corpus = []
for i in range(0, len(messages)):
    print(i)
    review = re.sub('[^a-zA-Z]', ' ', messages['text'][i])
    review = review.lower()
    review = review.split()
    
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [24]:
corpus

['three public failur ballot full hous end came quietli secret ballot basement meet fellow hous republican fate make martyr parti right wing view defeat evid parti establish insuffici dedic conserv valu',
 'washington oct xinhua u hous repres remain rudderless right wing republican jim jordan chairman hous judiciari committe republican nomine speaker fail garner enough support elect surpris u politician addict wildli pursu person partisan interest even govern shutdown immin peopl interest deepli damag polit polar defac american democraci repeat failur',
 'march white hous make demand clear biden broker ceasefir releas hostag held hama milit forc american condemn israel bomb gaza eventu major chang current isra govern treatment palestinian protestor gather monday afternoon progress jewish american activist organ repres variou segment us polit left palestinian isra american jewish muslim peopl longtim activist newer alli sympathet nuanc posit peac reform israel palestin also repres mains

In [25]:
onehot_repr=[one_hot(words,vocab_size)for words in corpus] 
onehot_repr

[[3595,
  1187,
  3206,
  173,
  2900,
  1366,
  2585,
  3967,
  1130,
  2654,
  173,
  3925,
  1413,
  1447,
  1366,
  3572,
  827,
  501,
  4154,
  4885,
  2588,
  2853,
  1732,
  2154,
  4605,
  4885,
  1208,
  135,
  4852,
  786,
  4145],
 [4729,
  4946,
  4951,
  1878,
  1366,
  2771,
  2129,
  1776,
  2588,
  2853,
  3572,
  649,
  4155,
  2956,
  1366,
  4920,
  3961,
  3572,
  3633,
  1918,
  1125,
  3352,
  4018,
  4133,
  371,
  885,
  1878,
  4790,
  4325,
  4276,
  3780,
  3724,
  753,
  2831,
  1619,
  4530,
  4020,
  4723,
  1856,
  2831,
  4890,
  827,
  2000,
  4995,
  3684,
  3873,
  421,
  491,
  3206],
 [1686,
  3908,
  1366,
  501,
  3748,
  3347,
  2062,
  615,
  4789,
  4281,
  2122,
  3480,
  3833,
  2112,
  2394,
  3873,
  1189,
  3492,
  191,
  4540,
  453,
  1686,
  3922,
  2204,
  4244,
  4530,
  868,
  580,
  3906,
  3936,
  496,
  4611,
  318,
  4097,
  3873,
  3828,
  2558,
  2771,
  4775,
  575,
  4162,
  2000,
  1570,
  580,
  4244,
  3873,
  4097,
  291

In [26]:
sent_length=20
embedded_docs=pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[3925 1413 1447 ... 4852  786 4145]
 [4276 3780 3724 ...  421  491 3206]
 [3038 2197  210 ... 4862 2405 4729]
 ...
 [4215  123 1488 ... 4200 1933  371]
 [ 689 3449  447 ... 1045 3481 2576]
 [3575 4106 1488 ...  114  285 4106]]


##### Setting up and Training Model

In [27]:

from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense

embedding_vector_features = 40
# model1 = Sequential()
# model1.add(Embedding(vocab_size, embedding_vector_features))
# model1.add(Bidirectional(LSTM(50)))
# model1.add(Dropout(0.3))
# model1.add(Dense(1, activation='sigmoid'))
# model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# print(model1.summary())

from tensorflow.keras.regularizers import l2

model1 = Sequential()
model1.add(Embedding(vocab_size, embedding_vector_features))
model1.add(Bidirectional(LSTM(50, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01))))
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='sigmoid', kernel_regularizer=l2(0.01)))

model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model1.summary())

None


In [28]:
len(embedded_docs),y.shape

(4099, (4099,))

In [29]:
print(embedded_docs.shape)

(4099, 20)


In [30]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [31]:
import numpy as np
X_final=np.array(embedded_docs)
y_final=np.array(y)

In [32]:
X_final.shape,y_final.shape
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=42)

##### Training the Model:

In [33]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

# Fit the model
history = model1.fit(X_train, y_train, validation_data=(X_test, y_test), 
                     epochs=10, batch_size=32, callbacks=[early_stopping, reduce_lr])

model1.save('News_Bias.h5')

Epoch 1/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.5579 - loss: 2.2918 - val_accuracy: 0.5831 - val_loss: 1.0249 - learning_rate: 0.0010
Epoch 2/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5783 - loss: 0.9047 - val_accuracy: 0.5831 - val_loss: 0.7214 - learning_rate: 0.0010
Epoch 3/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5721 - loss: 0.7012 - val_accuracy: 0.6061 - val_loss: 0.6832 - learning_rate: 0.0010
Epoch 4/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7876 - loss: 0.5474 - val_accuracy: 0.6090 - val_loss: 0.7464 - learning_rate: 0.0010
Epoch 5/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8671 - loss: 0.4235 - val_accuracy: 0.5817 - val_loss: 0.8880 - learning_rate: 0.0010
Epoch 6/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/



In [34]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assuming X_test and y_test are already loaded and preprocessed
# Load the trained model
model = load_model('News_Bias.h5')

# Preprocess X_test (assuming X_test is in the same format as X_train)
maxlen = 20  # Assuming the maximum sentence length used during training
X_test_processed = pad_sequences(X_test, padding='pre', maxlen=maxlen)

# Predict probabilities for each class
y_pred_proba = model.predict(X_test_processed)

# Convert probabilities to classes (0 or 1)
y_pred = (y_pred_proba > 0.5).astype(int)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

# Print classification report
print(classification_report(y_test, y_pred))




[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
Accuracy: 60.61%
              precision    recall  f1-score   support

           0       0.52      0.64      0.57       564
           1       0.69      0.58      0.63       789

    accuracy                           0.61      1353
   macro avg       0.61      0.61      0.60      1353
weighted avg       0.62      0.61      0.61      1353



In [41]:
# Assuming you have already trained and saved the model
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load the saved model
model = load_model('News_Bias.h5')

# Function to preprocess custom input
def preprocess_input(text):
    vocab_size = 5000
    embedding_vector_features = 40
    sent_length = 20
    
    # Tokenize and preprocess text
    ps = PorterStemmer()
    review = re.sub('[^a-zA-Z]', ' ', text)
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    onehot_repr = [one_hot(review, vocab_size)]
    embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_length)
    
    return np.array(embedded_docs)

# Example custom input
custom_input = "This news article presents a balanced view of the recent economic policies."

# Preprocess the input
processed_input = preprocess_input(custom_input)

# Make prediction
prediction = model.predict(processed_input)

# Interpret prediction
if prediction[0][0] >= 0.5:
    print("The news is biased.")
else:
    print("The news is not biased.")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step
The news is biased.


In [25]:
import numpy as np
from transformers import BertTokenizer, BertModel
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')


import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import re


# BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

# Custom dataset
# class NewsDataset(Dataset):
#     def __init__(self, texts, labels):
#         self.texts = texts
#         self.labels = labels
        
#     def __len__(self):
#         return len(self.texts)
    
#     def __getitem__(self, idx):
#         return {key: val[idx].clone().detach() for key, val in self.texts[idx].items()}, self.labels[idx]
from transformers import BatchEncoding

class NewsDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        
        if isinstance(text, BatchEncoding):
            # Convert BatchEncoding to dict and detach tensors
            return {key: val.clone().detach() for key, val in text.items()}, torch.tensor(label, dtype=torch.long)
        elif isinstance(text, dict):
            return {key: val.clone().detach() for key, val in text.items()}, torch.tensor(label, dtype=torch.long)
        else:
            raise TypeError(f"Expected BatchEncoding or dict for text, but got {type(text)}")

# # Debug print
# print(f"Total number of samples: {len(texts)}")
# print(f"Sample text: {texts[0]}")
# print(f"Sample label: {labels[0]}")




In [26]:
# Bidirectional LSTM model
class BiasDetector(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.2):
        super(BiasDetector, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
                            batch_first=True, bidirectional=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out
    

def train_epoch(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for batch in train_loader:
        inputs, labels = batch
        inputs = {k: v.squeeze(1) for k, v in inputs.items()}
        labels = labels.long()  # Ensure labels are Long
        
        optimizer.zero_grad()
        
        with torch.no_grad():
            embeddings = bert_model(**inputs)[0]
        
        outputs = model(embeddings)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()

    return total_loss / len(train_loader)

def evaluate(model, data_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            inputs, labels = batch
            inputs = {k: v.squeeze(1) for k, v in inputs.items()}
            labels = labels.long()  # Ensure labels are Long
            
            embeddings = bert_model(**inputs)[0]
            outputs = model(embeddings)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
    accuracy = correct / total
    avg_loss = total_loss / len(data_loader)
    return avg_loss, accuracy

In [38]:
from sklearn.preprocessing import LabelEncoder
if __name__ == "__main__":
    
    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    
    label_encoder = LabelEncoder()
    # Hyperparameters
    input_size = 768  # BERT embedding size
    hidden_size = 256
    num_layers = 2
    num_classes = 2  # Number of unique classes
    num_epochs = 10
    batch_size = 32
    learning_rate = 0.001

    # Prepare data
    texts = [tokenizer(text, padding='max_length', max_length=512, truncation=True, return_tensors="pt") for text in corpus]
    texts = [BatchEncoding(t) for t in texts]  # Ensure all items are BatchEncoding objects
    
    # Encode labels
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(df['label_bias'])
    labels = torch.tensor(encoded_labels)

    # # Debug print
    # print(f"Total number of samples: {len(texts)}")
    # print(f"Sample text: {texts[0]}")
    # print(f"Sample label: {labels[0]}")

    # Split data
    from sklearn.model_selection import train_test_split
    train_texts, temp_texts, train_labels, temp_labels = train_test_split(texts, labels, test_size=0.3, random_state=42)
    val_texts, test_texts, val_labels, test_labels = train_test_split(temp_texts, temp_labels, test_size=0.5, random_state=42)

    # Create datasets and dataloaders
    train_dataset = NewsDataset(train_texts, train_labels)
    val_dataset = NewsDataset(val_texts, val_labels)
    test_dataset = NewsDataset(test_texts, test_labels)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    print('Dataset loaded')

    # Initialize model, optimizer, etc.
    model = BiasDetector(input_size, hidden_size, num_layers, num_classes)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    print('model initialized')
    
    # Training loop
    for epoch in range(num_epochs):
        print("training begins")
        train_loss = train_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_accuracy = Bidirectional.evaluate(model, val_loader, criterion)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')
    
    # Final evaluation on test set
    test_loss, test_accuracy = evaluate(model, test_loader, criterion)
    print(f'Test Accuracy: {test_accuracy:.4f}')


    # Print classification report
    all_true_labels = []
    all_predicted_labels = []

    model.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels = batch
            inputs = {k: v.squeeze(1) for k, v in inputs.items()}
            labels = labels.long()  # Ensure labels are Long

            embeddings = bert_model(**inputs)[0]
            outputs = model(embeddings)
            
            _, predicted = torch.max(outputs.data, 1)
            all_true_labels.extend(labels.cpu().numpy())
            all_predicted_labels.extend(predicted.cpu().numpy())

    target_names = ['class_0', 'class_1']  # Replace with your actual class names
    print(classification_report(all_true_labels, all_predicted_labels, target_names=target_names))

    # Save model
    torch.save(model.state_dict(), 'bias_detector.pth')



Epoch [1/10], Train Loss: 0.3587, Val Loss: 0.8545, Val Accuracy: 0.5787
Epoch [2/10], Train Loss: 0.3011, Val Loss: 1.0466, Val Accuracy: 0.5758
Epoch [3/10], Train Loss: 0.2345, Val Loss: 0.8045, Val Accuracy: 0.5906
Epoch [4/10], Train Loss: 0.2133, Val Loss: 0.7545, Val Accuracy: 0.6050
Epoch [5/10], Train Loss: 0.1946, Val Loss: 0.7542, Val Accuracy: 0.6047
Epoch [6/10], Train Loss: 0.1734, Val Loss: 0.6326, Val Accuracy: 0.6280
Epoch [7/10], Train Loss: 0.1604, Val Loss: 0.6255, Val Accuracy: 0.6273
Epoch [8/10], Train Loss: 0.1389, Val Loss: 0.5062, Val Accuracy: 0.6358
Epoch [9/10], Train Loss: 0.1365, Val Loss: 0.4980, Val Accuracy: 0.6524
Epoch [10/10], Train Loss: 0.1312, Val Loss: 0.4790, Val Accuracy: 0.6732
Test Accuracy: 0.6732
Classification Report:
              precision    recall  f1-score   support

     class_0       0.70      0.80      0.75       150
     class_1       0.65      0.55      0.60       120

    accuracy                           0.67       270
   mac

##### Predicting the bias

In [39]:
def predict_bias(text):
    # Tokenize input using BERT tokenizer
    inputs = tokenizer(text, padding='max_length', max_length=512, truncation=True, return_tensors="pt")
    
    # Get BERT embeddings
    with torch.no_grad():
        embeddings = bert_model(**inputs)[0]
    
    # Get prediction from BiasDetector model
    with torch.no_grad():
        outputs = model(embeddings)
        _, predicted = torch.max(outputs.data, 1)
    
    return predicted.item()

custom_input = "This news article claims that climate change is a hoax perpetrated by corporations to control the public. It presents no credible evidence and uses fear-mongering language."
prediction = predict_bias(custom_input)

if prediction == 0:
    print("The news is not biased.")
elif prediction == 1:
    print("The news is biased.")
else:
    print("Invalid prediction.")


the news is not biased. 
