In [50]:
# Importar librerías
import pandas as pd
import numpy as np
import tensorflow as tf
from gensim.models import Word2Vec
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense



# Cargar datos
data = pd.read_csv('/content/fake_or_real_news.csv')

# Análisis exploratorio de variables
print(data.head())
print(data['label'].value_counts())

# Preprocesamiento de datos de texto
from gensim.utils import simple_preprocess

data['titulo_procesado'] = data['title'].apply(lambda x: simple_preprocess(x))
data['texto_procesado'] = data['text'].apply(lambda x: simple_preprocess(x))

# Entrenamiento de modelo Word2Vec
model_w2v = Word2Vec(sentences=data['titulo_procesado'] + data['texto_procesado'],
                     vector_size=100, window=5, min_count=1, workers=4)

# Vectorización de texto
def vectorizar_texto(titulo, texto, model):
    vectors_titulo = []
    vectors_texto = []

    for word in titulo:
        if word in model.wv:
            vectors_titulo.append(model.wv[word])

    for word in texto:
        if word in model.wv:
            vectors_texto.append(model.wv[word])

    if vectors_titulo and vectors_texto:
        vector_titulo = np.mean(vectors_titulo, axis=0)
        vector_texto = np.mean(vectors_texto, axis=0)
        return np.concatenate([vector_titulo, vector_texto])
    else:
        return np.zeros(model.vector_size * 2)

data['vector_texto'] = data.apply(lambda row: vectorizar_texto(row['titulo_procesado'], row['texto_procesado'], model_w2v), axis=1)

# Creación de datos para la red neuronal
X = np.vstack(data['vector_texto'].to_numpy())
y = (data['label'] == 'FAKE').astype(int)

# Definición del modelo de red neuronal
model = Sequential()
model.add(Dense(128, input_dim=model_w2v.vector_size * 2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Función dummy para test_function
@tf.autograph.experimental.do_not_convert
def dummy_test_function(self, step_inputs, step_state):
    pass

# Entrenamiento del modelo
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)

# Evaluación del modelo
loss, accuracy = model.evaluate(X, y)
print(f'Loss: {loss}, Accuracy: {accuracy}')




   Unnamed: 0                                              title  \
0        8476                       You Can Smell Hillary’s Fear   
1       10294  Watch The Exact Moment Paul Ryan Committed Pol...   
2        3608        Kerry to go to Paris in gesture of sympathy   
3       10142  Bernie supporters on Twitter erupt in anger ag...   
4         875   The Battle of New York: Why This Primary Matters   

                                                text label  
0  Daniel Greenfield, a Shillman Journalism Fello...  FAKE  
1  Google Pinterest Digg Linkedin Reddit Stumbleu...  FAKE  
2  U.S. Secretary of State John F. Kerry said Mon...  REAL  
3  — Kaydee King (@KaydeeKing) November 9, 2016 T...  FAKE  
4  It's primary day in New York and front-runners...  REAL  
REAL    3171
FAKE    3164
Name: label, dtype: int64
Epoch 1/10

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.19009771943092346, Accuracy: 0.9305446147918701


In [51]:
# Importar librerías adicionales

import requests
from bs4 import BeautifulSoup

# Función para obtener texto de una URL
def obtener_texto_desde_url(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Verificar si la solicitud fue exitosa
        soup = BeautifulSoup(response.text, 'html.parser')
        # Ajusta la selección del contenido según la estructura de la página web
        contenido = soup.get_text()
        return contenido
    except Exception as e:
        print(f'Error al obtener contenido de la URL: {url}')
        print(e)
        return None

# URL de las dos noticias nuevas
url_noticia1 = 'https://www.breitbart.com/politics/2016/09/10/exposed-fbi-director-james-comeys-clinton-foundation-connection/'
url_noticia2 = 'https://english.elpais.com/sports/2023-07-24/saudi-arabian-soccer-team-al-hilal-makes-record-332-million-bid-for-france-striker-kylian-mbappe.html'

# Obtener texto de las noticias desde las URLs
texto_noticia1 = obtener_texto_desde_url(url_noticia1)
texto_noticia2 = obtener_texto_desde_url(url_noticia2)

# Verificar si se obtuvo un texto válido antes de procesar
if texto_noticia1 is not None and texto_noticia2 is not None:
    # Preprocesamiento de datos de texto
    titulo_noticia1_procesado = simple_preprocess("Título de la Noticia 1")
    texto_noticia1_procesado = simple_preprocess(texto_noticia1)

    titulo_noticia2_procesado = simple_preprocess("Título de la Noticia 2")
    texto_noticia2_procesado = simple_preprocess(texto_noticia2)

    # Vectorización de texto
    vector_noticia1 = vectorizar_texto(titulo_noticia1_procesado, texto_noticia1_procesado, model_w2v)
    vector_noticia2 = vectorizar_texto(titulo_noticia2_procesado, texto_noticia2_procesado, model_w2v)

    # Crear datos para la predicción
    X_prediccion = np.vstack([vector_noticia1, vector_noticia2])

    # Realizar la predicción
    predicciones = model.predict(X_prediccion)

    # Imprimir resultados
    for i, prediccion in enumerate(predicciones):
        etiqueta_predicha = 'FAKE' if prediccion < 0.5 else 'REAL'
        print(f'Noticia {i + 1}: Etiqueta Predicha: {etiqueta_predicha} (Confianza: {prediccion[0]:.4f})')
else:
    print('No se pudo obtener texto válido de al menos una de las noticias.')




Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Noticia 1: Etiqueta Predicha: REAL (Confianza: 0.8394)
Noticia 2: Etiqueta Predicha: REAL (Confianza: 0.9185)
