In [45]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from string import punctuation
import operator
import statistics
# Tokens ignored when counting word frequencies: NLTK's Spanish stop words
# plus every character in string.punctuation.
stop_words = set(stopwords.words('spanish') + list(punctuation))
from nltk.stem.porter import PorterStemmer
# Shared stemmer applied to every lower-cased token by the frequency functions.
# NOTE(review): the Porter algorithm is designed for English, while the stop
# words above are Spanish -- confirm whether a Spanish stemmer was intended.
ps = PorterStemmer()

def getFile(fname):
    """Read a UTF-8 text file and return its contents.

    Args:
        fname: Path of the file to read.

    Returns:
        The file contents as a string, or None when the file could not be
        read (the error is printed rather than raised).
    """
    contents = None
    try:
        with open(fname, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except Exception as err:
        # Best effort: report the problem and fall through to return None.
        print(f"Error reading the file: {err}")
    return contents

def tokenizador(texto):
    """Split text into sentence-like fragments.

    A fragment ends at a period followed by a space (``'. '``) or at a
    newline. The delimiter itself is dropped and empty fragments are
    discarded.

    The previous implementation compared single characters against the
    two-character delimiter ``'. '``, which can never match, so text was only
    ever split on newlines.

    Args:
        texto: The text to split.

    Returns:
        A list of non-empty fragments in their original order.
    """
    # Normalise every sentence delimiter to a newline, then split once.
    fragments = texto.replace('. ', '\n').split('\n')
    return [fragment for fragment in fragments if fragment]
        
def clean(textoTokenizado):
    """Normalise each sentence of an already tokenized text.

    Every character that is not alphanumeric, whitespace or one of the
    listed punctuation symbols is replaced by a space; runs of whitespace are
    then collapsed to a single space and the ends are trimmed.

    Args:
        textoTokenizado: Iterable of sentence strings.

    Returns:
        A list with one cleaned string per input sentence, in the same order.
    """
    allowed_punct = "!@#$%^&*()-_=+[]{}|;:'\",.<>/?`~"

    def _is_kept(ch):
        return ch.isalnum() or ch.isspace() or ch in allowed_punct

    cleaned = []
    for fragment in textoTokenizado:
        filtered = ''.join(ch if _is_kept(ch) else ' ' for ch in fragment)
        # split() + join collapses whitespace and trims both ends in one go.
        cleaned.append(' '.join(filtered.split()))
    return cleaned

# Merge: word count, word frequency and per-sentence TF

def countWordSentence(sentences):
    """Count the whitespace-separated words of each sentence.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list of dicts ``{'sentence_id': i, 'num_words': count}`` where
        ``i`` is the 1-based position of the sentence.
    """
    return [
        {'sentence_id': position, 'num_words': len(sentence.split())}
        for position, sentence in enumerate(sentences, start=1)
    ]

def create_freq_dict(sents):
    """Build a per-sentence frequency table of stemmed, non-stop-word tokens.

    Each sentence is tokenized with ``word_tokenize``; tokens are lower-cased
    and stemmed, and stems found in ``stop_words`` are skipped.

    Every sentence now yields exactly one record, even when it contains no
    countable word. Previously the record was only built inside the
    stop-word check, so such a sentence re-appended the previous sentence's
    record (or raised NameError when it was the first sentence).

    Args:
        sents: Iterable of sentence strings.

    Returns:
        A list of dicts ``{'sentence_id': i, 'wordsfreq': {stem: count}}``
        with 1-based sentence ids, one per input sentence.
    """
    freqDict_list = []
    for sentence_id, sent in enumerate(sents, start=1):
        freq_dict = {}
        for word in word_tokenize(sent):
            # NOTE(review): the stop-word test runs on the stem, not on the
            # raw token, so stop words whose stem differs are still counted.
            stem = ps.stem(word.lower())
            if stem not in stop_words:
                freq_dict[stem] = freq_dict.get(stem, 0) + 1
        freqDict_list.append({'sentence_id': sentence_id, 'wordsfreq': freq_dict})
    return freqDict_list


def computeTF(doc_info, freqDict_list):
    """Compute the term frequency of every word within its sentence.

    TF is the word's count in the sentence divided by that sentence's
    ``num_words`` entry in ``doc_info``.

    Args:
        doc_info: List of ``{'sentence_id', 'num_words'}`` dicts, indexed so
            that sentence ``i`` sits at position ``i - 1``.
        freqDict_list: List of ``{'sentence_id', 'wordsfreq'}`` dicts.

    Returns:
        A flat list of ``{'sentence_id', 'TF_score', 'word'}`` dicts.
    """
    TF_scores = []
    for entry in freqDict_list:
        sentence_id = entry['sentence_id']
        TF_scores.extend(
            {
                'sentence_id': sentence_id,
                'TF_score': count / doc_info[sentence_id - 1]['num_words'],
                'word': word,
            }
            for word, count in entry['wordsfreq'].items()
        )
    return TF_scores


import math 
def computeIDF(doc_info, freqDict_list):
    """Compute the inverse document frequency of every word per sentence.

    Each sentence is treated as a document:
    ``idf = ln(number of sentences / number of sentences containing the word)``.

    The number of sentences containing each word is counted once up front
    instead of rescanning the whole list for every word occurrence.

    Args:
        doc_info: List with one entry per sentence; only its length is used.
        freqDict_list: List of ``{'sentence_id', 'wordsfreq'}`` dicts.

    Returns:
        A flat list of ``{'sentence_id', 'IDF_score', 'word'}`` dicts, where
        ``sentence_id`` is the 1-based position of the entry in
        ``freqDict_list``.
    """
    total_docs = len(doc_info)

    # Number of sentences in which each word appears.
    doc_freq = {}
    for entry in freqDict_list:
        for word in entry['wordsfreq']:
            doc_freq[word] = doc_freq.get(word, 0) + 1

    IDF_scores = []
    for position, entry in enumerate(freqDict_list, start=1):
        for word in entry['wordsfreq']:
            IDF_scores.append({
                'sentence_id': position,
                'IDF_score': math.log(total_docs / doc_freq[word]),
                'word': word,
            })
    return IDF_scores

def computeTFIDF(TF_scores, IDF_scores):
    """Combine TF and IDF scores into TF-IDF scores.

    For every IDF entry, the TF entry with the same word and sentence id is
    looked up and the two scores are multiplied.

    IDF entries without a matching TF entry are skipped. Previously the
    append ran whether or not a match was found, so an unmatched entry
    re-appended the previous result dict (or raised NameError when no match
    had happened yet).

    Args:
        TF_scores: List of ``{'sentence_id', 'TF_score', 'word'}`` dicts.
        IDF_scores: List of ``{'sentence_id', 'IDF_score', 'word'}`` dicts.

    Returns:
        A list of ``{'sentence_id', 'TFIDF_score', 'word'}`` dicts in the
        order of ``IDF_scores``.
    """
    # Index TF scores by (sentence, word) to avoid a nested scan.
    tf_lookup = {
        (tf['sentence_id'], tf['word']): tf['TF_score'] for tf in TF_scores
    }

    TFIDF_scores = []
    for idf in IDF_scores:
        key = (idf['sentence_id'], idf['word'])
        if key not in tf_lookup:
            continue
        TFIDF_scores.append({
            'sentence_id': idf['sentence_id'],
            'TFIDF_score': idf['IDF_score'] * tf_lookup[key],
            'word': idf['word'],
        })
    return TFIDF_scores

# The merge covers the functions up to this point

def global_frequency(txtClean):
    """Count how often each stemmed, non-stop-word token occurs in the text.

    The cleaned sentences are joined with spaces, tokenized with
    ``word_tokenize``, lower-cased and stemmed; stems found in ``stop_words``
    are left out.

    Args:
        txtClean: Iterable of cleaned sentence strings.

    Returns:
        A dict mapping each stem to its number of occurrences.
    """
    counts = {}
    for raw_token in word_tokenize(' '.join(txtClean)):
        stem = ps.stem(raw_token.lower())
        if stem in stop_words:
            continue
        counts[stem] = counts.get(stem, 0) + 1
    return counts


def get_keywords(txtClean, n):
    """Return the ``n`` most frequent stems of the text.

    Args:
        txtClean: Iterable of cleaned sentence strings.
        n: Maximum number of keywords to return.

    Returns:
        A list of at most ``n`` stems ordered by decreasing frequency. When
        the text has fewer than ``n`` distinct stems, all of them are returned
        (indexing past the end used to raise IndexError).
    """
    freq_table = global_frequency(txtClean)
    ranked = sorted(freq_table.items(), key=operator.itemgetter(1), reverse=True)
    # max() keeps a negative n from slicing from the end of the list.
    return [word for word, _count in ranked[:max(n, 0)]]

def weigh_keywords(txtClean, TFIDF_scores, n):
    """Double the TF-IDF score of every entry whose word is a top keyword.

    Args:
        txtClean: Iterable of cleaned sentence strings used to pick keywords.
        TFIDF_scores: List of ``{'sentence_id', 'TFIDF_score', 'word'}``
            dicts; the dicts are modified in place.
        n: Number of keywords requested from ``get_keywords``.

    Returns:
        The same ``TFIDF_scores`` list that was passed in.
    """
    boosted_words = get_keywords(txtClean, n)
    for entry in TFIDF_scores:
        if entry['word'] not in boosted_words:
            continue
        entry['TFIDF_score'] *= 2
    return TFIDF_scores

def get_sent_score(TFIDF_scores, text_sents, doc_info):
    """Score each sentence as the sum of the TF-IDF scores of its words.

    Args:
        TFIDF_scores: List of ``{'sentence_id', 'TFIDF_score', 'word'}`` dicts.
        text_sents: Sentences, indexed so that sentence ``i`` sits at
            position ``i - 1``.
        doc_info: List of dicts carrying a ``'sentence_id'`` key; one result
            is produced per entry.

    Returns:
        A list of ``{'sentence_id', 'sent_score', 'sentence'}`` dicts in the
        order of ``doc_info``.
    """
    sentence_info = []
    for doc in doc_info:
        sentence_id = doc['sentence_id']
        # Accumulate explicitly, in list order, starting from 0.
        total = 0
        for entry in TFIDF_scores:
            if sentence_id == entry['sentence_id']:
                total += entry['TFIDF_score']
        sentence_info.append({
            'sentence_id': sentence_id,
            'sent_score': total,
            'sentence': text_sents[sentence_id - 1],
        })
    return sentence_info

def get_summary(sentence_info):
    """Build the summary from the sentences that score at least the average.

    Changes from the previous version:
      * the standard deviation is no longer computed -- it was unused (the
        ``avg + 1.5*stdev`` threshold is disabled) and ``statistics.stdev``
        raised StatisticsError for a single sentence;
      * empty input returns an empty summary instead of raising
        ZeroDivisionError;
      * the average uses ``statistics.mean``, so equal scores average to
        exactly that score and every such sentence is kept.

    Args:
        sentence_info: List of ``{'sentence_id', 'sent_score', 'sentence'}``
            dicts.

    Returns:
        The selected sentences joined by newlines, in their original order.
    """
    if not sentence_info:
        return ''

    scores = [info['sent_score'] for info in sentence_info]
    avg = statistics.mean(scores)

    selected = [
        info['sentence'] for info in sentence_info if info['sent_score'] >= avg
    ]
    return '\n'.join(selected)

In [46]:
def resumirF(texto, n_keywords=5):
    """Summarize a text given as a string and print the result.

    The text is split into sentences, cleaned, scored with keyword-weighted
    TF-IDF, and the sentences scoring at least the average are kept. The
    summary is assembled from the cleaned sentences.

    Sentences are split with the Spanish tokenizer model to match the Spanish
    stop-word list used by the scoring functions (the default model is
    English).

    Args:
        texto: The text to summarize.
        n_keywords: How many of the most frequent words get their TF-IDF
            score doubled.

    Returns:
        The summary as a newline-joined string; an empty string when the text
        contains no sentences.
    """
    oraciones = sent_tokenize(texto, language='spanish')
    if not oraciones:
        # Nothing to score; the scoring steps cannot handle empty input.
        return ''

    txtClean = clean(oraciones)
    txtDic = countWordSentence(txtClean)
    freqDict = create_freq_dict(txtClean)
    TF = computeTF(txtDic, freqDict)
    IDF = computeIDF(txtDic, freqDict)
    TFIDF_scores = computeTFIDF(TF, IDF)

    TFIDF_scores = weigh_keywords(txtClean, TFIDF_scores, n_keywords)

    sentence_info = get_sent_score(TFIDF_scores, txtClean, txtDic)
    summary = get_summary(sentence_info)
    print(summary)
    return summary

In [48]:
def resumir(file, n_keywords=5):
    """Summarize the text stored in a file.

    The file is read with ``getFile``, split into sentences, cleaned, scored
    with keyword-weighted TF-IDF, and the sentences scoring at least the
    average are kept. The summary is assembled from the original (uncleaned)
    sentences.

    Sentences are split with the Spanish tokenizer model to match the Spanish
    stop-word list used by the scoring functions (the default model is
    English).

    Args:
        file: Path of the text file to summarize.
        n_keywords: How many of the most frequent words get their TF-IDF
            score doubled.

    Returns:
        The summary as a newline-joined string; an empty string when the file
        could not be read or contains no sentences.
    """
    texto = getFile(file)
    if texto is None:
        # getFile already reported the error; tokenizing None would raise.
        return ''

    oraciones = sent_tokenize(texto, language='spanish')
    if not oraciones:
        return ''

    txtClean = clean(oraciones)
    txtDic = countWordSentence(txtClean)
    freqDict = create_freq_dict(txtClean)
    TF = computeTF(txtDic, freqDict)
    IDF = computeIDF(txtDic, freqDict)
    TFIDF_scores = computeTFIDF(TF, IDF)

    TFIDF_scores = weigh_keywords(txtClean, TFIDF_scores, n_keywords)

    sentence_info = get_sent_score(TFIDF_scores, oraciones, txtDic)
    return get_summary(sentence_info)

# Summarize each source document, print the combined summaries, then
# summarize that combined text once more.
resumen1 = resumir('prueba2_ia.txt')
resumen2 = resumir('texto.txt')
resumenG = [resumen1, resumen2]
resumen_total = '\n'.join(resumenG)
print(resumen_total)
resumenF = resumirF(resumen_total)

Inteligencia artificial general (AGI)
Esta categoría -Artificial General Intelligence- se alcanza cuando una máquina adquiere capacidades cognitivas a nivel humano.
Es decir, cuando puede realizar cualquier tarea intelectual que realiza una persona.
El Congreso de EE.UU., por su parte, convocó este martes al CEO de OpenAI, Sam Altman, a responder preguntas sobre ChatGPT.
Carlos Ignacio Gutiérrez, investigador de políticas públicas en el Future of Life Institute, explicó a BBC Mundo que uno de los grandes desafíos que presenta la IA es que "no existe un cuerpo colegiado de expertos que deciden cómo regularlo, como ocurre, por ejemplo, con el Panel Intergubernamental sobre Cambio Climático (IPCC)".
"¿Deberíamos desarrollar mentes no humanas que eventualmente podrían superarnos en número, ser más inteligentes, hacernos obsoletos y reemplazarnos?
", cuestionaron.
El futuro de la IA: hacia inteligencias artificiales realmente inteligentes
Ramón López de Mántaras
Instituto de Investigación e