### Import modules

In [1]:
from scrapper.art_hash import get_hash
from scrapper.extract_artcls_data import current_artcls, artcl_data, artcl_content
from scrapper.location_detector import detect_location, validate_location, get_location_id
from scrapper.relevance_score_claculator import calculate_score as score
from database_manager.database_connector import dbconnection as db
from database_manager import querys

### Set the URL of the page that lists the articles

In [2]:
# Listing page that is scraped for the current articles
# (elnuevodia.com, "Seguridad" section)
news_url = "https://www.elnuevodia.com/noticias/seguridad"

### Construct data dictionary and insert data into database

In [3]:
# Scrape the articles currently listed at news_url (Seguridad category)
# and store every one whose hash is not in the database yet, together with
# its detected locations and their refreshed relevance scores.
articles = current_artcls(news_url)

for article in articles:
    # A connection is (re)opened for each article and is always released by
    # the `finally` clauses below, even when scraping or a query raises.
    db.reconnect()
    try:
        cursor = db.cursor(buffered=True)
        try:
            # Look the article up by its hash.
            art_hash = get_hash(article=article)
            cursor.execute(querys.CHECK_HASH, [art_hash])
            # Number of stored rows matching the hash.
            # NOTE(review): relies on the buffered cursor reporting rowcount
            # right after execute() (MySQL Connector/Python behaviour) - confirm.
            check = cursor.rowcount
            if check != 0:
                # Article is already stored; nothing to do for it.
                continue

            # Initial article data: URL, Headline, Author, and Hash
            query_dict = artcl_data(article=article)
            # Add the content data: Subheadline, Datetime, and Content
            query_dict.update(artcl_content(article_data=query_dict))

            # Insert the article into table Articles and keep its new ID
            cursor.execute(querys.INSERT_ARTICLE, query_dict)
            db.commit()
            art_id = cursor.lastrowid

            # Detect the locations mentioned in the article, then validate them
            locs_found = detect_location(article_data=query_dict)
            loc_validated = validate_location(location=locs_found)
            if not loc_validated:
                # No valid location: the article has no relation rows
                continue

            for loc in loc_validated:
                loc_id = get_location_id(validated_location=loc)

                # Link the article to the location (ArticlesLocationRelation)
                cursor.execute(
                    querys.INSERT_ARTICLE_LOCATION,
                    {"articleid": art_id, "locationid": loc_id},
                )
                # Committed before scoring.
                # NOTE(review): presumably score() reads the stored relations,
                # so the new row has to be visible to it - confirm.
                db.commit()

                # Recalculate and store the relevance score of the location
                relevance_score = score(location_id=loc_id)
                cursor.execute(
                    querys.UPDATE_RELEVACE_SCORE,
                    {"locationid": loc_id, "score": relevance_score},
                )
                db.commit()
        finally:
            cursor.close()
    finally:
        db.close()

## Print Articles rows

In [6]:
# Print every row of table Articles for a visual check.
# The cursor and the connection are released even if the query fails.
db.reconnect()
try:
    query = """SELECT * FROM Articles"""
    cursor = db.cursor(buffered=True)
    try:
        cursor.execute(query)
        for row in cursor:
            print(row)
    finally:
        cursor.close()
finally:
    db.close()

(1, 'https://www.elnuevodia.com/noticias/seguridad/notas/policia-verifica-videos-y-entrevista-testigos-en-relacion-al-asesinato-de-un-adolescente-de-15-anos-en-aguadilla/', 'Policía verifica vídeos y entrevista testigos en relación al asesinato de un adolescente de 15 años en Aguadilla', 'Por Alex Figueroa Cancel', 'Los investigadores sospechan que hubo más de un atacante', datetime.datetime(2024, 7, 23, 11, 51), 'b15fbb2241cd5026433ac9450f31f15657ef962bea5d9a858774038795b8c224')
(2, 'https://www.elnuevodia.com/noticias/seguridad/notas/hallan-el-cuerpo-de-una-persona-en-estado-descomposicion-en-toa-baja/', 'Hallan el cuerpo de una persona en estado descomposición en Toa Baja', 'Por Alex Figueroa Cancel', 'Fue encontrado por un ciudadano debajo del llamado “Puente de las Banderas” en Levittown', datetime.datetime(2024, 7, 23, 9, 14), 'a36e87d490393f673fbc652fe4c6175187d32bbc468d7ef2bc3c66e7a9de8c77')
(3, 'https://www.elnuevodia.com/noticias/seguridad/notas/desmantelan-organizacion-crimi

## Print Articles-Location Relation rows

In [5]:
# Print every row of table ArticlesLocationRelation for a visual check.
# The cursor and the connection are released even if the query fails.
db.reconnect()
try:
    query = """SELECT * FROM ArticlesLocationRelation"""
    cursor = db.cursor(buffered=True)
    try:
        cursor.execute(query)
        for row in cursor:
            print(row)
    finally:
        cursor.close()
finally:
    db.close()

(1, 3)
(11, 3)
(23, 3)
(22, 6)
(21, 7)
(10, 11)
(16, 11)
(17, 11)
(28, 11)
(29, 11)
(12, 13)
(19, 13)
(24, 13)
(31, 29)
(3, 32)
(5, 36)
(13, 36)
(25, 36)
(12, 40)
(19, 40)
(24, 40)
(13, 44)
(25, 44)
(22, 50)
(7, 65)
(8, 65)
(9, 65)
(10, 65)
(13, 65)
(14, 65)
(15, 65)
(18, 65)
(25, 65)
(26, 65)
(27, 65)
(30, 69)
(2, 70)


## Print Location rows

In [4]:
# Print every row of table Location for a visual check.
# The cursor and the connection are released even if the query fails.
db.reconnect()
try:
    query = """SELECT * FROM Location"""
    cursor = db.cursor(buffered=True)
    try:
        cursor.execute(query)
        for row in cursor:
            print(row)
    finally:
        cursor.close()
finally:
    db.close()

(1, 'adjuntas', 0.0)
(2, 'aguada', 0.0)
(3, 'aguadilla', 0.13043478260869565)
(4, 'aguas_buenas', 0.0)
(5, 'aibonito', 0.0)
(6, 'anasco', 0.045454545454545456)
(7, 'arecibo', 0.047619047619047616)
(8, 'arroyo', 0.0)
(9, 'barceloneta', 0.0)
(10, 'barranquitas', 0.0)
(11, 'bayamon', 0.1724137931034483)
(12, 'cabo_rojo', 0.0)
(13, 'caguas', 0.125)
(14, 'camuy', 0.0)
(15, 'canovanas', 0.0)
(16, 'carolina', 0.0)
(17, 'catano', 0.0)
(18, 'cayey', 0.0)
(19, 'ceiba', 0.0)
(20, 'ciales', 0.0)
(21, 'cidra', 0.0)
(22, 'coamo', 0.0)
(23, 'comerio', 0.0)
(24, 'corozal', 0.0)
(25, 'culebra', 0.0)
(26, 'dorado', 0.0)
(27, 'fajardo', 0.0)
(28, 'florida', 0.0)
(29, 'guanica', 0.03225806451612903)
(30, 'guayama', 0.0)
(31, 'guayanilla', 0.0)
(32, 'guaynabo', 0.3333333333333333)
(33, 'gurabo', 0.0)
(34, 'hatillo', 0.0)
(35, 'hormigueros', 0.0)
(36, 'humacao', 0.12)
(37, 'isabela', 0.0)
(38, 'jayuya', 0.0)
(39, 'juana_diaz', 0.0)
(40, 'juncos', 0.125)
(41, 'lajas', 0.0)
(42, 'lares', 0.0)
(43, 'las_marias

# Testing