### Import modules

In [1]:
from scrapper.art_hash import get_hash
from scrapper.extract_artcls_data import current_artcls, artcl_data, artcl_content
from scrapper.location_detector import detect_location, validate_location, get_location_id
from scrapper.relevance_score_claculator import calculate_score as score
from database_manager.database_connector import dbconnection as db
from database_manager import querys

### Save the URL of the page that lists the articles

In [2]:
# Section page whose article listing is scraped by the cells below.
news_url = "https://www.elnuevodia.com/noticias/seguridad"

### Construct data dictionary and insert data into database

In [5]:
# Scrape the current articles, store the ones not yet in the database and
# link each stored article to every validated location found in it.
db.reconnect()
cursor = db.cursor(buffered=True)

# Load the hashes of the already stored articles into a set so the
# "already stored?" check below is a constant-time lookup.
# NOTE(review): assumes the hash column comes back as a hashable value
# (e.g. str) -- confirm against the schema / connector settings.
cursor.execute(querys.FETCH_HASHES)
hashes = {row[0] for row in cursor}

# Get current articles
articles = current_artcls(news_url)

for article in articles:
    # Get article hash
    art_hash = get_hash(article=article)

    # Only articles whose hash is not known yet are stored.
    if art_hash in hashes:
        continue

    # Build the article record: base data first, then the content fields.
    data_dict = artcl_data(article=article)
    data_dict.update(artcl_content(article_data=data_dict))

    # Execute insert and commit to database
    cursor.execute(querys.INSERT_ARTICLE, data_dict)
    db.commit()

    # Remember the hash so the same article appearing twice in this scrape
    # is not inserted a second time.
    hashes.add(art_hash)

    # ID of the row just inserted; read before the cursor runs anything else.
    art_id = cursor.lastrowid

    locs_found = detect_location(article_data=data_dict)
    loc_validated = validate_location(location=locs_found)
    if not loc_validated:
        continue

    for loc in loc_validated:
        loc_id = get_location_id(validated_location=loc)
        scre = score(location_id=loc_id)
        print(f"{loc} ID: {loc_id} --- Score {scre}")

        # Link the article to the location. A separate name is used so the
        # article record in data_dict is not overwritten.
        relation = {"articleid": art_id, "locationid": loc_id}
        cursor.execute(querys.INSERT_ARTICLE_LOCATION, relation)
        db.commit()

## Calculate relevance score

In [6]:
# Open a fresh connection and print the relevance score of every location.
# The cursor is left open on purpose: the next cell reuses it.
db.reconnect()
cursor = db.cursor(buffered=True)
cursor.execute("SELECT * FROM Location")
for row in cursor:
    # The first column is used as the location ID, the second as its name.
    location_id, location_name = row[0], row[1]
    scre = score(location_id=location_id)
    print(f"{location_name} ID: {location_id} --- Score {scre}")

adjuntas ID: 1 --- Score 0.0
aguada ID: 2 --- Score 0.0
aguadilla ID: 3 --- Score 0.02631578947368421
aguas_buenas ID: 4 --- Score 0.0
aibonito ID: 5 --- Score 0.02631578947368421
anasco ID: 6 --- Score 0.0
arecibo ID: 7 --- Score 0.0
arroyo ID: 8 --- Score 0.0
barceloneta ID: 9 --- Score 0.0
barranquitas ID: 10 --- Score 0.0
bayamon ID: 11 --- Score 0.21052631578947367
cabo_rojo ID: 12 --- Score 0.0
caguas ID: 13 --- Score 0.15789473684210525
camuy ID: 14 --- Score 0.02631578947368421
canovanas ID: 15 --- Score 0.0
carolina ID: 16 --- Score 0.0
catano ID: 17 --- Score 0.0
cayey ID: 18 --- Score 0.02631578947368421
ceiba ID: 19 --- Score 0.0
ciales ID: 20 --- Score 0.0
cidra ID: 21 --- Score 0.02631578947368421
coamo ID: 22 --- Score 0.0
comerio ID: 23 --- Score 0.0
corozal ID: 24 --- Score 0.0
culebra ID: 25 --- Score 0.0
dorado ID: 26 --- Score 0.02631578947368421
fajardo ID: 27 --- Score 0.02631578947368421
florida ID: 28 --- Score 0.0
guanica ID: 29 --- Score 0.05263157894736842
gu

### Print Articles table rows

In [None]:
# Dump every row of the Articles table using the cursor opened by an earlier cell.
query = "SELECT * FROM Articles"
cursor.execute(query)
for article_row in cursor.fetchall():
    print(article_row)

## Print Location rows

In [None]:
# Dump every row of the Location table on a fresh connection.
db.reconnect()
query = """SELECT * FROM Location"""
cursor = db.cursor(buffered=True)
try:
    cursor.execute(query)
    for row in cursor:
        print(row)
finally:
    # Close the connection even if the query or the printing fails.
    db.close()

## Print bridging table rows

In [None]:
# Dump every row of the article/location bridging table.
# The previous cell closes the connection, so it has to be reopened before a
# cursor can be created.
db.reconnect()
query = """SELECT * FROM ArticlesLocationRelation"""
cursor = db.cursor(buffered=True)
try:
    cursor.execute(query)
    for row in cursor:
        print(row)
finally:
    # Close the connection even if the query or the printing fails.
    db.close()

## Close database connection

In [None]:
# Close the shared database connection once the notebook is done with it.
# NOTE(review): some cells above already call db.close(); presumably closing
# again is harmless -- confirm for the connector in use.
db.close()

# Testing

In [None]:
from database_manager.database_connector import dbconnection as db
from database_manager import querys

# Ad-hoc check of the COUNT and ARTICLE_MAXID queries.
db.reconnect()
cursor = db.cursor(buffered=True)
try:
    # Run the COUNT query for location 65; the result is kept in `count`
    # for inspection and is not printed.
    # NOTE(review): presumably counts bridging-table rows for that
    # location -- confirm against querys.COUNT.
    data = {"locationid": 65}
    cursor.execute(querys.COUNT, data)
    count = cursor.fetchone()[0]

    # ARTICLE_MAXID is executed without parameters; print the first column
    # of its single result row.
    cursor.execute(querys.ARTICLE_MAXID)
    print(cursor.fetchone()[0])
finally:
    # Close the connection even if one of the queries fails.
    db.close()