In [4]:
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core import patch
import json
import getCredibilityScore as cr
import sentimentAnalysis as sa
import getBiasScore as bs
import spacyMatcher as sm
import validators
import sys
import logging
import traceback

segment = xray_recorder.begin_segment('lambda_handler')

logger = logging.getLogger()
logger.setLevel(logging.INFO)

@xray_recorder.capture('lambda_handler')
def lambda_handler(event, context):

    ## event must be a dict with a url key, and context can be nothing:
    ## lambda_handler({"url":"http://bbc.co.uk"}, "")

    subsegment = xray_recorder.begin_subsegment('lambda_function: check URL')
    logger.info(f'LambdaFunction: Checking we have a URL...')
    try:
        url = event['url']
    except KeyError:
        logger.info(f'LambdaFunction: URL is not present.')
        return {"error" : "No URL provided"}

    xray_recorder.end_subsegment()

    #### Adding "https://" to the URL if not present
    logger.info(f'LambdaFunction: Checking URL has protocol...')
    subsegment = xray_recorder.begin_subsegment('lambda_function: check URL protocol')
    if (not url.startswith('https://') and not url.startswith('http://')):
        url = 'https://' + url
    xray_recorder.end_subsegment()

    logger.info(f'LambdaFunction: Validating URL...')
    subsegment = xray_recorder.begin_subsegment('lambda_function: validate URL')
    if (not validators.url(url)):
        logger.info(f'LambdaFunction: URL is not valid.')
        return {"error" : "The url was bad"}
    xray_recorder.end_subsegment()

    #### Define the object skeleton
    object = {
        "url" : url,
        "results" : []
    }

    logger.info(f'LambdaFunction: Trying to get credibility score...')
    credibilityresult = {}
    try:
        credibilityresult = cr.getCredibilityScore(url)
        object['results'].append(credibilityresult)
    except Exception as e:
        logger.info(f'LambdaFunction: Could not get Credibility Score.')
        logger.info(e)
        credibilityresult = {'type': 'credibility', 'outcome': {"error" : "The credibility score was not available."}}
        object['results'].append(credibilityresult)

    logger.info(f'LambdaFunction: Trying to get sentimentAnalysis score...')
    sentanalysisresult = {}
    try:
        sentanalysisresult = sa.sentimentAnalysis(url)
        if sentanalysisresult['text'] == '-1':
            logger.info(f'LambdaFunction: sentimentAnalysis returned -1, dumping:')
            logger.info(sentanalysisresult)
            object['article'] = {'error': "The article summary could not be generated"}
            object['results'].append({'type': 'polarity',     "outcome": {"error" : "The polarity score could not be calculated."}})
            object['results'].append({'type': 'objectivity', "outcome": {"error" : "The objectivity score could not be calculated."}})
        else:
            object['article'] = {'header': sentanalysisresult['header'], 
                                    'summary': sentanalysisresult['summary'],
                                    'keywords': sentanalysisresult['keywords']}
            object['results'].append({'type': 'polarity',     'outcome': {"score": sentanalysisresult['polarity']}})
            object['results'].append({'type': 'objectivity', 'outcome': {"score": abs(1 - sentanalysisresult['subjectivity'])}})
    except Exception as e:
        logger.info(f'LambdaFunction: Could not get sentimentAnalysis Score.')
        exception_type, exception_value, exception_traceback = sys.exc_info()
        traceback_string = traceback.format_exception(exception_type, exception_value, exception_traceback)
        err_msg = json.dumps({
            "errorType": exception_type.__name__,
            "errorMessage": str(exception_value),
            "stackTrace": traceback_string
        })
        logger.error(err_msg)

        sentanalysisresult = {'text': -1}
        object['article'] = {'error': "The article summary could not be generated"}
        object['results'].append({'type': 'polarity',     "outcome": {"error" : "The polarity score could not be calculated."}})
        object['results'].append({'type': 'objectivity', "outcome": {"error" : "The objectivity score could not be calculated."}})

    logger.info(f'LambdaFunction: Trying to get bias score...')
    if sentanalysisresult['text'] != '-1':
        if 'error' in credibilityresult['outcome']:
            cred_input = -1
        else:
            cred_input = credibilityresult['outcome']['score']
        try:
            biasscoreresult = bs.getBiasScore(cred_input, sentanalysisresult['polarity'], sentanalysisresult['subjectivity'])
            object['results'].append(biasscoreresult)
        except Exception as e:
            logger.info(f'LambdaFunction: Could not get Bias Score.')
            logger.info(e)
            object['results'].append({'type': 'bias', 'outcome': {"error" : "The bias score was not available."}})
    else:
        object['results'].append({'type': 'bias', 'outcome': {"error" : "The bias score was not available."}})

        
        
    if sentanalysisresult['text'] != '-1':
        try:
            text = ('Today - News from BBC: Bad news for Donald Trump in USA, 90% of the population complain '
                    'and a thousand people shout in English on the streets - this happened at 11:00. '
                    'Also, the Rocky Mountains seems to be more and more in risk ... Spanish nationals are applying for a permit. '
                    'World War II truly changed the world. The Bible is the most read book in the world. '
                    'The third released collection is worth £10,000,000. Not easy to walk 20 kilometers in that steep hilly place...')

            ##list_objs = sm.spacyMatcher(text, '') ###sentanalysisresult['text'], '') ## ALL Tags
            list_objs = sm.spacyMatcher(sentanalysisresult['text'], '') ## ALL Tags            
            object['article']['topics'] = list_objs            
        except Exception as e:
            logger.info(f'LambdaFunction: Could not get Topics.')
            logger.info(e)
            object['article']['topics'] = {"error" : "No topics available."}
        
    #### Intended object to return:
    # {
    #   'url':'http://bbc.co.uk',
    #   'article' : {
    #     'header' : 'An Article Title',
    #     'summary' : 'The Article Summary',
    #     'keywords' : ['Boris Johnson', 'Brexit']
    #   },
    #   'results' : [
    #     { 'type' : 'credibility' ...... },
    #     { 'type' : 'polarity' ..... },
    #     { 'type' : 'objectivity' .....},
    #     { 'type' : 'biasscore' .....}
    #   ]
    # }

    return object

xray_recorder.end_segment()

In [7]:
###def test_BiasScore_noPolSubj():

result_dict = lambda_handler({"url":'http://sentimentalists-tests.s3-website.eu-west-2.amazonaws.com/today.html'}, "")
#result_dict = lambda_handler({"url":"http://bbc.co.uk"}, "")

#result_dict = lambda_handler({"url":'https://www.theguardian.com/world/2020/oct/22/england-test-and-trace-reaching-fewer-covid-contacts-than-ever'}, "")

#result_score = {'type': 'bias', 'outcome': {"error" : "The bias score was not available."}}
#assert result_dict['results'][3] == result_score


sentimentAnalysis: Trying getText()
getText: Initialising Article...
getText: Downloading Article...
getText: Parsing Article...
sentimentAnalysis: Dumping return:
{'text': 'Horrible Day!Today is a horrible day. Today is a horrible day. Today is a horrible day.', 'header': 'Test', 'summary': 'Horrible Day!\nToday is a horrible day.\nToday is a horrible day.\nToday is a horrible day.', 'keywords': ['daytoday', 'today', 'test', 'horrible', 'day']}


In [8]:
result_dict

{'url': 'http://sentimentalists-tests.s3-website.eu-west-2.amazonaws.com/today.html',
 'results': [{'type': 'credibility',
   'outcome': {'error': 'The credibility score was not available.'}},
  {'type': 'polarity', 'outcome': {'score': -1.0}},
  {'type': 'objectivity', 'outcome': {'score': 0.0}},
  {'type': 'bias', 'outcome': {'score': 100.0}}],
 'article': {'header': 'Test',
  'summary': 'Horrible Day!\nToday is a horrible day.\nToday is a horrible day.\nToday is a horrible day.',
  'keywords': ['daytoday', 'today', 'test', 'horrible', 'day'],
  'topics': [{'type': 'DATE', 'topic': 'Today'}]}}

In [48]:
result_dict

{'url': 'http://sentimentalists-tests.s3-website.eu-west-2.amazonaws.com/today.html',
 'results': [{'type': 'credibility',
   'outcome': {'error': 'The credibility score was not available.'}},
  {'type': 'polarity', 'outcome': {'score': -1.0}},
  {'type': 'objectivity', 'outcome': {'score': 0.0}},
  {'type': 'bias', 'outcome': {'score': 100.0}}],
 'article': {'header': 'Test',
  'summary': 'Horrible Day!\nToday is a horrible day.\nToday is a horrible day.\nToday is a horrible day.',
  'keywords': ['today', 'horrible', 'daytoday', 'day', 'test'],
  'topics': [{'type': 'DATE', 'topic': 'Today'}]}}

In [16]:
def getBiasScore(credibility, polarity, subjectivity):
            
    IncredibilityPercent = (100-credibility)
    PolarityPercent = (polarity)*100
    SubjectivityPercent = (subjectivity)*100
    
    if (credibility >= 0):
        BiasIndicator = ((IncredibilityPercent + abs(PolarityPercent) + SubjectivityPercent)/3)
    else:
        BiasIndicator = ((abs(PolarityPercent)) + SubjectivityPercent)/2

    response = {'type':'bias','outcome':{'score':BiasIndicator}}
    return response


In [22]:
def sentimentAnalysis(url):
    import getText as getTxt
    from textblob import TextBlob

    dict_return = getTxt.getText(url)
    testimonial = ''

    if dict_return['text'] != '-1':
        ### analysing the text 
        testimonial = TextBlob(dict_return['text'])
        ## use the output as "testimonial.sentiment" or "testimonial.sentences"
        dict_return['polarity'] = testimonial.sentiment.polarity
        dict_return['subjectivity'] = testimonial.sentiment.subjectivity

    return(dict_return)

In [23]:
#url = 'https://www.theguardian.com/world/2020/oct/22/england-test-and-trace-reaching-fewer-covid-contacts-than-ever'
#url = "http://sentimentalists-tests.s3-website.eu-west-2.amazonaws.com/today.html"
#url = 'https://www.bbc.co.uk/news/uk-54234084'

url = 'D:/AATechReturners/Pre-Journey/HTML/index2.html'

sentanalysisresult = sentimentAnalysis(url)
print(sentanalysisresult['polarity']) ######## -1.0
print(sentanalysisresult['subjectivity'])#####  1.0

getBiasScore(95, sentanalysisresult['polarity'], sentanalysisresult['subjectivity'])


getText: Initialising Article...
getText: Downloading Article...
getText: Parsing Article...
getText: Exception: Article `download()` failed with No connection adapters were found for 'D:/AATechReturners/Pre-Journey/HTML/index2.html' on URL D:/AATechReturners/Pre-Journey/HTML/index2.html


KeyError: 'polarity'

In [1]:
### lambda

import json
import getCredibilityScore as cr
import sentimentAnalysis as sa
import validators

def lambda_handler(event, context):
## event must be a dict with a url key, and context can be nothing:
## lambda_handler({"url":"http://bbc.co.uk"}, "")
    
    try:
        url = event['url']
    except KeyError:
        return {
            'statusCode': 400,
            'body': json.dumps("no url")
        }

    #### Adding "https://" to the URL if not present
    if (not url.startswith('https://') and not url.startswith('http://')):
        url = 'https://' + url

    if (not validators.url(url)):
        return json.dumps({"error" : "The url was bad"})

    #### Define the object skeleton
    object = {
        "url" : url,
        "results" : []
    }

    try:
        credibilityresult = cr.getCredibilityScore(url)
        object['results'].append(credibilityresult)
    except Exception as e:
        object['results'].append({'type': 'credibility', 'outcome': {'score': -1}})
        
    try:
        sentanalysisresult = sa.sentimentAnalysis(url)
        object['article'] = {'header': sentanalysisresult['header'], 
                             'summary': sentanalysisresult['summary'],
                             'keywords': sentanalysisresult['keywords']}
        object['results'].append({'type': 'polarity',     'outcome': sentanalysisresult['polarity']})
        object['results'].append({'type': 'subjectivity', 'outcome': sentanalysisresult['subjectivity']})
        object['results'].append({'type': 'objectivity', 'outcome': abs(1 - sentanalysisresult['subjectivity'])})
    except Exception as e:
        object['article'] = {'error': 'The article could not be retrieved.'}
        object['results'].append({'type': 'polarity',     'error': 'no data available'})
        object['results'].append({'type': 'subjectivity', 'error': 'no data available'})

    #### Intended object to return:
    # {
    #   'url':'http://bbc.co.uk',
    #   'article' : {
    #     'header' : 'An Article Title',
    #     'summary' : 'The Article Summary',
    #     'keywords' : ['Boris Johnson', 'Brexit']
    #   },
    #   'results' : [
    #     { 'type' : 'credibility' ...... },
    #     { 'type' : 'polarity' ..... },
    #     { 'type' : 'subjectivity' .....},
    #     { 'type' : 'biasscore' .....}
    #   ]
    # }

    jsonresponse = json.dumps(object)
#    return jsonresponse

    return object

In [19]:
#url = 'http://sentimentalists-tests.s3-website.eu-west-2.amazonaws.com/today.html'
#url = "http://bbc.co.uk"
#url = 'https://www.theguardian.com/world/2020/oct/22/england-test-and-trace-reaching-fewer-covid-contacts-than-ever'
#url = 'https://www.bbc.co.uk/news/uk-54234084'
url = "https://socialistworker.co.uk/"
a = lambda_handler({"url":url}, "")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ana\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
    #### Intended object to return:
    # {
    #   'url':'http://bbc.co.uk',
    #   'article' : {
    #     'header' : 'An Article Title',
    #     'summary' : 'The Article Summary',
    #     'keywords' : ['Boris Johnson', 'Brexit']
    #   },
    #   'results' : [
    #     { 'type' : 'credibility' ...... },
    #     { 'type' : 'polarity' ..... },
    #     { 'type' : 'subjectivity' .....},
    #     { 'type' : 'biasscore' .....}
    #   ]
    # }


In [20]:
a

{'url': 'https://socialistworker.co.uk/',
 'results': [{'type': 'credibility',
   'outcome': {'error': 'The credibility score was not available.'}},
  {'type': 'polarity', 'outcome': 0.16428571428571428},
  {'type': 'subjectivity', 'outcome': 0.5726190476190476},
  {'type': 'objectivity', 'outcome': 0.4273809523809524}],
 'article': {'header': 'Socialist Worker (Britain)',
  'summary': 'CommentWeak and nasty” has been a good description of all Tory governments since the fall of Margaret Thatcher nearly 30 years ago.\nNone fit this description better than Boris Johnson’s administration.\nAfter the appointment of anti-choice judge Amy Coney Barrett to the US Supreme Court, Sarah Bates looks at why opposing abortion is a defining issue for the right',
  'keywords': ['thatcher',
   'worker',
   'sarah',
   'tory',
   'nasty',
   'socialist',
   'nearly',
   'supreme',
   'opposing',
   'margaret',
   'right',
   'britain',
   'description']}}

In [80]:
a['results'][0]

{'type': 'credibility', 'outcome': {'score': -1}}

In [81]:
a['results'][1]

{'type': 'polarity', 'outcome': -1.0}

In [82]:
a['results'][2]

{'type': 'subjectivity', 'outcome': 1.0}

In [86]:
a['article']['header'] ##'Test'

'Test'

In [88]:
a['article']['summary'] ##'Horrible Day!\nToday is a horrible day.\nToday is a horrible day.\nToday is a horrible day.'

'Horrible Day!\nToday is a horrible day.\nToday is a horrible day.\nToday is a horrible day.'

In [89]:
a['article']['keywords'] ##['test', 'horrible', 'today', 'day', 'daytoday']

['test', 'horrible', 'today', 'day', 'daytoday']

In [17]:
1 - 0.5121532634032635

0.4878467365967365

In [18]:
abs(1 - 1)

0