In [52]:
import requests
import re
from bs4 import BeautifulSoup
import spacy
import json
import pandas as pd

## RSS Feeds

In [2]:
# Update / release-note feed URLs, one per cloud provider.
aws_url = 'https://docs.aws.amazon.com/lambda/latest/dg/lambda-updates.rss'
# NOTE(review): this path targets the nl-nl locale of the Azure feed — confirm that is intended.
azure_url = 'https://azurecomcdn.azureedge.net/nl-nl/updates/feed/'
gcp_url = "https://cloud.google.com/feeds/functions-release-notes.xml"

## Classname data

In [13]:
# Load the exported query results from disk.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('query_output.json', encoding='utf-8') as json_file:
    neptune_data = json.load(json_file)

In [14]:
# Ontology IRI prefix; removed below so only the part after '#' remains.
prefix = "http://www.semanticweb.org/stijn/ontologies/2022/4/faasdss#"

# Map each binding's comment text to its `s2` value with the prefix stripped.
aws_s2 = {
    binding['comment']['value']: binding['s2']['value'].replace(prefix, '')
    for binding in neptune_data['results']['bindings']
}

In [11]:
# Load the spaCy pipeline used below for similarity scoring and POS tagging.
nlp = spacy.load("en_core_web_md")

In [15]:
# Pre-compute one spaCy Doc per class comment. The keys of `aws_s2` are the
# comment strings (the values are the class names), so the keys are what gets
# parsed. `nlp.pipe` processes the texts as a batch and yields the Docs in the
# same order as the input.
aws_s2_nlp = list(nlp.pipe(aws_s2.keys()))

In [47]:
# aws_s2.values()

## Parsing Lambda

In [6]:
# Download the AWS feed. The timeout keeps the cell from hanging indefinitely
# on a stalled connection, and raise_for_status() stops the notebook on an
# HTTP error instead of silently parsing an error page as if it were the feed.
response = requests.get(aws_url, timeout=30)
response.raise_for_status()
# Parse the raw bytes with the XML parser so feed tags are handled as XML.
aws_soup = BeautifulSoup(response.content, features='xml')

In [83]:
"""Takes description and returns relevant section""" 

# Matches any markup tag (non-greedy, so each tag is matched separately).
CLEANR = re.compile(r'<.*?>')
# Captures what follows "Lambda now supports" up to (not including) a period
# that is followed by whitespace. `.*` is greedy, so when several such periods
# sit on the same line the capture runs to the last one. Raw strings keep the
# `\.` and `\s` escapes from triggering invalid-escape warnings.
CORE_FINDER = re.compile(r'(?<=Lambda now supports)(.*)(?=\.\s)')


def parse_item(raw_html):
    """Strip markup from a feed description and return its core statement.

    Args:
        raw_html: Description text, possibly containing HTML/XML tags.

    Returns:
        The text captured by ``CORE_FINDER`` (it keeps the space that follows
        "Lambda now supports"). If the pattern does not match, the whole
        tag-stripped text is returned instead.
    """
    cleantext = CLEANR.sub('', raw_html)
    match = CORE_FINDER.search(cleantext)
    # An explicit None check replaces the former bare `except:`, which also
    # swallowed unrelated errors such as KeyboardInterrupt.
    if match is None:
        return cleantext
    return match.group(0)

In [84]:
"""Finds similarities between input and all classes
    Returns top 3 including score""" 

def find_similar_classes(word, top_n=3):
    """Rank the known classes by similarity to ``word``.

    The text is parsed with the module-level ``nlp`` pipeline and compared
    against every pre-parsed class comment in ``aws_s2_nlp``.

    Args:
        word: Text to compare against the class comments.
        top_n: Maximum number of suggestions to return (default 3).

    Returns:
        A list of at most ``top_n`` single-entry dicts ``{class_name: score}``,
        ordered from highest to lowest similarity. Tied scores keep the order
        of ``aws_s2_nlp``.
    """
    query_doc = nlp(word)
    scored = [
        (str(class_doc), query_doc.similarity(class_doc))
        for class_doc in aws_s2_nlp
    ]
    # Sort once and slice. The earlier `value in top_scores` filter returned
    # results in class order and could yield more than three entries on ties.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [{aws_s2[comment]: score} for comment, score in scored[:top_n]]

In [42]:
# Preview the first 15 <item> entries of the parsed feed.
aws_soup.find_all(['item'])[:15]

[<item>
 <title>Node.js 16 runtime</title>
 <link>https://docs.aws.amazon.com/lambda/latest/dg/programming-model.html?icmpid=docs_lambda_rss</link>
 <description>Lambda now supports a new runtime for Node.js 16. Node.js 16 uses Amazon Linux 2. For details, see &lt;a href="https://docs.aws.amazon.com/lambda/latest/dg/programming-model.html?icmpid=docs_lambda_rss"&gt;Building Lambda functions with
             Node.js&lt;/a&gt;.</description>
 <pubDate>Wed, 11 May 2022 19:00:00 GMT</pubDate>
 <guid isPermaLink="false">https://docs.aws.amazon.com/lambda/latest/dg/#Node.js_16_runtime_2022-05-11</guid>
 </item>,
 <item>
 <title>Lambda function URLs</title>
 <link>https://docs.aws.amazon.com/lambda/latest/dg/lambda-urls.html?icmpid=docs_lambda_rss</link>
 <description>Lambda now supports function URLs, which are dedicated HTTP(S) endpoints for Lambda functions. For details, see &lt;a href="https://docs.aws.amazon.com/lambda/latest/dg/lambda-urls.html?icmpid=docs_lambda_rss"&gt;Lambda functio

In [49]:
NAMESPACE = 'faas:'
platform = 'AWSLambda'

# Build one suggestion record per feed entry (first 15 entries only).
triple_suggestions = []
for entry in aws_soup.find_all(['item'])[:15]:
    raw_description = entry.description.text
    # Reduce the description to its core statement before analysing it.
    core_text = parse_item(raw_description)

    # Every token tagged as a proper noun is a candidate feature.
    proper_nouns = [tok.text for tok in nlp(core_text) if tok.pos_ == 'PROPN']

    triple_suggestions.append({
        "Title": entry.title.text,
        "Description": raw_description,
        "Parsed Description": core_text,
        "Date": entry.pubDate.text,
        "Reference": entry.link.text,
        "Superclass Suggestions": find_similar_classes(core_text),
        "Feature Suggestions": proper_nouns,
    })

  sims[(doc1, doc2)] = doc1.similarity(doc2)


In [79]:
# One row per suggestion record; the dict keys become the column names.
df = pd.DataFrame.from_records(triple_suggestions)

In [80]:
# Keep only entries whose description starts with "Lambda now supports";
# the original row index is preserved, so it may have gaps afterwards.
df = df[df['Description'].str.startswith('Lambda now supports')]

In [81]:
# Display the filtered suggestions.
df

Unnamed: 0,Title,Description,Parsed Description,Date,Reference,Superclass Suggestions,Feature Suggestions
0,Node.js 16 runtime,Lambda now supports a new runtime for Node.js ...,a new runtime for Node.js 16. Node.js 16 uses...,"Wed, 11 May 2022 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/p...,"[{'CICDPipelining': 0.741988389149947}, {'Runt...","[Node.js, Node.js, Amazon, Linux]"
1,Lambda function URLs,"Lambda now supports function URLs, which are d...","function URLs, which are dedicated HTTP(S) en...","Wed, 6 Apr 2022 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/l...,"[{'FunctionMarketplace': 0.8937052898719214}, ...","[HTTP(S, Lambda]"
2,Shared test events in the AWS Lambda console,Lambda now supports sharing test events with o...,sharing test events with other IAM users in t...,"Wed, 16 Mar 2022 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/t...,"[{'LicenseType': 0.8267513541347121}, {'Platfo...","[IAM, AWS]"
3,PrincipalOrgId in resource-based policies,Lambda now supports granting permissions to an...,granting permissions to an organization in AW...,"Fri, 11 Mar 2022 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/a...,"[{'LicenseType': 0.822717907973031}, {'Functio...",[AWS]
4,.NET 6 runtime,Lambda now supports a new runtime for .NET 6. ...,a new runtime for .NET 6,"Wed, 23 Feb 2022 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/l...,"[{'LicenseType': 0.7477392604822207}, {'Client...",[]
5,"Event filtering for Kinesis, DynamoDB, and Ama...",Lambda now supports event filtering for Kinesi...,"event filtering for Kinesis, DynamoDB, and Am...","Wed, 24 Nov 2021 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/i...,"[{'DeploymentMethods': 0.7504138202538165}, {'...","[Kinesis, DynamoDB, Amazon, SQS]"
6,mTLS authentication for Amazon MSK and self-ma...,Lambda now supports mTLS authentication for Am...,mTLS authentication for Amazon MSK and self-m...,"Fri, 19 Nov 2021 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/w...,"[{'CICDPipelining': 0.6830321168077201}, {'Dep...","[mTLS, Amazon, MSK, Apache, Kafka]"
7,Lambda on Graviton2,Lambda now supports Graviton2 for functions us...,Graviton2 for functions using arm64 architecture,"Wed, 29 Sep 2021 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/f...,[{'StreamProcessingPlatform': 0.80830392578667...,[arm64]
8,Python 3.9 runtime,Lambda now supports a new runtime for Python 3...,a new runtime for Python 3.9,"Mon, 16 Aug 2021 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/l...,"[{'CICDPipelining': 0.7235304447209447}, {'Run...",[Python]
10,Support for RabbitMQ as an event source on Lambda,Lambda now supports Amazon MQ for RabbitMQ as ...,Amazon MQ for RabbitMQ as an event source,"Wed, 7 Jul 2021 19:00:00 GMT",https://docs.aws.amazon.com/lambda/latest/dg/w...,[{'EventSourceIntegration': 0.682830762122754}...,"[Amazon, MQ, RabbitMQ]"


In [82]:
# Persist the filtered suggestions. The index is written too (pandas default),
# so it shows up as an unnamed first column when the CSV is read back.
# NOTE(review): the DSS/ directory must already exist — to_csv does not create it.
df.to_csv('DSS/results.csv')