## IntelDragon Tailored News Feed

IntelDragon Tailored News Feed is a scalable artificial intelligence platform that identifies trending cyber threats relevant to an organization, industry, company, or product, based on a written description of the topic of interest.

### Add User Input

In [None]:
# Example search query: the keywords passed to the Google News search further
# down to fetch the candidate articles.

search_query = "cyber attacks"

In [None]:
# Example input text: a free-form description of the topic of interest.
# Further down it is turned into the query vector that the article vectors are
# ranked against, using the Word2Vec model trained on the downloaded articles.

input_text = """A submarine communications cable is a cable laid on the sea bed between land-based stations to carry telecommunication signals across stretches of ocean and sea. The first submarine communications cables laid beginning in the 1850s carried telegraphy traffic, establishing the first instant telecommunications links between continents, such as the first transatlantic telegraph cable which became operational on 16 August 1858. Subsequent generations of cables carried telephone traffic, then data communications traffic. Modern cables use optical fibre technology to carry digital data, which includes telephone, Internet and private data traffic.

Modern cables are typically about 25 mm (1 in) in diameter and weigh around 1.4 tonnes per kilometre (2.5 short tons per mile; 2.2 long tons per mile) for the deep-sea sections which comprise the majority of the run, although larger and heavier cables are used for shallow-water sections near shore.[1][2] Submarine cables first connected all the world's continents (except Antarctica) when Java was connected to Darwin, Northern Territory, Australia, in 1871 in anticipation of the completion of the Australian Overland Telegraph Line in 1872 connecting to Adelaide, South Australia, and thence to the rest of Australia."""

### Load Modules

In [None]:
import subprocess
import sys
import newspaper
import json
from pygooglenews import GoogleNews
import pandas as pd
import numpy as np
from scipy import spatial
import datetime
from datetime import datetime, timedelta
import gensim
from gensim.models import Word2Vec

import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the libraries
# imported above; consider narrowing the filter to specific categories.
warnings.filterwarnings(action = 'ignore')

### Query Google News

In [None]:
# Search Google News for articles matching `search_query` that were published
# between yesterday and today.
#
# `date` is imported here because the notebook's import cell only brings in
# `datetime` and `timedelta` from the datetime module; without this import the
# cell fails with NameError on `date`.
from datetime import date, timedelta

# Read the clock once so both ends of the window are derived from the same day,
# even if the cell happens to run across midnight.
today = date.today()
yesterday = today - timedelta(days=1)

gn = GoogleNews()
s = gn.search(
    search_query,
    from_=yesterday.strftime('%Y-%m-%d'),
    to_=today.strftime('%Y-%m-%d'),
)

### Extract News Articles

In [None]:
# Download and parse every article returned by the Google News search and
# collect its fields as a plain dict in `article_list`.
#
# Extraction is best-effort: an article that cannot be fetched or parsed is
# reported on stderr and skipped, so a single bad link does not abort the run.
article_list = []

for entry in s["entries"]:
    url = None
    try:
        url = entry["link"]
        parsed = newspaper.Article(url=url, language='en')
        parsed.download()
        parsed.parse()
    except Exception as exc:
        # Catch `Exception`, not a bare `except`, so that a keyboard/kernel
        # interrupt still stops the loop instead of being swallowed.
        print(f"Skipping article {url!r}: {exc}", file=sys.stderr)
        continue

    # NOTE(review): newspaper appears to fill `keywords` and `summary` only
    # after `nlp()` has been called, so these two fields are presumably empty
    # here - confirm whether they are needed downstream.
    article_list.append({
        "title": str(parsed.title),
        "text": str(parsed.text),
        "authors": parsed.authors,
        "published_date": str(parsed.publish_date),
        "top_image": str(parsed.top_image),
        "videos": parsed.movies,
        "keywords": parsed.keywords,
        "summary": str(parsed.summary),
        "url": str(url),
    })

### Get Natural Language Processing Model

In [None]:
# Build the Word2Vec training corpus from the downloaded articles.
#
# Every newline-separated line of an article's text becomes one training
# "sentence", tokenised by splitting on single spaces. `title` holds, for each
# training sentence, the title of the article it came from.
title = []
data = []

for entry in article_list:
    text_lines = entry["text"].split("\n")
    data.extend(text_line.split(" ") for text_line in text_lines)
    title.extend([entry["title"]] * len(text_lines))

# Skip-gram model (sg=1) with 100-dimensional vectors; min_count=1 keeps every
# token that occurs in the corpus in the vocabulary.
model = gensim.models.Word2Vec(
    data,
    min_count=1,
    vector_size=100,
    window=5,
    sg=1,
)

### Get News Article Vectors

In [None]:
# Embed every line of every article as the mean of its word vectors.
#
# The three lists are filled in lockstep: entry i of `article_vectors`,
# `article_titles` and `article_url` all describe the same line of text.
article_titles = []
article_vectors = []
article_url = []
for article in article_list:
    # Articles whose title contains this phrase are deliberately excluded
    # (the reason is not recorded in this notebook).
    if "Types of Cyber Attacks:" in article["title"]:
        continue

    for line in article["text"].split("\n"):
        line_vector = []
        for word in line.split(" "):
            try:
                line_vector.append(model.wv.get_vector(word))
            except KeyError:
                # Word is not in the model's vocabulary; leave it out of the mean.
                continue
        # Lines with no known words would produce an undefined mean, so skip them.
        if line_vector:
            article_vectors.append(np.mean(line_vector, axis=0))
            article_titles.append(article["title"])
            article_url.append(article["url"])

data = np.array(article_vectors)  # one vector per embedded article line
labels = np.array(article_titles)  # title of the article each line came from
urls = np.array(article_url)  # URL of the article each line came from

### Get User Input Text Vector

In [None]:
# Build a single query vector for `input_text`: the mean of the Word2Vec
# vectors of every in-vocabulary word in the text.
#
# Why it is written this way:
#   * Each sentence is split into words before lookup. Iterating a string
#     directly yields single characters, which almost never match the
#     word-level vocabulary.
#   * Word vectors are accumulated across all sentences, so the whole text
#     contributes to the query rather than only its last sentence.
#   * Newlines are replaced by spaces (not removed) so that words on either
#     side of a line break are not fused into one token.
#   * Splitting on "." strips sentence-final periods from the tokens.
word_vectors = []
for sentence in input_text.replace("\n", " ").split("."):
    for word in sentence.split():
        try:
            word_vectors.append(model.wv.get_vector(word))
        except KeyError:
            # Word is not in the model's vocabulary; leave it out of the mean.
            continue

if not word_vectors:
    # Fail here with a clear message instead of letting the nearest-neighbour
    # lookup fail later on an empty query vector.
    raise ValueError(
        "None of the words in input_text are in the Word2Vec vocabulary; "
        "cannot build a query vector."
    )

sentence_vector = np.mean(word_vectors, axis=0)

### Identify Relevant Results

In [None]:
# Rank the embedded article lines by distance to the query vector and print
# the title and URL of the closest ones (up to five).
if len(data) == 0:
    print("No article vectors available; nothing to rank.")
else:
    tree = spatial.KDTree(data)

    # Never ask for more neighbours than there are points: KDTree.query pads
    # missing neighbours with an out-of-range index, which would fail when used
    # to index `labels` / `urls`.
    top_k = min(5, len(data))

    # Query once and reuse the result for every printed line.
    _, nearest = tree.query(sentence_vector, k=top_k)

    # With k == 1 the query returns a scalar index; normalise it to an array.
    for idx in np.atleast_1d(nearest):
        print(labels[idx])
        print(urls[idx] + "\n")