# Driver Decomposition

Given a list of drivers, decompose them into semantic objects containing:
1. Concept
2. Polarity (for now, the `sense` field)
3. Adjective

In [3]:
import json
import spacy

from spacy.symbols import nsubj, VERB, NOUN, PROPN
from spacy import displacy

In [2]:
# Load the large English pipeline once; genConceptJson and the demo cells parse text through `nlp`.
nlp = spacy.load('en_core_web_lg')
# English stopword set used by cleanDriver.
# NOTE(review): `spacy.lang.en` is never imported explicitly in this notebook; presumably it
# becomes reachable as a side effect of the load above - confirm before reordering these lines.
spacy_stopwords = spacy.lang.en.stop_words.STOP_WORDS

In [4]:
# Read one driver phrase per line. The context manager closes the file handle
# as soon as the list is built instead of leaving it open for the kernel's lifetime.
with open('../drivers.txt') as drivers_file:
    drivers = [line.rstrip('\n') for line in drivers_file]

# lemma -> number of occurrences; genConceptJson increments it as a side effect.
nounCountList = {}

### Raw drivers.

In [5]:
# Show the raw driver phrases exactly as read from ../drivers.txt.
drivers

['clients acceptance of strategic imperatives',
 'new competitors',
 'businesses struggling to keep up with clients demands',
 'businesses struggling to conform to regulations',
 'increased education in technology field',
 'improved efficiency, usability, and lower costs',
 'increased data volume',
 'increased demand for data analytics',
 'increased speed efficiency, and lower cost',
 'increasing profits for our clients',
 'changing market driving the needs for new business model',
 'increased importance of bundling products',
 'increased dependency on devices (e.g. IoT)',
 'increased risk of cyber attacks',
 'increase in data privacy regulations',
 'currency appreciation against US dollar',
 'higher revenue for multinational companies',
 'lower demand for USD',
 'low price levels of goods and services',
 'low inflation',
 'decreasing trade deficit or increasing surplus',
 'exchange outlook is positive',
 'improved competitiveness',
 'increased foreign direct investment',
 'cheaper FX 

In [6]:
def cleanDriver(driver):
    """Return ``driver`` with its stopwords removed.

    The phrase is split on whitespace, every word whose lower-cased form is in
    ``spacy_stopwords`` is dropped, and the surviving words are re-joined with
    single spaces.
    """
    kept_words = [word for word in driver.split() if word.lower() not in spacy_stopwords]
    return ' '.join(kept_words)

## Concept extraction:
1. Find Nouns.
2. Find the child nodes of each noun whose POS is VERB, ADJ, or NOUN.
3. Find VERBS.
4. Look at Verb children and Noun interactions.

In [7]:
def genConceptJson(driver):
    """Decompose one driver phrase into its noun concepts.

    The phrase is parsed once with the module-level spaCy pipeline ``nlp``.
    Every NOUN/PROPN token becomes an entry ``concept_<k>`` (numbered from 1
    in token order) holding:

    * ``lemma`` - the token's lemma;
    * ``sense`` - lemmas of the token's direct dependency children whose POS
      is VERB, ADJ or NOUN. The key is only present when the token has at
      least one child, so it may be an empty list;
    * ``adj``   - lemma of a VERB token that has a direct child with the same
      lemma as the concept. When several verbs qualify, the last one in token
      order wins.

    Side effect: increments ``nounCountList[lemma]`` for every concept found.

    Args:
        driver: the driver phrase (str).

    Returns:
        dict of the form
        ``{"concepts": {"count": <int>, "concept_1": {...}, ...}}``.
    """
    # Parse once and reuse the Doc for both passes instead of running the
    # whole pipeline twice on the same text.
    doc = nlp(driver)

    concepts = {"count": 0}
    entries = []  # concept dicts in creation order, reused by the verb pass

    # Pass 1: one concept per noun / proper noun.
    for token in doc:
        if token.pos not in (NOUN, PROPN):
            continue

        lemma = token.lemma_
        nounCountList[lemma] = nounCountList.get(lemma, 0) + 1

        entry = {"lemma": lemma}
        children = list(token.children)
        if children:
            entry["sense"] = [
                child.lemma_ for child in children
                if child.pos_ in ('VERB', 'ADJ', 'NOUN')
            ]

        concepts["count"] += 1
        concepts["concept_" + str(concepts["count"])] = entry
        entries.append(entry)

    # Pass 2: attach each verb to the concepts found among its children.
    for token in doc:
        if token.pos != VERB:
            continue

        child_lemmas = {child.lemma_ for child in token.children}
        # NOTE(review): matching is by lemma string, not token identity, so a
        # concept that merely shares a lemma with a child of this verb is
        # tagged as well - confirm this is intended.
        for entry in entries:
            if entry["lemma"] in child_lemmas:
                entry["adj"] = token.lemma_

    return {"concepts": concepts}

In [8]:
# Single ad-hoc driver phrase.
# NOTE(review): sDrivers is not referenced by any other visible cell - confirm it is still needed.
sDrivers = ['government open to foreign Information Technology support, regulations in place']

In [9]:
# genConceptJson increments nounCountList on every call, so start from empty
# counts here; otherwise re-running this cell counts every noun again.
nounCountList.clear()

# Map each raw driver phrase to its decomposition.
conceptJson = {driver: genConceptJson(driver) for driver in drivers}

## Driver list decomposed.

In [10]:
# Pretty-print the full driver -> concepts mapping as indented JSON.
print (json.dumps(conceptJson, indent=4))

{
    "clients acceptance of strategic imperatives": {
        "concepts": {
            "count": 3,
            "concept_1": {
                "lemma": "client"
            },
            "concept_2": {
                "lemma": "acceptance",
                "sense": [
                    "client"
                ]
            },
            "concept_3": {
                "lemma": "imperative",
                "sense": [
                    "strategic"
                ]
            }
        }
    },
    "new competitors": {
        "concepts": {
            "count": 1,
            "concept_1": {
                "lemma": "competitor",
                "sense": [
                    "new"
                ]
            }
        }
    },
    "businesses struggling to keep up with clients demands": {
        "concepts": {
            "count": 3,
            "concept_1": {
                "lemma": "business",
                "sense": [
                    "struggle"
                ]
      

## Concepts by count.

In [11]:
# Order the (lemma, count) pairs from least to most frequent; the sort is
# stable, so equal counts keep the dict's insertion order.
sorted_nounList = list(nounCountList.items())
sorted_nounList.sort(key=lambda pair: pair[1])
sorted_nounList

[('acceptance', 1),
 ('imperative', 1),
 ('field', 1),
 ('usability', 1),
 ('volume', 1),
 ('analytic', 1),
 ('speed', 1),
 ('model', 1),
 ('importance', 1),
 ('dependency', 1),
 ('IoT', 1),
 ('appreciation', 1),
 ('good', 1),
 ('competitiveness', 1),
 ('depreciation', 1),
 ('gdp', 1),
 ('ratio', 1),
 ('volatility', 1),
 ('order', 1),
 ('group', 1),
 ('method', 1),
 ('technique', 1),
 ('tension', 1),
 ('entrepreneur', 1),
 ('culture', 1),
 ('protection', 1),
 ('playing', 1),
 ('ground', 1),
 ('bribery', 1),
 ('reduction', 1),
 ('broadband', 1),
 ('internet', 1),
 ('disruptor', 1),
 ('dependability', 1),
 ('security', 1),
 ('aggregate', 1),
 ('official', 1),
 ('wealth', 1),
 ('burden', 1),
 ('help', 1),
 ('terrorism', 1),
 ('identify', 1),
 ('theft', 1),
 ('us', 1),
 ('availability', 1),
 ('innovation', 1),
 ('substitute', 1),
 ('growth', 1),
 ('boom', 1),
 ('state', 1),
 ('war', 1),
 ('behavior', 1),
 ('work', 1),
 ('worker', 1),
 ('declining', 1),
 ('infant', 1),
 ('advantage', 1),
 (

## Visualizing the dependency tree.

In [12]:
doc = nlp("increased dependency on devices")

# Per token: lemma, sentiment, right and left edge of its subtree, and the
# tokens that make up that subtree.
for token in doc:
    print (
        token.lemma_,
        token.sentiment,
        token.right_edge,
        token.left_edge,
        list(token.subtree),
    )

increase 0.0 increased increased [increased]
dependency 0.0 devices increased [increased, dependency, on, devices]
on 0.0 devices on [on, devices]
device 0.0 devices devices [devices]


In [13]:
# Draw the dependency tree of the phrase parsed in the previous cell.
displacy.render(doc, style='dep')

In [14]:
# Parse one of the longer drivers from the list and draw its dependency tree.
doc = nlp("businesses struggling to keep up with clients demands")
displacy.render(doc, style = 'dep')

## Observations

### 1. Minor changes in the drivers can lead to major changes in the driver decomposition output.

In [15]:
# Phrase as it appears in the driver list ("clients" without an apostrophe).
doc = "businesses struggling to keep up with clients demands"
genConceptJson(doc)

{'concepts': {'count': 3,
  'concept_1': {'lemma': 'business', 'sense': ['struggle']},
  'concept_2': {'lemma': 'client'},
  'concept_3': {'lemma': 'demand', 'sense': ['business']}}}

In [16]:
# `doc` is a plain string here, so parse it before drawing the dependency tree.
displacy.render(nlp(doc), style='dep')

In [25]:
# Same phrase with a possessive apostrophe ("client's"), to compare the decomposition.
doc = "businesses struggling to keep up with client's demands"
genConceptJson(doc)

{'concepts': {'count': 3,
  'concept_1': {'lemma': 'business', 'sense': ['struggle']},
  'concept_2': {'lemma': 'client', 'sense': []},
  'concept_3': {'lemma': 'demand', 'sense': ['client']}}}

In [27]:
# `doc` is a plain string here, so parse it before drawing the dependency tree.
displacy.render(nlp(doc), style='dep')

### 2. Stopword removal is helpful in some cases but not in others.

Stopword removal - unfavourable

In [34]:
# Parse the phrase with its stopwords left in.
doc = nlp("increased dependency on devices")

In [35]:
# Dependency tree of the phrase with stopwords intact.
displacy.render(doc, style='dep')

In [36]:
# Same phrase with stopwords stripped; doc_cleaned is still a plain string at this point.
doc_cleaned = cleanDriver("increased dependency on devices")

In [37]:
# Parse the stopword-stripped phrase directly from the raw text, so this cell
# does not depend on doc_cleaned still being a str when it is (re-)run.
doc_cleaned = nlp(cleanDriver("increased dependency on devices"))

In [38]:
# Dependency tree of the stopword-stripped phrase, for comparison with the tree above.
displacy.render(doc_cleaned, style='dep')

Stopword removal - favourable.

In [46]:
# Parse the phrase with its stopwords left in.
doc = nlp("restriction to operate with resources from foreign countries")

In [47]:
# Dependency tree of the phrase with stopwords intact.
displacy.render(doc, style='dep')

In [48]:
# Same phrase, parsed after stopword removal.
doc = nlp(cleanDriver("restriction to operate with resources from foreign countries"))

In [49]:
# Dependency tree of the stopword-stripped phrase, for comparison with the tree above.
displacy.render(doc, style='dep')

### Testing


In [30]:
# Ad-hoc phrase made only of content words, run through the decomposition.
doc = "restrictions clients expand businesses infrastructure"
genConceptJson(doc)

{'concepts': {'count': 4,
  'concept_1': {'lemma': 'restriction'},
  'concept_2': {'lemma': 'client', 'sense': ['restriction'], 'adj': 'expand'},
  'concept_3': {'lemma': 'business'},
  'concept_4': {'lemma': 'infrastructure',
   'sense': ['business'],
   'adj': 'expand'}}}