# spacy

In [25]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [26]:
# definition from wiki 
text = """In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub
          which has been launched to empower the next generation of students with AI-ready skills.
         Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100
          institutions with AI infrastructure, course content and curriculum, developer support,
          development tools and give students access to cloud and AI services.
          As part of the program, the Redmond giant which wants to expand its reach and is
          planning to build a strong developer ecosystem in India with the program will set up the
          core AI infrastructure and IoT Hub for the selected campuses.
          The company will provide AI development tools and Azure AI services such as
          Microsoft Cognitive Services, Bot Services and Azure Machine Learning.
          According to Manish Prakash, Country General Manager-PS, Health and Education,
          Microsoft India, said, "With AI being the defining technology of our time,
          it is transforming lives and industry and the jobs of tomorrow will
          require a different skillset. This will require more collaborations and
          training and working with AI. That’s why it has become more critical than ever for
          educational institutions to integrate new cloud and AI technologies.
          The program is an attempt to ramp up the institutional set-up and build
          capabilities among the educators to educate the workforce of tomorrow."
          The program aims to build up the cognitive skills and in-depth understanding of
          developing intelligent cloud connected solutions for applications across industry.
          Earlier in April this year, the company announced Microsoft Professional
          Program In AI as a learning track open to the public.
          The program was developed to provide job ready skills to programmers who wanted to hone their
          skills in AI and data science with a series of online courses which featured hands-on labs and expert instructors as well.
          This program also included developer-focused AI school that provided a bunch of assets to help build AI skills."""


In [27]:
stopwords = list(STOP_WORDS)


In [28]:
nlp = spacy.load('en_core_web_sm')

In [29]:
doc = nlp(text)

In [30]:
tokens = [token.text for token in doc]
print(tokens)

['In', 'an', 'attempt', 'to', 'build', 'an', 'AI', '-', 'ready', 'workforce', ',', 'Microsoft', 'announced', 'Intelligent', 'Cloud', 'Hub', '\n          ', 'which', 'has', 'been', 'launched', 'to', 'empower', 'the', 'next', 'generation', 'of', 'students', 'with', 'AI', '-', 'ready', 'skills', '.', '\n         ', 'Envisioned', 'as', 'a', 'three', '-', 'year', 'collaborative', 'program', ',', 'Intelligent', 'Cloud', 'Hub', 'will', 'support', 'around', '100', '\n          ', 'institutions', 'with', 'AI', 'infrastructure', ',', 'course', 'content', 'and', 'curriculum', ',', 'developer', 'support', ',', '\n          ', 'development', 'tools', 'and', 'give', 'students', 'access', 'to', 'cloud', 'and', 'AI', 'services', '.', '\n          ', 'As', 'part', 'of', 'the', 'program', ',', 'the', 'Redmond', 'giant', 'which', 'wants', 'to', 'expand', 'its', 'reach', 'and', 'is', '\n          ', 'planning', 'to', 'build', 'a', 'strong', 'developer', 'ecosystem', 'in', 'India', 'with', 'the', 'program'

In [31]:
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [32]:
# get the frequency for each word
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] +=1

In [33]:
print(word_frequencies)

{'attempt': 2, 'build': 5, 'AI': 14, 'ready': 3, 'workforce': 2, 'Microsoft': 4, 'announced': 2, 'Intelligent': 2, 'Cloud': 2, 'Hub': 3, '\n          ': 23, 'launched': 1, 'empower': 1, 'generation': 1, 'students': 2, 'skills': 5, '\n         ': 1, 'Envisioned': 1, 'year': 2, 'collaborative': 1, 'program': 7, 'support': 2, '100': 1, 'institutions': 2, 'infrastructure': 2, 'course': 1, 'content': 1, 'curriculum': 1, 'developer': 3, 'development': 2, 'tools': 2, 'access': 1, 'cloud': 3, 'services': 2, 'Redmond': 1, 'giant': 1, 'wants': 1, 'expand': 1, 'reach': 1, 'planning': 1, 'strong': 1, 'ecosystem': 1, 'India': 2, 'set': 2, 'core': 1, 'IoT': 1, 'selected': 1, 'campuses': 1, 'company': 2, 'provide': 2, 'Azure': 2, 'Cognitive': 1, 'Services': 2, 'Bot': 1, 'Machine': 1, 'Learning': 1, 'According': 1, 'Manish': 1, 'Prakash': 1, 'Country': 1, 'General': 1, 'Manager': 1, 'PS': 1, 'Health': 1, 'Education': 1, 'said': 1, 'defining': 1, 'technology': 1, 'time': 1, 'transforming': 1, 'lives': 

In [34]:
max_freqyency = max(word_frequencies.values())
max_freqyency

23

In [35]:
# normalize frequencies
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word] / max_freqyency 
    
print(word_frequencies)

{'attempt': 0.08695652173913043, 'build': 0.21739130434782608, 'AI': 0.6086956521739131, 'ready': 0.13043478260869565, 'workforce': 0.08695652173913043, 'Microsoft': 0.17391304347826086, 'announced': 0.08695652173913043, 'Intelligent': 0.08695652173913043, 'Cloud': 0.08695652173913043, 'Hub': 0.13043478260869565, '\n          ': 1.0, 'launched': 0.043478260869565216, 'empower': 0.043478260869565216, 'generation': 0.043478260869565216, 'students': 0.08695652173913043, 'skills': 0.21739130434782608, '\n         ': 0.043478260869565216, 'Envisioned': 0.043478260869565216, 'year': 0.08695652173913043, 'collaborative': 0.043478260869565216, 'program': 0.30434782608695654, 'support': 0.08695652173913043, '100': 0.043478260869565216, 'institutions': 0.08695652173913043, 'infrastructure': 0.08695652173913043, 'course': 0.043478260869565216, 'content': 0.043478260869565216, 'curriculum': 0.043478260869565216, 'developer': 0.13043478260869565, 'development': 0.08695652173913043, 'tools': 0.08695

In [36]:
sent_tokens = [sent for sent in doc.sents]
print(sent_tokens)

[In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub
          which has been launched to empower the next generation of students with AI-ready skills., 
         Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100
          institutions with AI infrastructure, course content and curriculum, developer support,
          development tools and give students access to cloud and AI services., 
          As part of the program, the Redmond giant which wants to expand its reach and is
          planning to build a strong developer ecosystem in India with the program will set up the
          core AI infrastructure and IoT Hub for the selected campuses., 
          The company will provide AI development tools and Azure AI services such as
          Microsoft Cognitive Services, Bot Services and Azure Machine Learning., 
          , According to Manish Prakash, Country General Manager-PS, Health and Education,
   

In [37]:
# get score for each sentence
sentence_score = {}
for sent in sent_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_score.keys():
                sentence_score[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_score[sent] += word_frequencies[word.text.lower()] 

In [38]:
from heapq import nlargest

In [39]:
# select 30% sumarization
select_length = int(len(sent_tokens)*0.3)
select_length

3

In [40]:
summary = nlargest(select_length , sentence_score, key =sentence_score.get)
summary

[
           As part of the program, the Redmond giant which wants to expand its reach and is
           planning to build a strong developer ecosystem in India with the program will set up the
           core AI infrastructure and IoT Hub for the selected campuses.,
 
          Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100
           institutions with AI infrastructure, course content and curriculum, developer support,
           development tools and give students access to cloud and AI services.,
 According to Manish Prakash, Country General Manager-PS, Health and Education,
           Microsoft India, said, "With AI being the defining technology of our time,
           it is transforming lives and industry and the jobs of tomorrow will
           require a different skillset.]

In [41]:
final = [word.text for word in summary]
summary = ' '.join(final)

### review results

In [42]:
print(text)

In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub
          which has been launched to empower the next generation of students with AI-ready skills.
         Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100
          institutions with AI infrastructure, course content and curriculum, developer support,
          development tools and give students access to cloud and AI services.
          As part of the program, the Redmond giant which wants to expand its reach and is
          planning to build a strong developer ecosystem in India with the program will set up the
          core AI infrastructure and IoT Hub for the selected campuses.
          The company will provide AI development tools and Azure AI services such as
          Microsoft Cognitive Services, Bot Services and Azure Machine Learning.
          According to Manish Prakash, Country General Manager-PS, Health and Education,
          Micr

In [43]:
print(summary)


          As part of the program, the Redmond giant which wants to expand its reach and is
          planning to build a strong developer ecosystem in India with the program will set up the
          core AI infrastructure and IoT Hub for the selected campuses. 
         Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100
          institutions with AI infrastructure, course content and curriculum, developer support,
          development tools and give students access to cloud and AI services. According to Manish Prakash, Country General Manager-PS, Health and Education,
          Microsoft India, said, "With AI being the defining technology of our time,
          it is transforming lives and industry and the jobs of tomorrow will
          require a different skillset.


In [44]:
print("length Before {} \nlength After {}  ".format(len(text) , len(summary)))

length Before 2255 
length After 826  


# nltk