In [1]:
import json
import requests
from pprint import pprint

from config import config 

In [2]:
endpoint = config.ENDPOINT
api_version="?api-version=2019-05-06"
#api_version = '?api-version=2019-05-06'
headers = {'Content-Type': config.CONTENT_TYPE,
        'api-key': config.API_KEY }

In [3]:
def construct_Url(endpoint, resource, resource_name, api_version, searchstring):
    if resource_name:
        
        if searchstring:
            return  endpoint + resource + "/" + resource_name + api_version + searchstring 
        else:
            return endpoint + resource + "/" + resource_name + api_version
    else:
        return endpoint + resource + api_version

In [4]:
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name=None, api_version=api_version, searchstring=None)
response  = requests.get(url, headers=headers)
index_list = response.json()
pprint(index_list)

{'@odata.context': 'https://alexa-for-docs.search.windows.net/$metadata#indexes',
 'value': []}


In [5]:
from haystack.file_converter.pdf import PDFToTextConverter
converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
doc = converter.convert(file_path="./book/9781839217579-THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION.pdf", meta=None)

In [6]:
doc.keys(), type(doc)

(dict_keys(['text', 'meta']), dict)

In [6]:
doc_text=doc["text"]

In [8]:
doc_text[:1000]

'\x0cThe Deep Learning\nwith Keras Workshop\nSecond Edition\n\nAn Interactive Approach to Understanding\nDeep Learning with Keras\n\nMatthew Moocarme\nMahla Abdolahnejad\nRitesh Bhagwat\n\x0cThe Deep Learning with Keras Workshop\nSecond Edition\nCopyright © 2020 Packt Publishing\nAll rights reserved. No part of this book may be reproduced, stored in a retrieval system,\nor transmitted in any form or by any means, without the prior written permission of the\npublisher, except in the case of brief quotations embedded in critical articles or reviews.\nEvery effort has been made in the preparation of this book to ensure the accuracy of\nthe information presented. However, the information contained in this book is sold\nwithout warranty, either express or implied. Neither the authors, nor Packt Publishing,\nand its dealers and distributors will be held liable for any damages caused or alleged to\nbe caused directly or indirectly by this book.\nPackt Publishing has endeavored to provide trad

In [7]:
len(doc_text)

579763

In [8]:
# Specify the index definition, including the fields that define each search document. Fields have a name type, and attributes that determine how you can use the field. 

index_schema = {
    "name":"mlbook",
    "fields": [
        {
            "name": "book_name",
            "type": "Edm.String",
            "key": True
        },
        {
            "name": "content",
            "type": "Edm.String",
            "key": False,
            "searchable": True,
            "retrievable": True,
            "analyzer": "en.microsoft"           
        }
    ]
}



In [9]:
# Formulate the request. This POST request targets the indexes collection of your search service and creates an index based on the index schema you provided in the previous cell.
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name=None, api_version=api_version, searchstring=None)
response  = requests.post(url, headers=headers, json=index_schema)
index = response.json()
pprint(index)

{'@odata.context': 'https://alexa-for-docs.search.windows.net/$metadata#indexes/$entity',
 '@odata.etag': '"0x8D89AE5F9702D51"',
 'analyzers': [],
 'charFilters': [],
 'corsOptions': None,
 'defaultScoringProfile': None,
 'encryptionKey': None,
 'fields': [{'analyzer': None,
             'facetable': True,
             'filterable': True,
             'indexAnalyzer': None,
             'key': True,
             'name': 'book_name',
             'retrievable': True,
             'searchAnalyzer': None,
             'searchable': True,
             'sortable': True,
             'synonymMaps': [],
             'type': 'Edm.String'},
            {'analyzer': 'en.microsoft',
             'facetable': True,
             'filterable': True,
             'indexAnalyzer': None,
             'key': False,
             'name': 'content',
             'retrievable': True,
             'searchAnalyzer': None,
             'searchable': True,
             'sortable': True,
             'synonymMap

In [10]:
# Next, provide ddocuments that conform to the index schema. Specify an upload action for each document.
# we have only one document containing the text of the book

# The max length for UTF-8 encoded terms is 32.766 bytes. 
# len(doc_text[32766:]) =  546997


document = {
    "value": [
    { "@search.action": "upload",
    "book_name": "THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION",
    "content": doc_text[:10000]
    },
    { "@search.action": "upload",
    "book_name": "THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION",
    "content": doc_text[10000:20000]
    },
    { "@search.action": "upload",
    "book_name": "THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION",
    "content": doc_text[20000:30000]
    },
    ]
}

In [11]:
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name="mlbook/docs/index", api_version=api_version, searchstring=None)
response  = requests.post(url, headers=headers, json=document)
index_content = response.json()
pprint(index_content)

{'@odata.context': "https://alexa-for-docs.search.windows.net/indexes('mlbook')/$metadata#Collection(Microsoft.Azure.Search.V2019_05_06.IndexResult)",
 'value': [{'errorMessage': None,
            'key': 'THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION',
            'status': True,
            'statusCode': 201},
           {'errorMessage': None,
            'key': 'THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION',
            'status': True,
            'statusCode': 200},
           {'errorMessage': None,
            'key': 'THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION',
            'status': True,
            'statusCode': 200}]}


In [12]:
searchstring = '&search=layer'
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name="mlbook/docs", api_version=api_version, searchstring=searchstring)


response  = requests.get(url, headers=headers, json=searchstring)
query = response.json()
pprint(query)

{'@odata.context': "https://alexa-for-docs.search.windows.net/indexes('mlbook')/$metadata#docs(*)",
 'value': [{'@search.score': 0.119144924,
            'book_name': 'THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION',
            'content': '............... 298\n'
                       '\x0c'
                       "Exercise 9.02: Predicting the Trend of Alphabet's "
                       'Stock Price\n'
                       'Using an LSTM with 100 units '
                       '........................................................................ '
                       '300\n'
                       "Activity 9.02: Predicting Amazon's Stock Price\n"
                       'with Added Regularization '
                       '............................................................................. '
                       '304\n'
                       "Activity 9.03: Predicting the Trend of Amazon's Stock "
                       'Price Using\n'
                    

In [12]:
searchstring = '&search=deep learning'
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name="mlbook/docs", api_version=api_version, searchstring=searchstring)


response  = requests.get(url, headers=headers, json=searchstring)
query = response.json()
pprint(query)

. Reshape the data to add an extra dimension to the '
                       "end of X_train using NumPy's\n"
                       'reshape function:\n'
                       '\n'
                       '7. Import the following Keras libraries to build the '
                       'RNN:\n'
                       'from keras.models import Sequential\n'
                       'from keras.layers import Dense, LSTM, Dropout\n'
                       '\n'
                       '8. Set the seed and initiate the sequential model, as '
                       'follows:\n'
                       'seed = 1\n'
                       'np.random.seed(seed)\n'
                       'random.set_seed(seed)\n'
                       'model = Sequential()\n'
                       '\n'
                       '9. Add an LSTM layer to the network with 50 units, set '
                       'the return_sequences\n'
                       'argument to True, and set the input_shape argument to '
        

In [18]:
searchstring ='&search=layer&$count=false'

url = construct_Url(endpoint=endpoint, resource="indexes", resource_name="mlbook/docs", api_version=api_version, searchstring=searchstring)


response  = requests.get(url, headers=headers, json=searchstring)
query = response.json()
pprint(query)

 initiate the sequential model, as '
                        'follows:\n'
                        'seed = 1\n'
                        'np.random.seed(seed)\n'
                        'random.set_seed(seed)\n'
                        'model = Sequential()\n'
                        '\n'
                        '9. Add an LSTM layer to the network with 50 units, '
                        'set the return_sequences\n'
                        'argument to True, and set the input_shape argument to '
                        '(X_train.shape[1],\n'
                        '1). Add dropout to the model with rate=0.2. Add three '
                        'additional LSTM layers,\n'
                        'each with 50 units, and set the return_sequences '
                        'argument to True for the first\n'
                        'two. After each LSTM layer, add a dropout with '
                        'rate=0.2. Add a final output layer\n'
                        'of size 1:\n'
         

In [16]:
url = endpoint + "indexes/mlbook" + api_version
response  = requests.delete(url, headers=headers)

In [31]:
# Formulate the request. This POST request targets the indexes collection of your search service and creates an index based on the index schema you provided in the previous cell.
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name=None, api_version=api_version, searchstring=None)
response  = requests.post(url, headers=headers, json=index_schema)
index = response.json()
pprint(index)

{'@odata.context': 'https://alexa-for-docs.search.windows.net/$metadata#indexes/$entity',
 '@odata.etag': '"0x8D89AA81F13FF75"',
 'analyzers': [],
 'charFilters': [],
 'corsOptions': None,
 'defaultScoringProfile': None,
 'encryptionKey': None,
 'fields': [{'analyzer': None,
             'facetable': True,
             'filterable': True,
             'indexAnalyzer': None,
             'key': True,
             'name': 'book_name',
             'retrievable': True,
             'searchAnalyzer': None,
             'searchable': True,
             'sortable': True,
             'synonymMaps': [],
             'type': 'Edm.String'},
            {'analyzer': None,
             'facetable': True,
             'filterable': True,
             'indexAnalyzer': None,
             'key': False,
             'name': 'content',
             'retrievable': True,
             'searchAnalyzer': None,
             'searchable': True,
             'sortable': True,
             'synonymMaps': [],
  

In [34]:
# Next, provide ddocuments that conform to the index schema. Specify an upload action for each document.
# we have only one document containing the text of the book
document = {
    "value": [
    { "@search.action": "upload",
    "book_name": "THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION",
    "content": 'book/9781839217579-THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION.pdf'
    }
    ]
}

In [35]:
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name="mlbook/docs/index", api_version=api_version, searchstring=None)
response  = requests.post(url, headers=headers, json=document)
index_content = response.json()
pprint(index_content)

{'@odata.context': "https://alexa-for-docs.search.windows.net/indexes('mlbook')/$metadata#Collection(Microsoft.Azure.Search.V2019_05_06.IndexResult)",
 'value': [{'errorMessage': None,
            'key': 'THE_DEEP_LEARNING_WITH_KERAS_WORKSHOP_SECOND_EDITION',
            'status': True,
            'statusCode': 201}]}


In [36]:
searchstring = '&search=layer'
url = construct_Url(endpoint=endpoint, resource="indexes", resource_name="mlbook/docs", api_version=api_version, searchstring=searchstring)


response  = requests.get(url, headers=headers, json=searchstring)
query = response.json()
pprint(query)

{'@odata.context': "https://alexa-for-docs.search.windows.net/indexes('mlbook')/$metadata#docs(*)",
 'value': []}


In [25]:
# no documents where uploaded!
url = endpoint + "indexes/mlbook" + api_version
response  = requests.delete(url, headers=headers)