## 1. Dependencies

#### 1.1 Install required packages

In [None]:
pip install --upgrade "ibm-watson>=4.0.1"

In [None]:
pip install watson-developer-cloud==1.5

In [None]:
pip install requests

#### 1.2 Imports

In [None]:
import base64
import json
import os
import shutil

import requests
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
from ibm_watson import LanguageTranslatorV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, SemanticRolesOptions, SentimentOptions, EmotionOptions, ConceptsOptions, CategoriesOptions

#### 1.3 Global Variables

In [None]:
# Specify the url for the below microservices.
# Endpoint paths such as '/getfile' are appended directly to these values,
# so enter them without a trailing slash.
url_object_storage_operations = ''
url_image_preprocessor = ''
url_text_extractor = ''

# Specify the image name and bucket name
newspaper_img = 'newspaper_hindi.jpg'
bucket_name = ""

# Language Translator credentials.
# The API key is taken from the LANGUAGE_TRANSLATOR_APIKEY environment variable
# when it is set, so the secret does not have to be saved inside the notebook;
# otherwise replace the '' fallback with your key.
language_translator_api_key = os.environ.get('LANGUAGE_TRANSLATOR_APIKEY', '')
language_translator_url = 'https://gateway-lon.watsonplatform.net/language-translator/api'

# Natural Language Understanding credentials (same scheme as above, using the
# NATURAL_LANGUAGE_UNDERSTANDING_APIKEY environment variable).
nlu_api_key = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_APIKEY', '')
nlu_url = 'https://gateway-lon.watsonplatform.net/natural-language-understanding/api'

# Accumulators filled by later cells:
#   addr_hindi           - text returned by the text-extractor service
#   addr_english         - translations of the entries in addr_hindi
#   img_after_preprocess - image arrays of the extracted sections
addr_hindi = []
addr_english = []
img_after_preprocess = []

## 2. Appsody stack with Python Flask and Object storage support

#### 2.1 Get the image from Cloud Object Storage

In [None]:
# Call the object-storage service's /getfile endpoint, passing the bucket name
# and the image file name as query parameters, then show the response text.
# NOTE(review): the query key is spelled 'buket' - confirm this matches the
# parameter name the /getfile endpoint actually reads; if the service expects
# 'bucket', the bucket name is silently not being passed.
PARAMS = {'buket':bucket_name , 'filename':newspaper_img}
r = requests.get(url = url_object_storage_operations + '/getfile', params = PARAMS) 
r.text

In [None]:
# Request /getimage and keep the response body as ASCII bytes in `d`;
# the following cell base64-decodes `d` into image.jpg.
# NOTE(review): `d` is reassigned by later cells, so re-run this cell before
# re-running the decode cell. The last line echoes the whole payload, which
# can be very long for an image.
r = requests.get(url = url_object_storage_operations + '/getimage') 
d = r.text.encode('ASCII')
d

In [None]:
# Base64-decode the payload held in `d` and save it as image.jpg in the
# current working directory.
with open("image.jpg", "wb") as fh:
    fh.write(base64.decodebytes(d))

In [None]:
# Show the downloaded newspaper image.
# path_newspaper is reused by the preprocessing cell when it uploads the file.
path_newspaper = os.path.join(os.getcwd(), 'image.jpg')
img = mpimg.imread(path_newspaper)
imgplot = plt.imshow(img)
# Set the title without overwriting imgplot (which should keep the image
# artist); the trailing ';' suppresses the echoed Text repr.
plt.title('Hindi Classified');

## 3. Appsody stack with Python Flask and OpenCV support

#### 3.1 Preprocess the required image

In [None]:
# Create the ./images folder used for the preprocessed section images.
# exist_ok=True keeps the cell safe to re-run, and os.makedirs works on every
# platform without relying on a shell alias.
os.makedirs('images', exist_ok=True)

In [None]:
# Upload the newspaper image to the preprocessor's /process endpoint.
# The file is opened in a `with` block so the handle is closed as soon as the
# request has been sent (the original left it open for the kernel's lifetime).
with open(path_newspaper, 'rb') as image_file:
    files = {'file': image_file}
    response = requests.post(url_image_preprocessor + '/process', files=files)
# `images` is the parsed JSON reply; later cells iterate over it and use its
# values as image paths relative to the working directory.
images = response.json()

In [None]:
# Inspect the JSON returned by the /process call.
print(images)

In [None]:
# Fetch the /getimages response and parse it as JSON. The next cell treats it
# as a mapping of file path -> base64-encoded image text.
response = requests.get(url_image_preprocessor + '/getimages')
images_data = response.json()
images_data

In [None]:
# Write every entry of images_data to disk: the value is encoded to ASCII
# bytes, base64-decoded, and saved at "." + key, i.e. relative to the current
# working directory.
# NOTE(review): the key comes straight from the service response and is used
# as a file path without validation - confirm the service is trusted and that
# its keys always point inside ./images.
for k,v in images_data.items():
    d = v.encode('ASCII')
    with open("."+k, "wb") as fh:
        fh.write(base64.decodebytes(d))

In [None]:
# Display the detected sections
img = mpimg.imread(os.getcwd() + '/images/sections.jpg')
imgplot = plt.imshow(img)
plt.title('Detected different sections')

# Display the extracted sections.
# The list is emptied first so that re-running this cell does not show every
# section twice (it is a notebook-level accumulator).
img_after_preprocess.clear()
for k in images:
    # The overview image with all sections is shown above, not in the grid.
    if "sections" not in images[k]:
        img = mpimg.imread(os.getcwd() + images[k])
        img_after_preprocess.append(img)

plt.figure(figsize=(20,10))
columns = 5
# Integer division: plt.subplot() needs whole-number grid dimensions, and `/`
# yields a float in Python 3.
rows = len(img_after_preprocess) // columns + 1
for i, image in enumerate(img_after_preprocess):
    plt.subplot(rows, columns, i + 1)
    plt.title('After Preprocessing')
    plt.imshow(image)

## 4. Call Appsody stack with Python Flask and Tesseract support

#### 4.1 Extract the text using tesseract

In [None]:
print("Addresses Extraction:")
# Start from an empty list so that re-running this cell does not append the
# same addresses a second time.
addr_hindi.clear()
for i in images:
    str_image = images[i]
    print("----------------------")
    if "sections" in str_image:
        # The overview image is not sent to the text extractor.
        continue
    path_image = os.getcwd() + str_image
    # `with` closes each image file once its request has been sent; the
    # original leaked one open handle per image.
    with open(path_image, 'rb') as section_file:
        files = {'file': section_file}
        response = requests.post(url_text_extractor + '/extract', files=files)
    addr_hindi.append(response.text)
    print(response.text)

## 5. Call Watson Language Translator

#### 5.1 Translate language from Hindi to English

In [None]:
# Build the Language Translator client: authenticate with the IAM API key,
# pin the API version date, and point the client at the configured service URL.
authenticator = IAMAuthenticator(language_translator_api_key)
language_translator = LanguageTranslatorV3(
    version='2019-10-15',
    authenticator=authenticator
)

language_translator.set_service_url(language_translator_url)

# NOTE(review): this disables TLS certificate verification for requests made
# through this client - confirm it is really required in your environment;
# otherwise this call should be removed.
language_translator.set_disable_ssl_verification(True)

In [None]:
# Translate every extracted address with the 'hi-en' model.
# The result list is emptied first so that re-running this cell does not
# append the same translations again.
addr_english.clear()
for i in addr_hindi:
    translation = language_translator.translate(
        text=i,
        model_id='hi-en').get_result()
    # The translated text is read from the first entry of 'translations'.
    addr = translation['translations'][0]['translation']
    addr_english.append(addr)
    print(addr)

In [None]:
# Show the full list of translated addresses collected by the previous cell.
print(addr_english)

#### 5.2 Put the translated addresses to Cloud Object Storage and delete all the files used for processing

In [None]:
# Save the translated addresses, one per line.
# `with` closes the file even if a write fails, and UTF-8 is set explicitly
# because the platform default encoding may not be able to represent every
# character in the translated text.
# Append mode is kept from the original; the file is deleted again by the
# cleanup cell after it has been uploaded.
with open("Translated_Addresses.txt", "a", encoding="utf-8") as f:
    for i in addr_english:
        f.write(i + '\n')

In [None]:
# Upload Translated_Addresses.txt to the object-storage service's /upload
# endpoint. The file is opened in a `with` block so the handle is closed once
# the request has been sent.
path_file = os.path.join(os.getcwd(), 'Translated_Addresses.txt')
with open(path_file, 'rb') as upload_file:
    files = {'file': upload_file}
    response = requests.post(url_object_storage_operations + '/upload', files=files)
# Echo the response object so the HTTP status is visible in the cell output.
response

In [None]:
# Delete the images stored after preprocessing.
# Pure-Python removal works on every platform (the `rm` alias is POSIX-only),
# and the guard keeps the cell from failing when the folder is already gone.
if os.path.isdir('images'):
    shutil.rmtree('images')

In [None]:
# Delete the local copy of the newspaper image (image.jpg).
# Pure-Python removal works on every platform, and the guard keeps the cell
# from failing when the file is already gone.
if os.path.isfile('image.jpg'):
    os.remove('image.jpg')

In [None]:
# Delete the local Translated_Addresses.txt once it has been uploaded.
# Pure-Python removal works on every platform, and the guard keeps the cell
# from failing when the file is already gone.
if os.path.isfile('Translated_Addresses.txt'):
    os.remove('Translated_Addresses.txt')

In [None]:
# List the working directory to check that the temporary files were removed.
ls

## 6. Call Watson Natural Language Understanding

#### 6.1 Using NLU service, for Analytics

In [None]:
# Create the Natural Language Understanding client from the credentials set in
# the Global Variables cell.
# NOTE(review): the `iam_api_key` / `url` constructor arguments belong to the
# watson_developer_cloud package pinned in the install cell; presumably newer
# SDK releases use a different signature - confirm before upgrading.
apikey=nlu_api_key
url=nlu_url
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    iam_api_key=apikey,
    url=url
)

In [None]:
def extract_place(place):
    """Run NLU entity and keyword extraction on a piece of text.

    Args:
        place: The text to analyze.

    Returns:
        Whatever `natural_language_understanding.analyze` returns for the
        request; the following cell indexes it with 'keywords' and 'entities'.
    """
    requested_features = Features(
        entities=EntitiesOptions(),
        keywords=KeywordsOptions(),
    )
    return natural_language_understanding.analyze(
        text=place,
        features=requested_features,
    )
# Extract places from the addresses.
# The addresses are joined with a newline so the last word of one address is
# not fused with the first word of the next before the text is analyzed
# (plain concatenation produced tokens such as "MaharashtraShop").
s = '\n'.join(addr_english)
result = extract_place(s)

In [None]:
# Keyword text -> count, taken from the 'keywords' part of the NLU result
visual1 = {keyword['text']: keyword['count'] for keyword in result['keywords']}
# Text of every entity whose type is 'Location'
visual2 = [entity['text'] for entity in result['entities'] if entity['type'] == 'Location']
# Keep only the keywords that were also reported as Location entities
visualfinal = {name: count for name, count in visual1.items() if name in visual2}

# Filter the data for state wise visualization
statevisual = {}
locationvisual = {}
location = []
statesAndLocations = {
    'Gujarat': 'Gandhinagar',
    'Maharashtra': 'Mumbai',
    'Karnataka': 'Bangalore',
    'West Bengal': 'Kolkata',
}
for k, v in visualfinal.items():
    for x, y in statesAndLocations.items():
        # Both checks are substring tests: a keyword found inside the city
        # name is recorded under the state name, a keyword found inside the
        # state name is recorded under the keyword itself.
        if k in y:
            label = x
        elif k in x:
            label = k
        else:
            continue
        statevisual[label] = v
        location.append(label)
# Remove duplicates from the list while keeping first-seen order
location = list(dict.fromkeys(location))
statevisual

In [None]:
def plot(Location1,Location2):
    """Draw a bar chart comparing how often two locations appear.

    Looks both names up in the notebook-level `statevisual` mapping and draws
    one bar per name that is present, annotated with its count. Names that are
    not in `statevisual` are left out of the chart.

    Args:
        Location1: First location name to compare.
        Location2: Second location name to compare.
    """
    # Iterating over statevisual keeps the bars in that mapping's own order.
    result = {k: v for k, v in statevisual.items() if k in (Location1, Location2)}
    plt.bar(range(len(result)), list(result.values()), align='center')
    plt.xticks(range(len(result)), list(result.keys()))
    plt.title('Location wise comparison')
    plt.xlabel('Location')
    plt.ylabel('Number of times appeared')
    # Keep the original 0..4 range for small counts, but grow it with the data
    # so that taller bars and their labels are not cut off.
    plt.ylim(0, max(4, max(result.values(), default=0) + 1))
    for i, v in enumerate(result.values()):
        # Put the count just above the top of each bar.
        plt.annotate(v, xy=(i, v), xytext=(i, v + 0.1))
    plt.show()
def piechart():
    """Draw a pie chart of the values in the notebook-level `statevisual`.

    Each slice is labelled with its percentage, and the keys of `statevisual`
    are listed in a legend placed to the right of the pie.
    """
    state_names = statevisual.keys()
    state_counts = list(statevisual.values())

    figure, axes = plt.subplots()
    axes.pie(state_counts, autopct='%1.1f%%', shadow=True)
    axes.legend(
        state_names,
        title="States",
        loc="center left",
        bbox_to_anchor=(1, 0, 0.5, 1),
    )
    axes.set_title("% of addresses from different states")
    plt.show()
# Draw the pie chart built from statevisual.
piechart()
print("--------------------------------------------------------------")
# When the working directory does not contain "dsxuser" (presumably: not
# running in Watson Studio - see the note in the next section), show the
# interactive comparison; interact() builds both selectors from `location`.
if ("dsxuser" not in os.getcwd()):
        res = interact(plot, Location1 = location, Location2 = location)

#### 6.2 NOTE : Use the below cell if you are using Watson Studio

In [None]:
# Enter the two locations you want to compare.
# These values are passed to plot() by the next cell when the notebook runs
# under /home/dsxuser.
location1= "Karnataka"
location2= "West Bengal"

In [None]:
# Only when the working directory is under /home/dsxuser: run the comparison
# for the two locations entered in the previous cell.
if ("/home/dsxuser" in os.getcwd()):
    res = interact(plot, Location1 = location1, Location2 = location2)

## 7. Display all the required addresses

#### 7.1 Search address based on location or pincode

In [None]:
# Free-text search box; it is only rendered when the working directory is not
# under /home/dsxuser.
x = widgets.Text(value='', description='Location :', disabled=False)
if "/home/dsxuser" not in os.getcwd():
    display(x)


def callback(text):
    """Print every translated address that contains the submitted text.

    Args:
        text: The widget that was submitted; its `.value` is the search
            string. Matching is a case-insensitive substring test against
            each entry of `addr_english`.
    """
    print("Results for", text.value, ":")
    matches = [
        address
        for address in addr_english
        if text.value.lower() in address.lower()
    ]
    for address in matches:
        print(address)
    if not matches:
        print("No address found")
    print("")


# Run the search whenever the text box is submitted.
x.on_submit(callback)

#### 7.2 NOTE : Use the below cell if you are using Watson Studio

In [None]:
def callback_watson_studio(text):
    """Print every translated address that contains `text`.

    Args:
        text: Search string. Matching is a case-insensitive substring test
            against each entry of the notebook-level `addr_english` list.

    Prints a header line, then either the matching addresses (one per line)
    or "No address found", followed by an empty line.
    """
    print("Results for", text, ":")
    hits = [entry for entry in addr_english if text.lower() in entry.lower()]
    if hits:
        for entry in hits:
            print(entry)
    else:
        print("No address found")
    print("")

In [None]:
# Enter a location below. All translated addresses that contain this text are
# printed by the next cell.
# NOTE(review): this rebinds the global `location`, which the visualization
# cell builds as a list of names and passes to interact(); re-running that
# interact() call after this cell would receive this string instead.
location = "Maharashtra"

In [None]:
# Only when the working directory is under /home/dsxuser: print the addresses
# matching the location entered in the previous cell.
if ("/home/dsxuser" in os.getcwd()):
        callback_watson_studio(location)