# Convert Image To Text 

## 1. Setup

To prepare your environment, you need to install some packages and enter credentials for the Watson services.

### 1.1 Install the necessary packages

You will need the following packages:
- PIL: The Python Imaging Library (PIL) adds image processing capabilities to your Python interpreter. On Python 3 it is provided by the `Pillow` package.
- pytesseract: Python-tesseract is a Python wrapper for Google's Tesseract-OCR.
- ibm-cos-sdk: The IBM Cloud Object Storage library for Python.

In [1]:
!pip install pytesseract



You are using pip version 9.0.1, however version 10.0.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [2]:
!pip install ibm-cos-sdk



You are using pip version 9.0.1, however version 10.0.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


### 1.2 Import packages and libraries
Import the packages and libraries that you'll use:

In [3]:
try:
    import Image
except ImportError:
    from PIL import Image
import pytesseract

import ibm_boto3
from ibm_botocore.client import Config

import json
import requests

## 2. Configuration
Add configurable items of the notebook below

### 2.1 Global Variables
Add global variables.

In [4]:
# Local image files to run OCR on; add more entries to convert several images.
req_paths = [
    'C:/Users/IBM_ADMIN/Desktop/Data/Rental_agreement_inserted.png',
]
# credentials_path = 'C:/Users/IBM_ADMIN/credentials/credentials.json'

### 2.2 Connect to Object Storage

In [5]:
# Load the Cloud Object Storage service credentials and fetch the endpoint
# catalogue they point to.
with open('C:/Users/IBM_ADMIN/Desktop/credentials.json') as data_file:
    credentials = json.load(data_file)


def _redact(value):
    '''Return `value` with every leaf replaced by a placeholder, keeping dict structure.'''
    if isinstance(value, dict):
        return {key: _redact(inner) for key, inner in value.items()}
    return '<redacted>'


# Never echo secrets into the notebook: cell outputs are saved with the file
# and shared along with it. Only the key material is masked; every other
# field is printed unchanged.
_SECRET_FIELDS = ('apikey', 'cos_hmac_keys')
printable_credentials = {
    key: (_redact(value) if key in _SECRET_FIELDS else value)
    for key, value in credentials.items()
}
print("Service credential:")
print(json.dumps(printable_credentials, indent=2))

# Fail fast on a missing 'endpoints' entry, a hung connection or an HTTP
# error instead of trying to parse an error page as JSON.
endpoints_response = requests.get(credentials['endpoints'], timeout=30)
endpoints_response.raise_for_status()
endpoints = endpoints_response.json()

Service credential:
{
  "apikey": "<redacted>",
  "cos_hmac_keys": {
    "access_key_id": "<redacted>",
    "secret_access_key": "<redacted>"
  },
  "endpoints": "https://cos-service.bluemix.net/endpoints",
  "iam_apikey_description": "Auto generated apikey during resource-key operation for Instance - crn:v1:bluemix:public:cloud-object-storage:global:a/19f37964ad6f30655b02c7c1c3f92c0c:e857a804-ea66-4403-af9f-dcb1f7be9ae1::",
  "iam_apikey_name": "auto-generated-apikey-69bf27dc-127f-4303-ac95-ce4f550f2b13",
  "iam_role_crn": "crn:v1:bluemix:public:iam::::role:Administrator",
  "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/19f37964ad6f30655b02c7c1c3f92c0c::serviceid:ServiceId-0f00013b-1f83-4ebe-a235-fd94cc90a5d5",
  "resource_instance_id": "crn:v1:bluemix:public:cloud-object-storage:global:a/19f37964ad6f30655b02c7c1c3f92c0c:e857a804-ea66-4403-af9f-dcb1f7be9ae1::"
}


In [6]:
# Inspect the endpoint catalogue and identify the entry that matches the
# region of your Cloud Object Storage instance.
endpoints

{'identity-endpoints': {'iam-policy': 'iampap.bluemix.net',
  'iam-token': 'iam.bluemix.net'},
 'service-endpoints': {'cross-region': {'ap': {'private': {'Hong Kong': 's3.hkg-ap-geo.objectstorage.service.networklayer.com',
     'Seoul': 's3.seo-ap-geo.objectstorage.service.networklayer.com',
     'Tokyo': 's3.tok-ap-geo.objectstorage.service.networklayer.com',
     'ap-geo': 's3.ap-geo.objectstorage.service.networklayer.com'},
    'public': {'Hong Kong': 's3.hkg-ap-geo.objectstorage.softlayer.net',
     'Seoul': 's3.seo-ap-geo.objectstorage.softlayer.net',
     'Tokyo': 's3.tok-ap-geo.objectstorage.softlayer.net',
     'ap-geo': 's3.ap-geo.objectstorage.softlayer.net'}},
   'eu': {'private': {'Amsterdam': 's3.ams-eu-geo.objectstorage.service.networklayer.com',
     'Frankfurt': 's3.fra-eu-geo.objectstorage.service.networklayer.com',
     'Milan': 's3.mil-eu-geo.objectstorage.service.networklayer.com',
     'eu-geo': ' s3.eu-geo.objectstorage.service.networklayer.com'},
    'public': {'

In [7]:
# Gather the settings needed to create the Cloud Object Storage client.
api_key = credentials.get('apikey')
service_instance_id = credentials.get('resource_instance_id')

# Hosts are read from the endpoint catalogue; the COS host used here is the
# public 'us-geo' entry of the 'us' cross-region group.
iam_host = endpoints['identity-endpoints']['iam-token']
cos_host = endpoints['service-endpoints']['cross-region']['us']['public']['us-geo']

# Construct the auth and COS endpoint URLs.
auth_endpoint = "https://" + iam_host + "/oidc/token"
service_endpoint = "https://" + cos_host

In [8]:
# S3-style client for Cloud Object Storage, built from the API key, service
# instance id and the endpoint URLs assembled in the previous cell.
cos = ibm_boto3.client(
    's3',
    endpoint_url=service_endpoint,
    ibm_auth_endpoint=auth_endpoint,
    ibm_api_key_id=api_key,
    ibm_service_instance_id=service_instance_id,
    config=Config(signature_version='oauth'),
)

In [9]:
# List the names of the buckets returned for these credentials.
response = cos.list_buckets()
buckets = [entry['Name'] for entry in response['Buckets']]
print("Current Bucket List:", json.dumps(buckets, indent=2), "---", sep="\n")

Current Bucket List:
[
  "cnn-donotdelete-pr-9xck8mbas9jav0",
  "imageanddocumentclassification-donotdelete-pr-asgjxxebuspap7",
  "nehaprojectfcf93098131e47fc98bf5a35c53613cf",
  "test1bd1b2ce1e0c4d7a96cca605a2bb0e99",
  "testdatalegaldocs",
  "travelprocessmanagement-donotdelete-pr-exgxpknefmd9b3",
  "trial",
  "trialdocclassifierd85dc7d8d1b44e2f8d783eb499e697ba",
  "try1e5849cebfc54c92a72ad6fcde9a73af"
]
---


In [10]:
# Choose the bucket that belongs to your project on Watson Studio.
bucket_name = 'imageanddocumentclassification-donotdelete-pr-asgjxxebuspap7'

In [11]:
def put_file(filename, filecontents):
    '''Upload `filecontents` to Cloud Object Storage under the key `filename`.

    Uses the notebook-level `cos` client and `bucket_name`, and returns the
    response of `put_object` unchanged.
    '''
    return cos.put_object(Body=filecontents, Key=filename, Bucket=bucket_name)

def load_string(fileobject):
    '''Read the remaining contents of `fileobject` and return them.'''
    return fileobject.read()

## 3. Convert
This function extracts the text from the input image, writes it to a local text file, and uploads that text to Cloud Object Storage.

In [14]:
def convert(filename, name):
    '''Run OCR on an image and store the recognised text locally and in Cloud Object Storage.

    Parameters
    ----------
    filename : path of the image file to read.
    name : name of the local text file to write; also used as the object key
        when the text is uploaded with `put_file`.
    '''
    # Context managers release the image handle and guarantee the text file
    # is flushed and closed, even if OCR or the write fails.
    with Image.open(filename) as img:
        text1 = pytesseract.image_to_string(img)
    with open(name, "w", encoding="utf-8") as file:
        file.write(text1)
    put_file(name, text1)

In [15]:
# Number given to the first output file; outputs are named form-doc-<n>.txt
# and numbered consecutively from here.
FIRST_DOC_INDEX = 6

for i, f in enumerate(req_paths, start=FIRST_DOC_INDEX):
    name = 'form-doc-' + str(i) + '.txt'
    convert(f, name)