# Convert Image To Text 

## 1. Setup

To prepare your environment, install the required packages and provide the service credentials for IBM Cloud Object Storage.

### 1.1 Install the necessary packages

You will need to install the following packages:

- PIL: The Python Imaging Library (PIL) adds image processing capabilities to your Python interpreter.
- pytesseract: Python-tesseract is a Python wrapper for Google's Tesseract-OCR.
- ibm-cos-sdk: Object Storage library for Python.

In [None]:
!pip install pytesseract

In [None]:
!pip install ibm-cos-sdk

### 1.2 Import packages and libraries
Import the packages and libraries that you'll use:

In [None]:
try:
    import Image
except ImportError:
    from PIL import Image
import pytesseract

import ibm_boto3
from ibm_botocore.client import Config

import json
import requests

## 2. Configuration
Set the configurable items of the notebook below.

### 2.1 Global Variables
Set the global variables: `req_paths` lists the image files to convert.

In [None]:
# Paths of the input images; the conversion loop processes them in list order.
req_paths = [
    '/Users/muralidhar/Desktop/Data/Form1 copy 2.jpg',
]
#credentials_path='C:/Users/IBM_ADMIN/credentials/credentials.json'

### 2.2 Connect to Object Storage

In [None]:
# Load the Object Storage service credentials from a local JSON file, then
# download the endpoint catalogue whose URL is stored in those credentials.
with open('/Users/muralidhar/Desktop/credentials.json', encoding='utf-8') as data_file:
    credentials = json.load(data_file)

# Show what was loaded without echoing the API key: cell output is saved with
# the notebook and is easy to share by accident. All other fields are printed
# unchanged.
printable_credentials = dict(credentials)
if 'apikey' in printable_credentials:
    printable_credentials['apikey'] = '********'
print("Service credential:")
print(json.dumps(printable_credentials, indent=2))

# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error directly instead of letting it
# surface later as a JSON decoding failure.
endpoints_response = requests.get(credentials.get('endpoints'), timeout=30)
endpoints_response.raise_for_status()
endpoints = endpoints_response.json()

In [None]:
''' Identify the region based on the region of the cloud object storage
'''
# Bare expression: as the last statement of the cell it makes the notebook
# display the `endpoints` mapping, so the entry matching the storage region
# can be looked up by eye.
endpoints

In [None]:
# Collect everything the client constructor needs.
# Host names come from the endpoint catalogue; the storage host chosen here is
# the public "us-geo" entry of the US cross-region group.
iam_host = endpoints['identity-endpoints']['iam-token']
cos_host = endpoints['service-endpoints']['cross-region']['us']['public']['us-geo']

# API key and service instance id come from the credentials file.
api_key = credentials.get('apikey')
service_instance_id = credentials.get('resource_instance_id')

# Construct the auth and COS endpoint URLs.
auth_endpoint = "".join(("https://", iam_host, "/oidc/token"))
service_endpoint = "".join(("https://", cos_host))

In [None]:
# Create the client object that the rest of the notebook uses for every
# Object Storage call (list_buckets, put_object).
cos = ibm_boto3.client(
    's3',
    ibm_api_key_id=api_key,
    ibm_service_instance_id=service_instance_id,
    ibm_auth_endpoint=auth_endpoint,
    config=Config(signature_version='oauth'),
    endpoint_url=service_endpoint,
)

In [None]:
# Fetch the bucket listing and print just the bucket names as indented JSON.
response = cos.list_buckets()
buckets = list(map(lambda entry: entry['Name'], response['Buckets']))
print("Current Bucket List:")
print(json.dumps(buckets, indent=2), "---", sep="\n")

In [None]:
# Bucket that put_file() writes to. Choose the bucket whose name matches your
# project on Watson Studio.
bucket_name = 'imagerecognitionpattern-donotdelete-pr-7whfpase0vr47w'

In [None]:
def put_file(filename, filecontents):
    '''Store filecontents in the bucket named by bucket_name, using filename as the object key.

    Returns the value produced by cos.put_object.
    '''
    return cos.put_object(Bucket=bucket_name, Key=filename, Body=filecontents)

def load_string(fileobject):
    '''Read fileobject to the end and return what read() produced.'''
    return fileobject.read()

## 3. Convert
This function extracts the text from the given input image and stores it in a text file in Object Storage.

In [None]:
def convert(filename, name):
    '''Extract the text from one image and upload it with put_file().

    filename -- path of the image file to open
    name     -- object key under which the extracted text is stored

    Returns None; the extracted text is printed and uploaded as side effects.
    '''
    print("name", name)
    # Image.open() keeps the underlying file open; the context manager closes
    # it as soon as OCR has finished instead of leaking one handle per image.
    with Image.open(filename) as img:
        text1 = pytesseract.image_to_string(img)
    print("Text from Image")
    print(text1)
    put_file(name, text1)

In [None]:
# Convert every configured image. Output objects are numbered from 1 in list
# order: form-doc-1.txt, form-doc-2.txt, ...
# enumerate() supplies the running number, so no manual counter is needed.
for i, f in enumerate(req_paths, start=1):
    name = 'form-doc-' + str(i) + '.txt'
    convert(f, name)