# Transcribed Stories from Google Cloud Vision API
### `transcribed_stories.csv` has columns 'Submission ID' and 'Transcribed Text'
### The `transcribe` function is the basis for `transcription.py`, which is used in the app

In [15]:
# Imports
import os.path
from google.cloud import vision
import io
from google.oauth2 import service_account
import pandas as pd

In [95]:
def transcribe(
    image_path,
    credentials_path=(
        '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/'
        'story_sqaud/Story Squad-6122da7459cf.json'
    ),
):
    '''
    Reads in a single local image, sends it to the Google Cloud Vision API
    and returns the transcribed text as a single-line string.

    Parameters:
        image_path: path to a local image file.
        credentials_path: path to the service account JSON key used to
            authenticate with Google Cloud. Defaults to the key location
            this notebook was originally run with.

    Returns:
        The description of the first text annotation with newlines replaced
        by spaces, or 'No Text' when the API returns no text annotations.

    Raises:
        RuntimeError: if the API response carries an error message, so that
            a failed request is not mistaken for a page without text.
    '''

    # Read the raw bytes of the local image
    with io.open(image_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)

    # NOTE: for a remote image, build an empty vision.types.Image() and set
    # image.source.image_uri to the uri instead of passing content.

    # Connect to Google API client
    creds = service_account.Credentials.from_service_account_file(credentials_path)
    client = vision.ImageAnnotatorClient(credentials=creds)
    response = client.document_text_detection(image=image)

    # Surface API-side failures instead of silently reporting 'No Text'
    if response.error.message:
        raise RuntimeError(
            'Vision API error for {}: {}'.format(image_path, response.error.message)
        )

    if not response.text_annotations:
        return 'No Text'

    # Flatten the transcription onto one line
    return response.text_annotations[0].description.replace('\n', ' ')

In [96]:
def full_story(directory):
    '''
    Runs transcribe on each JPEG page found directly inside the given
    directory and returns the text of all pages concatenated into one string.

    Parameters:
        directory: path to a folder holding the page images of one story.

    Returns:
        The transcriptions of every page joined together in sorted filename
        order; an empty string when the folder contains no JPEG files.
    '''
    # Collect the page images. The extension is matched case-insensitively
    # so files such as 'page1.JPG' or 'page1.jpeg' are not silently skipped.
    paths = [
        os.path.join(directory, item)
        for item in os.listdir(directory)
        if item.lower().endswith(('.jpg', '.jpeg'))
    ]

    # Sort so pages are processed in a stable order. The sort is
    # lexicographic, so page numbers in filenames need zero padding
    # ('page02' before 'page10') to come out in reading order.
    paths.sort()

    # Feed each individual image into the Google API and combine the pages
    return ''.join(transcribe(image_path) for image_path in paths)

In [68]:
# Smoke test: transcribe one submission folder (3201) and print the result
directory = os.path.join(
    '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud',
    'Stories Dataset',
    'Transcribed Stories',
    '32--',
    '3201',
)

print(full_story(directory))

-3201 If you think there is moce embaressing then me your wrong because there's no such thing as luck in my world. And somehow I become the coolest kid in school and the teachers pet. normal day Swiettie Shume hece gizli ile said I paused Jessie peaples at me like I per crazy then hissed Im not spying someone just came out of the haunted house it gets worse every time you that house was abandon facenfocaratian look, cl opened the curtain I saw Swietle then I saw a man walk lout, She looping an people 3201 Next thing you know the lights twen off , I hear a scream I relo iliaed it was Tessie. After that it blank Swiettie saved us and I was the coolest, kid ever. I still don't know what happened but I'm cool for that. ar 


In [69]:
# Pull out each parent folder (31--, 32--, 51--, 52--) in the given Stories Dataset
parent_path = '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories'

# Keep only real directories: this drops '.DS_Store' as well as any other
# stray file that would otherwise make the later os.listdir call fail
folders = [
    os.path.join(parent_path, route)
    for route in os.listdir(parent_path)
    if os.path.isdir(os.path.join(parent_path, route))
]
print(folders)

['/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/32--', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/52--', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/51--']


In [70]:
# Pull out each submission path inside the parent folders
paths = []

for folder in folders:
    for entry in os.listdir(folder):
        submission_path = os.path.join(folder, entry)
        # Keep only real directories: this drops '.DS_Store' and any other
        # stray file that is not a submission folder
        if os.path.isdir(submission_path):
            paths.append(submission_path)
print(paths)

set/Transcribed Stories/31--/3105', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3129', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3116', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3111', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3118', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3127', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3120', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Transcribed Stories/31--/3121', '/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Stories Dataset/Tran

In [72]:
# Spot-check the collected paths: transcribe the second submission folder
# in the list (the notebook displays the returned string)
test_path = paths[1]

full_story(test_path)

"3106 D she was very, a berenang The pony that didn't know . Once there was a cong to. Her name is show light Star Chrismas morning she escaped to know why she was treated like thin ain said to hersh where should to The next day she found Os pony Snow light sound She ran to find the head- ก in the tallest tower, she asked can help me find why no one headmiste said thats easy, because I wanted you here. Snowlight said why worden getminant car The head mistress said come on your most of Saipan tha pog. By the way, my The next day she had already made tons of friends. Their names are Elisa Tessa and Lina. One day, because her ' they came right to her and told her the she stopped her hoors and every thing.They were planning a sypin hame 3104 2 party for her and she ruined it being a suprise! She selt so sorry. She learned to wait for her friends to tell her. She also learned she was magical. The End "

In [97]:
# Creates list of entries (one dict per submission) to convert into a df

data_list = []

for path in paths:
    # The submission folder's name is the story ID; taking the basename
    # avoids assuming that every ID is exactly four characters long
    story_id = os.path.basename(os.path.normpath(path))
    # Print the ID before transcribing so progress is visible during the run
    print(story_id)
    transcribed_text = full_story(path)
    entry = {"Submission ID": story_id, "Transcribed Text": transcribed_text}
    data_list.append(entry)


3132
3104
3103
3117
3102
3105
3129
3116
3111
3118
3127
3120
3121
3119
3126
3110
3128
3131
3109
3107
3106
3101
3108
3130
3112
3115
3123
3124
3125
3122
3114
3113
3245
3216
3229
3211
3227
3218
3220
3243
3244
3221
3226
3219
3210
3217
3228
3235
3232
3204
3203
3202
3205
3234
3212
3215
3223
3224
3248
3241
3246
3225
3222
3214
3213
3247
3240
3231
3209
3236
3238
3207
3239
3206
3201
3208
3237
3230
5254
5253
5262
5236
5209
5207
5238
5263
5264
5252
5255
5206
5239
5230
5237
5208
5215
5224
5223
5248
5246
5241
5222
5225
5213
5214
5240
5247
5249
5232
5235
5203
5204
5250
5257
5261
5259
5205
5202
5234
5233
5258
5260
5256
5251
5242
5245
5229
5216
5220
5218
5227
5244
5243
5219
5221
5228
5217
5210
5115
5112
5124
5123
5122
5125
5113
5114
5109
5131
5107
5101
5106
5130
5108
5111
5116
5129
5120
5118
5126
5119
5121
5117
5110
5132
5103
5104
5105
5102


In [98]:
# Check that all of the stories were transcribed: one entry is appended per
# submission path, so this count should equal len(paths)
len(data_list)

167

In [99]:
# Build df: one row per submission, with columns taken from the dict keys
# ("Submission ID" and "Transcribed Text")

df = pd.DataFrame(data_list)

# Preview the first rows
df.head()

Unnamed: 0,Submission ID,Transcribed Text
0,3132,Page. I 3132 Once there was a little cheatah a...
1,3104,"3106 D she was very, a berenang The pony that ..."
2,3103,3103 Rainbow the Unica unicom named some een P...
3,3117,3117 O gum drop land gumdrop. land is prace We...
4,3102,3102 The secret fifth grade E am Anella I am s...


In [100]:
# Save df. index=False keeps the row index out of the file so the CSV holds
# only the 'Submission ID' and 'Transcribed Text' columns (otherwise reading
# it back produces an extra 'Unnamed: 0' column)
df.to_csv('transcribed_stories.csv', index=False)