In [None]:
##Longhand - M. Cook - 2022
##Takes Bag-of-Words and returns multi-user, immersive visualization
##https://github.com/Cook4986/Longhand

In [None]:
#Takes local image set and returns absolute filepaths (.txt) for Handprint input + lookup table (.csv)
import pandas as pd
import os
import sys
from pathlib import Path

#inputs
target ="..." #target image directory

#outputs
pathsFile = "..." # plain-text list of image paths (one per line) for Handprint
tableOut = "...csv"# transcription batch csv

#one record per image; the dataframe is built once after the loop because
#DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call
records = []

#walk the target directory recursively and record every *.JPG found
#(the pattern is case-sensitive on case-sensitive filesystems, so lower-case ".jpg" files are skipped)
with open(pathsFile, "w") as pathsOut: #context manager closes the file even if the loop raises
    for path in sorted(Path(target).rglob('*.JPG')):
        absolute = (str(path.parent) + "/" + path.name) #image path rooted at `target` (absolute only if `target` is)
        pathsOut.write(absolute + "\n") #one path per line for Handprint's --from-file
        records.append({'NAME':path.stem,'IMG-PATH':absolute})
        print("image " + path.stem + " located " + "at " + absolute) #console out

#dataframe (explicit columns keep the header stable even when no image was found)
df = pd.DataFrame(records, columns=['NAME','IMG-PATH'])

#append the NAME and IMG-PATH values to the lookup table
#newline='' is what pandas expects for a caller-opened handle (avoids blank rows on Windows);
#the header is written only when the file is still empty, so re-running the cell
#no longer inserts a second header line in the middle of the CSV
with open(tableOut, mode = 'a', newline='') as f:
    df.to_csv(f,index=False, header=f.tell()==0)

print("\n")
print("lookup table created for corpus")

In [None]:
#Run Handprint using paths from local file
##Mike Hucka designed and implemented Handprint beginning in mid-2018.
##installation instructions at https://github.com/caltechlibrary/handprint

##generate Microsoft results
#IPython shell escape: runs the Handprint command-line tool with the "microsoft" service.
#NOTE(review): the flag meanings below are taken from Handprint's README - confirm against the installed version:
#  -@ FILE         write a debug trace to FILE
#  --from-file F   read the list of images to process from F (presumably the paths file written by the first cell)
#  --no-grid       do not build the summary "grid" image
#  --extended      also save the raw results (.json) and text (.txt) next to the annotated image
#  --output DIR    directory that receives the results
!handprint --service microsoft -@"....txt" --from-file "....txt" --no-grid --extended --output "..."


In [None]:
#Takes preliminary lookup table (CSV) and outputs updated table with locations for Handprint outputs, by file type
import pandas as pd
import os
import sys
from pathlib import Path

#Declarations
table = ".../Baptismal Records/Lag 1838-1869/LookupTable.csv" ##Lookup table
hpOut =".../Baptismal Records/Lag 1838-1869/Lag 1838-1869_MSFT"##HandPrint Outputs
types = ['MSFT-IMG','MSFT-JSON','MSFT-TXT']##additional lookup table columns
suffX = ['.png','.json','.txt']#OCR file types, paired by position with `types`

#read the lookup table once (it used to be re-read from disk for every output file);
#FILE-OSN is forced to str so numeric-looking identifiers still compare equal to file stems
#NOTE(review): this cell requires a "FILE-OSN" column, while the first cell of the notebook writes
#"NAME"/"IMG-PATH" - presumably the table used here comes from a DRS export; confirm
lookup = pd.read_csv(table, dtype={'FILE-OSN': str})
known = set(lookup['FILE-OSN'].dropna())

#match DRS "FILE-OSN" to OCR outputs, one new column per file type
#columns are filled by FILE-OSN lookup rather than by row position, so a missing or extra
#Handprint output can no longer shift every following path onto the wrong row
conC = pd.DataFrame(index=lookup.index)
for num, (column, suffix) in enumerate(zip(types, suffX)): #Loop through file types
    located = {} #FILE-OSN -> path of its Handprint output for this file type
    for path in sorted(Path(hpOut).rglob('*' + suffix)):
        #define location and match to DRS's "FILE-OSN" column value
        absolute = (str(path.parent) + "/" + path.name)
        osn = path.stem.split('.')[0] #text before the first "." of the file name
        if osn in known and osn not in located: #first file in sorted order wins
            located[osn] = absolute
            print("\n")
            print("dataframe " + str(num + 1) + " appended with " + suffix + " located at " + absolute)
    conC[column] = lookup['FILE-OSN'].map(located) #rows without an output stay empty (NaN)

#concatenate the new columns with the existing lookup table
#columns left over from an earlier run are dropped first so re-running does not duplicate them
print("\n")
print("new dataframes concatenated")
out = pd.concat([lookup.drop(columns=types, errors='ignore'), conC], axis=1)

#update lookup table CSV (overwrites the file with the original columns plus the new ones)
out.to_csv(table, index=False)
print("\n")
print("lookup table updated with HandPrint output locations for " + str(types) + "-type transcriptions")
    

In [None]:
import os
import sys
import re
from pathlib import Path

#input/output
paths = '....txt'#plain text list of urls or filenames (pre-existing)
target = '...' #HP outputs (pre-existing)
textOut = '....txt'#Bag-of-Words output

#the output may live under `target` and would then match the *.txt pattern itself
outResolved = Path(textOut).resolve()

#append bag-of-words with headers and transcriptions
#the file is opened in append mode, so running the cell again adds the corpus a second time;
#UTF-8 is explicit because the next cell reads the bag-of-words back as UTF-8
with open(textOut, "a", encoding="utf-8") as BoW: #closed even if a transcription fails to load
    for path in sorted(Path(target).rglob('*.txt')):
        if path.resolve() == outResolved:
            continue #never feed the bag-of-words into itself
        header = path.stem.split('.')
        BoW.write("\n")
        BoW.write(header[0]) #write header (file name up to the first ".") to bag-of-words
        BoW.write("\n")
        print((header[0]) + " added to bag-of-words")
        absolute = (str(path.parent) + "/" + path.name)
        with open(absolute, "r", encoding="utf-8") as contents: #previously left open for every file
            BoW.write(contents.read()) #write transcription to bag-of-words
        print(("Transcription for " + header[0]) + " added to bag-of-words")
        BoW.write("\n")
print("Bag-of-words generated and output from corpus")


In [None]:
#Takes corpus Bag-of-Words and returns json dump of common nouns, Sketchfab models/uids, and relative percentage of occurrence
import spacy
from collections import Counter
import json
import requests
from requests.exceptions import RequestException
import time

#declarations
import os #local import: this cell's own import list does not include os

nlp = spacy.load("en_core_web_sm")
BoW = "....txt"
SKETCHFAB_API_URL = "https://api.sketchfab.com/v3/search?type=models&count=1" #note count parameter
#SECURITY: the API token used to be committed in this cell. It is now read from the
#environment; the previously published token should be revoked in the Sketchfab account settings.
API_TOKEN = os.environ.get("SKETCHFAB_API_TOKEN")
if not API_TOKEN:
    raise RuntimeError("Set the SKETCHFAB_API_TOKEN environment variable before running this cell")
output = ".../objects.txt"
count = 0
start = time.time()

#data structures
freqs = [] # noun appearances in BoW (counts of the most common nouns, same order as `common`)
objects = {} # key = common noun; value = [percentage], extended later with model names and uids

#parse Bag-of-Words parts-of-speech with Spacy (english)
with open(BoW, encoding="utf-8") as file: #the with-block closes the file; no explicit close needed
    iliad = file.read() #full text of the bag-of-words
document = nlp(iliad)
nouns = [token.text for token in document if token.pos_ == 'NOUN'] # all nouns in Bag-of-Words

#frequency & dictionary with common words and percentages
word_freq = Counter(nouns)
common = word_freq.most_common(100) #up to 100 (noun, count) pairs, most frequent first
freqs.extend(appearances for _, appearances in common)
Sum = sum(freqs)
#percentages are relative to the total of the retained nouns only, not to every noun in the text;
#when no noun was found `common` is empty and the loop (and its division) is skipped
for noun, appearances in common:
    flowt = (appearances / Sum) * 100
    percentage = round(flowt, 2)
    objects[noun] = [percentage]

#Sketchfab API payload function 
##From https://sketchfab.com/developers/data-api/v3/python#example-python-model
def _get_request_payload(*, data=None, files=None, json_payload=False):
    """Build the keyword arguments shared by Sketchfab API requests.

    All parameters are keyword-only.

    data: request body; a falsy value is replaced by an empty dict.
    files: files to send; a falsy value is replaced by an empty dict.
    json_payload: when true, the body is serialised to a JSON string and a
        JSON Content-Type header is added.

    Returns a dict with the keys 'data', 'files' and 'headers'. The headers
    always carry the Authorization token taken from the module-level API_TOKEN.
    """
    request_headers = {'Authorization': 'Token {}'.format(API_TOKEN)}
    body = data if data else {}
    attachments = files if files else {}
    if json_payload:
        request_headers['Content-Type'] = 'application/json'
        body = json.dumps(body)
    return {'data': body, 'files': attachments, 'headers': request_headers}

#query Sketchfab with the nouns in BoW and return/write list of uids + model names
uids = [] # "name, uid" string per located model (this list was used below but never defined -> NameError)
payload = _get_request_payload() #identical for every request, so built once
for key in objects.keys():
    print("Searching: " + str(key))
    print("\n")
    try:
        #`params` lets requests URL-encode the noun and merge it into the query string
        #already present in SKETCHFAB_API_URL; the timeout keeps a stalled connection from hanging the cell
        response = requests.get(
            SKETCHFAB_API_URL,
            params={'q': str(key), 'downloadable': 'true'},
            timeout=30,
            **payload
        )
        response.raise_for_status()
        data = response.json()
    except (RequestException, ValueError) as err:
        #best effort: one failed search (network error, HTTP error, non-JSON body) skips that noun only
        print("search failed for " + str(key) + ": " + str(err))
        print("\n")
        continue
    objects[key] = objects[key][:1] #keep only the percentage so re-running the cell does not pile up duplicates
    for item in data.get('results', []):
        uid = item['uid']
        name = str(item['name'])
        objects[key] += [name, uid]
        print("the following model been located: ")
        print(name+(" \nuid: ")+uid)
        print("\n")
        uids.append(name +", "+ uid)

#write to disk and close program
with open(output, 'w') as file: #the with-block closes the file; no explicit close needed
    json.dump(objects, file)
print("Objects written to disk")
print("\n")
end = time.time()
print(str(end - start) + " seconds elapsed" )
print("\n")
print("have a nice day")


In [None]:
##Launches Blender from terminal and initiates bpy script
#IPython shell escape: starts the Blender executable and hands it operator_file_import.py via --python.
#NOTE(review): that script is not part of this notebook; presumably it reads the objects file
#written by the previous cell - confirm before changing either side.
!/.../Blender --python /.../operator_file_import.py


In [None]:
##TO DO:
#Place search results in the scene, without overlap, using frequencies
#export scene (gltf) for use in Hubs
#automate scene change function in Hubs, based on newly created .glb
#Compare: 
##viewing angle limitations and text legibility
##                     vs.
##non-accidental (3D) object properties and object recognition