In [1]:
##Longhand - M. Cook - 2022
##Takes text corpus and returns immersive visualization
##https://github.com/Cook4986/Longhand

In [2]:
#Takes Bag-of-Words and returns json dump of common nouns, Sketchfab models/uids, and relative percentage of occurrence
import json
import os
import time
from collections import Counter
from pathlib import Path

import requests
import spacy
from requests.exceptions import RequestException

#select language model (https://spacy.io/models)
model = "en_core_web_lg"
nlp = spacy.load(model)
nlp.max_length = 100000000 #raised so that very long corpora can be parsed in one pass
stopwords = nlp.Defaults.stop_words
#custom stop words, one entry per line; the context manager closes the handle after reading
with open("/Users/matthewcook/Dropbox/Viz/Longhand/Longhand/longhandStopWords.txt", "r") as stopword_file:
    custom_stopwords = stopword_file.read().splitlines()
#single set so that each token needs only one constant-time membership test
excluded_words = set(stopwords).union(custom_stopwords)

#I/O
BoW = "/Users/matthewcook/Dropbox/Viz/Longhand/Corpora/Astounding Stories, Sept. 1930.txt"#plaintext bag-of-words input
stem = BoW.split(".txt")[0].split("Corpora/") #[path up to "Corpora/", corpus title]
title = stem[1]
output = stem[0] + "Objects/"+ title + ".txt" #json dump of the located models
log = stem[0] + "Objects/"+ title + "_log.txt" #plaintext run log

#declarations
model_size = 1500 #face count
SKETCHFAB_API_URL = "https://api.sketchfab.com/v3/search?type=models&count=24"
#SECURITY: prefer exporting SKETCHFAB_API_TOKEN over editing this cell. The literal fallback only
#keeps existing runs working; it is committed in source, so it should be rotated and then removed.
API_TOKEN = os.environ.get("SKETCHFAB_API_TOKEN", '16e8b3dbbaf74db6b8dbb005df4b154f') #from Sketchfab - keep private
results = 50 #target number of models, potentially limited by NER outputs
slug = ["cultural-heritage-history"] #from: https://api.sketchfab.com/v3/categories (not referenced by the query in this cell)
start = time.time()

#data structures
nouns = [] #nouns in Bag-of-Words
freqs = [] # noun appearance frequencies
objects = {} # key = common nouns; value(s) = [noun count, model name, UID, URI, face count]

#parse Bag-of-Words with SpaCy
with open(BoW, encoding="utf-8") as file:
    print("Tokenizing text...")
    print("\n")
    corpus_text = file.read()
document = nlp(corpus_text)

#collate nouns in corpus: keep lemmas longer than two characters whose surface form is not a stop word
for token in document:
    if token.pos_ == 'NOUN' and len(token.lemma_) > 2 and token.text.lower() not in excluded_words:
        nouns.append(token.lemma_)
word_freq = Counter(nouns)
common = word_freq.most_common(results)
print("Common nouns named in the target corpus: ")
print("\n")
print(common)
print("\n")

#Sketchfab API payload function 
##From https://sketchfab.com/developers/data-api/v3/python#example-python-model
def _get_request_payload(*, data=None, files=None, json_payload=False):
    """Helper method that returns the authentication token and proper content type depending on
    whether or not we use JSON payload."""
    data = data or {}
    files = files or {}
    headers = {'Authorization': 'Token {}'.format(API_TOKEN)}
    if json_payload:
        headers.update({'Content-Type': 'application/json'})
        data = json.dumps(data)
    return {'data': data, 'files': files, 'headers': headers}

#query Sketchfab with each common noun and keep the first usable model per noun
REQUEST_TIMEOUT = 30 #seconds to wait on Sketchfab before giving up on a noun
query = "" #pre-set so the log below can still be written when no nouns were found
payload = _get_request_payload() #identical for every search, so it is built once
for word in common:
    key = str(word[0])
    #alternative queries:
    #query = ("&q="+(key)+"&user=hmane"+"&downloadable=true&max_face_count=" + str(model_size))
    #query ="&q="+(key)+"&downloadable=true&max_face_count="+ str(model_size)
    query = "&q="+(key)+"&tags="+(key)+"&downloadable=true&max_face_count="+ str(model_size) #keys must be in name and tag
    search_endpoint = SKETCHFAB_API_URL + query
    try:
        response = requests.get(search_endpoint, timeout=REQUEST_TIMEOUT, **payload)
        response.raise_for_status()
        data = response.json()
    except (RequestException, ValueError) as error:
        #a failed or malformed response only costs this noun, not the whole run
        print("Sketchfab query failed for '" + key + "': " + str(error))
        continue
    #parse json response: string-match the noun against each model name
    for item in data.get('results', []):
        name = str(item['name'])
        size = int(item.get('faceCount') or 0) #a missing or null face count is treated as 0 and skipped
        if (key.lower() in name.lower()) and (key not in objects) and (size != 0):
            freqs.append(word[1])
            objects[key] = [word[1], name, item['uid'], item['uri'], size]
            break #once a model is stored, later results can no longer match
print("\n")

#write object (output) dictionary to disk
Path(output).parent.mkdir(parents=True, exist_ok=True) #create the output folder if it is missing
with open(output, 'w') as file:
    json.dump(objects, file)

#print hits and relative percentages in target corpus
total_hits = sum(freqs)
for key, value in objects.items():
    count, name, _uid, _uri, size = value
    print("Model located for '" + key + "':")
    print(name.center(24))
    percentage = round((count / total_hits) * 100, 2)
    print("Represents " + str(percentage) + "% of models identified.")
    if size > 10000:
        print("Warning: Model size exceeds 10000 faces " + "(" + (str(size)) + " faces)")
    print("\n")
print("\n")
print(str(len(objects)) + " suitable models located on Sketchfab written to disk")
print("\n")

#write log + terminate program
end = time.time()
elapsed = str(end - start) + " seconds elapsed"
print(elapsed)
key = "key"
#"entities" only exists in the named-entity variant of this notebook; look it up safely so this cell cannot raise NameError
entity_list = globals().get("entities")
log_lines = [
    title,
    "Using " + model + " spaCy model.",
    "max model size = " + str(model_size) + " polys",
]
if word_freq == Counter(nouns):
    log_lines.append("parsing Nouns")
elif entity_list is not None and word_freq == Counter(entity_list):
    log_lines.append("parsing Named Entities")
log_lines.append(elapsed)
log_lines.append(str(len(objects)) + " suitable models located on Sketchfab.")
log_lines.append("SF query used: " + str(query)) #query built for the last noun searched
#a separate handle name keeps "log" bound to the path string instead of a closed file
with open(log, 'w', encoding="utf-8") as log_file:
    log_file.write("\n".join(log_lines) + "\n")
print("\n")
print("have a nice day")


Tokenizing text...


Common nouns named in the target corpus: 


[('man', 225), ('ship', 140), ('foot', 112), ('work', 106), ('light', 105), ('eye', 104), ('story', 97), ('cube', 90), ('hand', 89), ('people', 84), ('room', 82), ('city', 80), ('world', 79), ('water', 79), ('air', 79), ('hour', 77), ('head', 69), ('door', 64), ('voice', 62), ('night', 62), ('space', 60), ('face', 59), ('magazine', 58), ('arm', 57), ('wall', 56), ('order', 53), ('power', 51), ('house', 51), ('thing', 50), ('floor', 50), ('ball', 48), ('word', 47), ('street', 47), ('fire', 45), ('tentacle', 45), ('beetle', 44), ('ray', 43), ('glass', 42), ('mile', 40), ('sea', 40), ('child', 40), ('fish', 39), ('flyer', 38), ('sort', 36), ('ground', 36), ('insect', 36), ('year', 35), ('area', 35), ('hole', 35), ('woman', 34)]




Model located for 'man':
simple low poly man free download for tutorial
Represents 7.8% of models identified.


Model located for 'ship':
       space ship       
Represents 4.85% of models identi

In [None]:
##Launches Blender from terminal and initiates model download script
#Runs the Blender executable with --python pointing at Longhand_downloader.py.
#NOTE(review): both paths are absolute and machine-specific; adjust them to the local Blender install and repository checkout.
#NOTE(review): presumably the downloader reads the json written by the previous cell - confirm in Longhand_downloader.py.
!/Applications/Blender.app/Contents/MacOS/Blender --python /Users/matthewcook/Dropbox/Viz/Longhand/Longhand/Longhand_downloader.py


Read prefs: /Users/matthewcook/Library/Application Support/Blender/3.3/config/userpref.blend
Register GLTF Exporter
Loading icon: spawn-point.png
Registering component: ambient-light
Registering component: ammo-shape
Registering component: audio
Registering component: audio-params
Registering component: audio-settings
Registering component: zone-audio-source
Registering component: audio-target
Registering component: audio-zone
Registering component: billboard
Registering component: directional-light
Registering component: environment-settings
Registering component: fog
Registering component: frustrum
Registering component: hemisphere-light
Registering component: image
Registering component: link
Registering component: loop-animation
Registering component: media-frame
Registering component: model
Registering component: morph-audio-feedback
Registering component: nav-mesh
Registering component: networked
Registering component: particle-emitter
Registering component: personal-space-invade

In [None]:
##To-Do##
#Fork entities and nouns notebook(s)
#"objects" output global
#collision detection
#collect image covers (HathiTrust)
#100MB automatic decimation
#text (key) above models
#word2vec
#Read plaintext "title" line for log and 3d text
#Streamlit deployment
#text-to-3D (AI)
#package README in "other docs" on sketchfab
#pop-up UI, to (checkbox) include/exclude relevant Sketchfab models