In [None]:
import os
import openai
import pandas as pd
import json
from collections import Counter
import plotly.express as px
import plotly
from tqdm import tqdm
from ray.util.multiprocessing import Pool
import uuid

In [None]:
# Switch for the optional poem steps: when True the elevenlabs client is imported,
# poems are requested from ChatGPT and their voice lines are cached under assets/poems.
generate_poems = False # Set true if poems should be generated and cached

In [None]:
# The ChatGPT queries in this notebook need an OpenAI key in the environment.
if os.environ.get("OPENAI_API_KEY") is None:
    raise Exception("Missing OPENAI_API_KEY")

# The ElevenLabs client and its key are only required when poems are generated.
if generate_poems:
    from elevenlabs import generate, save
    if os.environ.get("ELEVEN_API_KEY") is None:
        raise Exception("Missing ELEVEN_API_KEY")

In [None]:
# The final subselection of architectural styles, keyed by region.
# Insertion order is significant: later cells iterate this mapping in order.
# ("Stilt House" is deliberately disabled for Central & South America.)
selstylesmap = {
    "Central & South America": ["Mesoamerican"],
    "Anglo World": ["Ranch-style House", "Victorian", "Art Deco"],
    "Intercultural": ["Colonial", "Brutalism", "Contemporary"],
    "Europe": [
        "Ancient Greek", "Ancient Roman", "Romanesque",
        "Gothic", "Baroque", "Renaissance",
        "Neoclassical", "Art Nouveau", "Bauhaus",
    ],
    "North Eurasia": ["Muscovite", "Constructivism"],
    "Sahel & Sub-Saharan Africa": ["Mud Brick", "Swahili"],
    "Middle East & North Africa": ["Ancient Egyptian", "Modern Islamic"],
    "South Asia": ["Gupta", "Indo-Saracenic"],
    "East Asia": ["Chinese Imperial", "Edo Period"],
    "Southeast Asia & South Pacific": ["Khmer Empire", "Thai"],
    "Central Asia": ["Timurid Period", "Persian"],
}

In [None]:
# Configure the openai module: request timeout attribute and the key read from the environment.
openai.request_timeout=30.0
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
## Load a list of architecture styles
def _year_to_int(label):
    """Convert a year label from the CSV to a signed int; BCE years become negative, 'present' becomes 2023."""
    if 'BCE' in label:
        return -int(label.replace(' BCE',''))
    return int(label.replace(' CE','').replace('present','2023'))

styles=pd.read_csv('datasets/architect_styles.csv')
for year_column in ('Start_Year', 'End_Year'):
    styles[year_column]=styles[year_column].apply(_year_to_int)
# Duration of each style in years.
styles['Length']=styles['End_Year']-styles['Start_Year']
styles.head()

In [None]:
# Read the per-style records from the JSON cache that this notebook keeps rewriting.
with open("datasets/architect_styles.json", 'tr') as cache_file:
    architects_by_style=json.load(cache_file)

In [None]:
# Worker pool (ray.util.multiprocessing.Pool) used by the pool.starmap calls in later cells.
pool = Pool()

In [None]:
# Rebuild the mapping with its keys in sorted order.
architects_by_style = {name: architects_by_style[name] for name in sorted(architects_by_style)}

In [None]:
# Read the cached per-region replies to the "top 10 styles" question.
with open("datasets/architect_style_cnt.json", 'tr') as cnt_file:
    architect_style_cnt=json.load(cnt_file)

## Collect Style Data from ChatGPT

In [None]:
# For every region that is not cached yet, ask ChatGPT ten times for its top-10
# architecture styles and store each reply that parses as JSON. Regions already
# present in architect_style_cnt are skipped, so a second pass finds nothing to do.
completed=False
while not completed:
    completed=True
    for continent in ["europe","america","asia","arabia","africa","australia","the world"]:
        if continent in architect_style_cnt:
            continue
        architect_style_cnt[continent]=[]
        for i in range(10):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f'List the name of the top 10 most important architecture styles in {continent} as JSON'}]
                )
                reply = completion.choices[0].message.content
            except Exception as err:
                # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
                print(f"Error processing {continent}-{i}", err)
                continue
            try:
                architect_style_cnt[continent].append(json.loads(reply))
            except (ValueError, TypeError):
                # json.JSONDecodeError is a ValueError; TypeError covers an empty (None) reply.
                print(f"Error decoding {continent}-{i}")
                print(reply)
            completed=False
        # Cache after each region so an aborted run keeps what was already collected.
        with open("datasets/architect_style_cnt.json", 'tw') as fo:
            json.dump(architect_style_cnt, fo, indent=4, ensure_ascii=False)

In [None]:
# Write the collected per-region replies back to their JSON cache.
with open("datasets/architect_style_cnt.json", 'tw') as cnt_file:
    json.dump(architect_style_cnt, fp=cnt_file, ensure_ascii=False, indent=4)

In [None]:
# Tally how often each style was named: one counter per region plus an "ALL" total.
architect_style_cnt2={"ALL": Counter()}
for region, replies in architect_style_cnt.items():
    tally=Counter()
    architect_style_cnt2[region]=tally
    for reply in replies:
        tally.update(reply)
    architect_style_cnt2["ALL"].update(tally)
    # Store plain dicts so the result can be dumped as JSON.
    architect_style_cnt2[region]=dict(tally)

# Keep only the 42 most frequently named styles in the overall tally.
architect_style_cnt2["ALL"]=dict(architect_style_cnt2["ALL"].most_common(42))

In [None]:
# Persist the aggregated style counts.
with open("datasets/architect_style_cnt2.json", 'tw') as cnt2_file:
    json.dump(architect_style_cnt2, fp=cnt2_file, ensure_ascii=False, indent=4)

In [None]:
# Ask ChatGPT for three famous architects per style and cache the answer under
# architects_by_style[style]['architects']. A reply that cannot be turned into a list is
# kept under 'reply' and the style is asked again on the next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'architects' in architects_by_style[style]:
            continue
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[ {"role": "user", "content": f'List 3 famous architects with name, birth date, death date, nationality, description, and notable buildings for "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
            print(f"Error processing {style}", err)
            continue
        entry = architects_by_style.get(style)
        if not isinstance(entry, dict):
            # No entry yet (or a malformed one): start a fresh record instead of failing below.
            entry = architects_by_style[style] = {}
        try:
            parsed = json.loads(reply)
            if isinstance(parsed, dict):
                if 'architects' in parsed:
                    parsed = parsed['architects']
                elif len(parsed) == 1:
                    # Single wrapper key with another name: use its value.
                    parsed = next(iter(parsed.values()))
            if not isinstance(parsed, list):
                raise ValueError("reply does not contain a list of architects")
            # Only the 'architects' field is written; other cached fields of the style are kept.
            entry['architects'] = parsed
        except (ValueError, TypeError):
            print(f"Error decoding {style}")
            print(reply)
            entry['reply'] = reply
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Ask ChatGPT three times per architect whether they belong to the style and store the
# raw answers (trailing periods removed) in arch['valid']. Architects that already have
# a 'valid' field are skipped.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        for arch in architects_by_style[style]['architects']:
            arch.setdefault('quality', -1)
            if 'valid' in arch:
                continue
            arch['valid']=[]
            for _ in range(3):
                try:
                    completion = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo",
                        messages=[ {"role": "user", "content": f"Was '{arch['name']}' an architect of the '{style}' architecture style, answer with yes or no?"}]
                    )
                    arch['valid'].append(completion.choices[0].message.content.replace(".",""))
                    completed=False
                except Exception as err:
                    # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
                    print(f"Error processing {style}", err)
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [None]:
def validate_example(style, example):
    """Ask ChatGPT whether `example` represents the `style` architecture style.

    Returns a dict with the keys 'style', 'example' and 'vote' (the reply text with
    periods removed), or None when the request fails for any reason.
    """
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[ {"role": "user", "content": f"Does '{example}' represent the '{style}' architecture style, answer with yes or no?"}]
        )
        return {'style':style, 'example':example, 'vote':completion.choices[0].message.content.replace(".","")}
    except Exception:
        # Best effort: a failed query yields no vote. `except Exception` rather than a
        # bare except keeps KeyboardInterrupt/SystemExit from being swallowed.
        return None

# Fill in default quality fields per style and collect three validation jobs for every
# example of a style that has no 'example_validity' yet; repeat until no jobs are left.
while True:
    jobs=[]
    for style in tqdm(styles['style']):
        style_info=architects_by_style[style]['style']
        for field in ('quality', 'existence', 'example_quality'):
            style_info.setdefault(field, -1)
        if 'example_validity' in style_info:
            continue
        style_info['example_validity']={}
        jobs.extend((style, example) for example in style_info['examples'] for _ in range(3))
    if not jobs:
        break
    print(f"Query {len(jobs)} jobs")
    for res in pool.starmap(validate_example, jobs, chunksize=1):
        if not res:
            continue
        # Append the vote to the list kept for this (style, example) pair.
        votes_by_example=architects_by_style[res['style']]['style']['example_validity']
        votes_by_example.setdefault(res['example'], []).append(res['vote'])

In [None]:
# Persist the style records, including the example votes, to the JSON cache.
with open("datasets/architect_styles.json", 'tw') as cache_file:
    json.dump(architects_by_style, fp=cache_file, ensure_ascii=False, indent=4)

In [None]:
# Ask ChatGPT for a structured profile of every style that has none yet (detected by a
# missing 'period' field) and cache each answer immediately. Replies that are not a JSON
# object are kept under 'reply2' and the style is asked again on the next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        cached=architects_by_style.get(style, {})
        if 'style' in cached and 'period' in cached['style']:
            continue
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'What is the time range, period, description, characteristics, examples, continent, and country of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
            print(f"Error processing {style}", err)
            continue
        entry=architects_by_style.setdefault(style, {})
        try:
            profile=json.loads(reply)
            if not isinstance(profile, dict):
                raise ValueError("expected a JSON object")
            entry['style']=profile
        except (ValueError, TypeError):
            print(f"Error decoding {style}")
            print(reply)
            entry['reply2']=reply
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Ask ChatGPT three times whether each cached style is an architecture style and store
# the raw answers (periods removed) in architects_by_style[style]['valid'].
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style not in architects_by_style or 'valid' in architects_by_style[style]:
            continue
        architects_by_style[style]['valid']=[]
        for _ in range(3):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f"Was '{style}' an architecture style, answer with yes or no?"}]
                )
                architects_by_style[style]['valid'].append(completion.choices[0].message.content.replace(".",""))
                completed=False
            except Exception as err:
                # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
                print(f"Error processing {style}", err)
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [None]:
# Check each cached description against its style: the style name is blanked out of the
# description, ChatGPT is asked three times whether the text describes the style, and
# 'valid2' accumulates one third per answer that starts with "Yes".
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style not in architects_by_style or 'valid2' in architects_by_style[style]:
            continue
        architects_by_style[style]['valid2']=0
        for _ in range(3):
            try:
                text=architects_by_style[style]['style']['description'].replace(style," ")
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    # The verb "describe" was missing from this question.
                    messages=[ {"role": "user", "content": f"Does this '{text}' describe the '{style}' architecture style, answer with yes or no?"}]
                )
                res=completion.choices[0].message.content.replace(".","")
                architects_by_style[style]['valid2']+=1.0/3.0 if res.startswith("Yes") else 0.0
                completed=False
            except Exception as e:
                # Also reached when the style has no cached description yet.
                print(f"Error processing {style}", e)
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [None]:
# Persist the current style records to the JSON cache.
with open("datasets/architect_styles.json", 'tw') as cache_file:
    json.dump(architects_by_style, fp=cache_file, ensure_ascii=False, indent=4)

In [None]:
# Second pass of the style-profile query: any style still lacking a 'period' field is
# asked again. Replies that are not a JSON object are kept under 'reply2' and retried.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        cached=architects_by_style.get(style, {})
        if 'style' in cached and 'period' in cached['style']:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'What is the time range, period, description, characteristics, examples, continent, and country of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
            print(f"Error processing {style}", err)
            continue
        entry=architects_by_style.setdefault(style, {})
        try:
            profile=json.loads(reply)
            if not isinstance(profile, dict):
                raise ValueError("expected a JSON object")
            entry['style']=profile
        except (ValueError, TypeError):
            print(f"Error decoding {style}")
            print(reply)
            entry['reply2']=reply
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Ask ChatGPT for a comma-separated noun/adjective summary of every style that has no
# 'terms' yet; the reply text is stored unchanged.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'terms' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'summarize "{style}" architecture style with nouns and adjectives separated by ,'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
            print(f"Error processing {style}", err)
            continue
        # Nothing is decoded here, so the former "Error decoding" branch could never run.
        architects_by_style.setdefault(style, {})['terms']=reply
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
def query_character(style,i):
    """Ask ChatGPT for 10 nouns (even `i`) or 10 adjectives (odd `i`) characterising `style`.

    Returns the parsed list with the marker "c" appended, ["err2"] when the parsed reply
    is not a list (after unwrapping a dict), or ["err1"] when anything raises.
    """
    try:
        word_class = "nouns" if i % 2 == 0 else "adjectives"
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",#model="gpt-4",
            messages=[{"role": "user", "content": f'List the 10 {word_class} that characterize the "{style}" architecture style as JSON'}],
            request_timeout=20.0
        )
        res=json.loads(completion.choices[0].message.content)
        if isinstance(res, dict):
            values=list(res.values())
            if len(values)==1:
                # Single wrapper key: take its value.
                res=values[0]
            elif len(values)>1:
                res=values
        if not isinstance(res, list):
            return ["err2"]
        return res + ["c"]
    except Exception:
        return ["err1"]

# For every style without 'termsV2', run 20 noun/adjective queries in parallel and store
# how often each returned term (and each "c"/"err*" marker) occurred.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'termsV2' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        characterset=Counter()
        for res in pool.starmap(query_character, [(style,i) for i in range(20)], chunksize=1):
            try:
                characterset.update(res)
            except Exception:
                # e.g. unhashable items in the reply
                characterset.update(["err3"])
        # setdefault: the condition above also lets through styles that have no entry yet.
        architects_by_style.setdefault(style, {})['termsV2']=dict(characterset)
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False
In [None]:
# Ask ChatGPT for the top 3 characteristic elements of every style without 'character'.
# A reply that does not parse as JSON stores nothing, so the style is asked again on the
# next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'character' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'List as bullet points the top 3 characteristic elements of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
            print(f"Error processing {style}", err)
            continue
        entry=architects_by_style.setdefault(style, {})
        try:
            entry['character']=json.loads(reply)
        except (ValueError, TypeError):
            print(f"Error decoding {style}")
            print(reply)
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style not in architects_by_style or 'short' not in architects_by_style[style]:
            try:
                print(f"Query {style}")
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": f'summarize "{style}" architecture style in less than 150 characters'}]
                )
                reply = completion.choices[0].message.content
                try:
                    if style not in architects_by_style: architects_by_style[style]={}
                    architects_by_style[style]['short']=reply
                except:
                    print(f"Error decoding {style}")
                    print(reply)
                with open("datasets/architect_styles.json", 'tw') as fo:
                    json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
                completed=False
            except:
                print(f"Error processing {style}")
        else:
            pass; # print(f"Skip {style}")

In [None]:
# Ask ChatGPT to rate the significance of every style without 'significance'.
# A failed request or a reply that does not parse as JSON stores -1, which also marks the
# style as handled (it is not asked again).
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'significance' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        # Create the entry up front so the error paths below cannot hit a KeyError.
        entry=architects_by_style.setdefault(style, {})
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'was the "{style}" architecture style significant on a scale 1 to 10 as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `except Exception` (not a bare except) so that interrupting the kernel still stops the loop.
            print(f"Error processing {style}", err)
            entry['significance']=-1
            continue
        try:
            entry['significance']=json.loads(reply)
        except (ValueError, TypeError):
            print(f"Error decoding {style}")
            print(reply)
            entry['significance']=-1
            continue
        with open("datasets/architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Number of poems wanted per selected style.
poemcnt=3
# Upper bound on requests per style and pass, so a failing API cannot spin this cell forever.
max_poem_attempts=3*poemcnt
if generate_poems:
    completed=False
    while not completed:
        completed=True
        for style in tqdm(styles['style']):
            entry=architects_by_style.get(style)
            # Only styles flagged as "selected" get poems; styles without an entry are skipped.
            if not entry or not entry.get("selected"): continue
            poems=entry.setdefault('poems', [])
            attempts=0
            while len(poems)<poemcnt and attempts<max_poem_attempts:
                attempts+=1
                try:
                    print(f"Query {style}")
                    completion = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo",
                        messages=[{"role": "user", "content": f'write a short poem describing the "{style}" architecture style without using the words "'+'" or "'.join(style.split())+'"'}]
                    )
                    reply = completion.choices[0].message.content
                    # Discard poems that still mention the full style name.
                    if style not in reply:
                        poems.append(reply)
                        # Progress was made, so run one more pass over all styles.
                        completed=False
                except Exception as e:
                    print(f"Error processing {style}", e)
            with open("datasets/architect_styles.json", 'tw') as fo:
                json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
In [None]:
# Terminate the worker pool created earlier with Pool().
pool.terminate()

## Validate Data

In [None]:
# load list of architects from wikipedia
# https://en.wikipedia.org/wiki/List_of_architects
# header=None: the CSV has no header row, so pandas assigns integer column labels.
architects=pd.read_csv('datasets/architects.csv', header=None)

In [None]:
# Drop cached records whose style is not listed in the styles table.
stale_styles=set(architects_by_style).difference(styles['style'])
for style in stale_styles:
    print(f"Remove {style} styles")
    del architects_by_style[style]

In [None]:
def _vote_share(votes, label, error_prefix="Error validation"):
    """Turn a list of three "Yes"/"No" votes into the share of "Yes" answers (0..1).

    Anything that is not a list (e.g. an already converted number) is returned unchanged.
    Lists with other answers or with a length other than 3 are reported and returned unchanged.
    """
    if not isinstance(votes, list):
        return votes
    if any(vote not in ("Yes", "No") for vote in votes):
        print(f"{error_prefix} for {label}")
        return votes
    if len(votes)!=3:
        print(f"Incomplete validation for {label}")
        return votes
    return sum(vote=="Yes" for vote in votes)/3


def _unwrap_significance(value):
    """Reduce a significance reply to its rating: parse JSON strings, then take the first
    of the keys 'score', 'scale', 'rating' found in a dict. Other values pass through."""
    if isinstance(value, str):
        try:
            value=json.loads(value)
        except ValueError:
            return value
    if isinstance(value, dict):
        for key in ("score", "scale", "rating"):
            if key in value:
                return value[key]
    return value


# Report missing fields for every style record and normalise the fields that are present.
for k, v in architects_by_style.items():
    v.setdefault('name', k)
    if 'architects' not in v: print("MISSING architects", k)
    # .get: a record without architects is reported above instead of raising KeyError.
    for a in v.get('architects', []):
        if 'name' not in a: print("MISSING architect name", k, a)
        if 'valid' not in a: print("MISSING architect validation", k, a.get('name'))
        else: a['valid']=_vote_share(a['valid'], f"{k}/{a.get('name')}")
    if 'significance' not in v: print("MISSING significance", k)
    else: v['significance']=_unwrap_significance(v['significance'])
    if 'terms' not in v: print("MISSING terms", k)
    elif isinstance(v['terms'], str):
        # Strip the "Nouns:"/"Adjectives:" labels and line breaks some replies contain.
        v['terms']=v['terms'].replace('Nouns:','').replace('Adjectives:','').replace('\n','')
    if 'style' not in v: print("MISSING style", k)
    else:
        s=v['style']
        for field, label in (('quality','style quality'), ('existence','existence'), ('example_quality','example_quality')):
            if field not in s: print(f"MISSING {label}", k, s)
        if 'example_validity' not in s: print("MISSING example_validity", k, s)
        else:
            for ek, ev in s['example_validity'].items():
                s['example_validity'][ek]=_vote_share(ev, f"{k}/{ek}", "Error example validation")
        for field in ('time_range', 'period', 'description'):
            if field not in s: print(f"MISSING {field}", k)
        # Fields that should be lists: 'characteristics' is split whenever it is a string,
        # the others only when the string contains a comma.
        for field, label, always in (('characteristics','Characteristics',True), ('examples','Examples',False), ('continent','Continent',False), ('country','Country',False)):
            if field not in s: print(f"MISSING {field}", k)
            elif isinstance(s[field], str) and (always or ',' in s[field]):
                print(f"{label} should be a list", k)
                s[field]=s[field].split(',')
    if 'valid' not in v: print("MISSING valid", k)
    # Stored as a share of "Yes" votes, on the same 0..1 scale as the architect and example votes.
    else: v['valid']=_vote_share(v['valid'], k)

In [None]:
# Copy the numeric start/end years from the styles table into each cached record.
for _, record in styles.iterrows():
    architects_by_style[record['style']].update(
        Start_Year=record['Start_Year'],
        End_Year=record['End_Year'],
    )

In [None]:
# Attach icon file names to their styles; the style name is derived from the file name
# (extension removed, underscores to spaces, title-cased).
for icon_file in os.listdir('assets/icons120'):
    if not icon_file.endswith(".png"):
        continue
    icon_style=icon_file.replace('.png', '').replace('_', ' ').title()
    if icon_style not in architects_by_style:
        print(f"Missing style for icon {icon_style}")
        continue
    architects_by_style[icon_style]["icon"]=icon_file

In [None]:
with open("datasets/architect_styles.json", 'tw') as fo:
    json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

## Subselect

In [None]:
# Top five styles per continent by the 'google' column, listed in descending 'google' order.
styles_by_google=styles.sort_values(by="google",ascending=False)
top_per_continent=styles_by_google.groupby('continent').head(5).reset_index(drop=True)
topstyles=top_per_continent.sort_values(by="google",ascending=False)
topstyles

In [None]:
# Bar chart of the 'google' value per top style, one facet column per continent.
fig = px.bar(topstyles, x='style', y='google', facet_col="continent")#, text=styles['style'], textangle=90, textposition='outside',textfont={'family':"Courier New, monospace","size":18, "color":"RebeccaPurple"})
#fig.update_layout( autosize=False, width=800, height=800)
fig.show()

In [None]:
# Flatten the per-region selection and keep the matching rows of the styles table.
selstyleslist=[name for region_styles in selstylesmap.values() for name in region_styles]
selstylesset=set(selstyleslist)
is_selected=styles['style'].isin(selstylesset)
selstyles=styles[is_selected]

In [None]:
# Selected styles that do not occur in the styles table.
missing_selected=selstylesset-set(styles['style'])
print("Missing selected styles", missing_selected, " of ", len(selstylesset))

In [None]:
# Flag every selected style and remember the region it was selected for.
for area, stylesA in selstylesmap.items():
    for styleA in stylesA:
        architects_by_style[styleA].update(selected=True, style_area=area)

In [None]:
# Records of the selected styles only, in selection order.
selected_names=[name for region_styles in selstylesmap.values() for name in region_styles]
architects_by_style_sub=dict((name, architects_by_style[name]) for name in selected_names)

In [None]:
# Write the subselection to its own JSON file.
with open("datasets/architect_styles_sub.json", 'tw') as sub_file:
    json.dump(architects_by_style_sub, fp=sub_file, ensure_ascii=False, indent=4)

In [None]:
# Persist the full set of style records, now carrying the selection flags.
with open("datasets/architect_styles.json", 'tw') as cache_file:
    json.dump(architects_by_style, fp=cache_file, ensure_ascii=False, indent=4)

In [None]:
# Number of styles in the subselection (displayed as the cell output).
len(architects_by_style_sub)

In [None]:
# Rows of the styles table that belong to the subselection.
in_subselection=styles['style'].isin(list(architects_by_style_sub.keys()))
styles_sub=styles[in_subselection]

In [None]:
# Style names for which an icon file exists, derived from the PNG file names.
icons=[
    icon_file.replace('.png', '').replace('_', ' ').title()
    for icon_file in os.listdir('assets/icons120')
    if icon_file.endswith(".png")
]
for style in styles_sub['style']:
    if style in icons:
        continue
    print(f"Missing icon for style {style}")

In [None]:
def dots(v):
    """Map a score to the LaTeX macro used in the quality table.

    Exactly 1.0 -> \\Dbbb, at least 0.66 -> \\Dbb, at least 0.33 -> \\Db,
    negative -> 'ERR', everything else -> \\Dnb.
    """
    if v==1.0:
        return '\\Dbbb'
    for threshold, macro in ((0.66, '\\Dbb'), (0.33, '\\Db')):
        if v>=threshold:
            return macro
    return 'ERR' if v<0 else '\\Dnb'

def _mean_or_err(values):
    """Average of `values`; -1 (rendered as 'ERR' by dots) when there is nothing to average."""
    values=list(values)
    return sum(values)/len(values) if values else -1

# Build one LaTeX table row per selected style; the region is printed only on its first row.
oarea=""
out=""
for style, sinfo in architects_by_style_sub.items():
    archC=_mean_or_err(arch['valid'] for arch in sinfo['architects'])
    archQ=_mean_or_err(arch['quality'] for arch in sinfo['architects'])
    # The example scores are stored as 0..1 shares, so their mean needs no further division by 3.
    archE=_mean_or_err(sinfo['style']['example_validity'].values())
    # NOTE(review): dots() treats its input as a 0..1 share; confirm sinfo['valid'] is stored on that scale.
    styleC=sinfo['valid']
    styleC2=sinfo['valid2']
    area_tex=sinfo['style_area'].replace('&','\\&')
    area=area_tex if area_tex != oarea else '         '
    oarea=area_tex
    out+=f"{area} & {style} & {sinfo['Start_Year']} & {sinfo['End_Year']} & {dots(styleC)} & {dots(sinfo['style']['existence'])} & {dots(styleC2)} & {dots(sinfo['style']['quality'])} & {dots(archC)} & {dots(archQ)} & {dots(archE)} & {dots(sinfo['style']['example_quality'])} \\\\\n"
with open("datasets/style_quality.tex", 'tw') as fo:
    fo.write(out)

## Cache poems

In [None]:
# Create one ElevenLabs voice file per paragraph of each cached poem.
# To run this cell:
# - set generate_poems=True,
# - set the `ELEVEN_API_KEY` environment variable, and
# - replace `<YOUR_VOICE_PROFILE>` in the generate(...) call below.
if generate_poems:
    for style, sinfo in architects_by_style_sub.items():
        if "poems" not in sinfo:
            continue
        poem_dir=os.path.join("assets", "poems", style)
        os.makedirs(poem_dir, exist_ok=True)
        for poem in sinfo['poems']:
            # Name-based UUID of the poem text, used as the file name prefix.
            poemhash=str(uuid.uuid3(uuid.NAMESPACE_X500, poem))
            for i, para in enumerate(poem.split("\n\n")):
                fn=os.path.join(poem_dir, poemhash+"_"+str(i))
                if os.path.exists(fn+".mp3"):
                    continue  # already generated
                print(f"Query {fn}")
                audio = generate( text=para, voice="<YOUR_VOICE_PROFILE>", model="eleven_multilingual_v2")
                if audio: save(audio, fn+".mp3")

## Midjourney query generator

In [None]:
# Collect the styles that already have at least one PNG under examples/<style>.
# The directory is created when missing, so listing it never fails.
ignorestyle=set()
for style in architects_by_style_sub:
    exampledir=f"examples/{style}"
    os.makedirs(exampledir, exist_ok=True)
    if any(name.endswith(".png") for name in os.listdir(exampledir)):
        ignorestyle.add(style)

In [None]:
# Based on this configuration different prompts are generated
# NOTE(review): `addarchitects` is presumably meant to toggle the per-architect
# prompt variants — confirm that the query generator cell actually reads it.
addarchitects=True
# Append the style's terms to the prompt with their commas kept.
addterms=False
# Append the style's terms with the commas replaced by spaces.
addterms2=True
# Append the `artstyle` suffix below to the building prompts.
addartstyle=True
# Suffix added to the building prompts when `addartstyle` is set.
artstyle=", real photography, taken on a Canon EOS R5, shot on kodak portra 200, film grain"

In [None]:
def _append_terms(prompt, terms, separator):
    """Return `prompt` followed by the style terms selected by the config flags.

    prompt    -- prompt text built so far
    terms     -- the style's term string (``sinfo['terms']``)
    separator -- text inserted between the prompt and each appended term list

    `addterms` appends the terms with their commas kept, `addterms2` appends
    them with the commas replaced by spaces; in both variants every pair of
    spaces is replaced by a single one. The flags are read at call time, and if
    both are set both variants are appended.
    """
    if addterms: prompt+=separator+terms.replace('  ',' ')
    if addterms2: prompt+=separator+terms.replace(',',' ').replace('  ',' ')
    return prompt


# Build the Midjourney prompts for every style that is not in `ignorestyle`:
#   * one building prompt per architect (only when `addarchitects` is set),
#   * one building prompt for the style itself,
#   * one icon prompt without and one with the style terms.
queries=[]
for style, sinfo in architects_by_style_sub.items():
    if style in ignorestyle: continue
    os.makedirs(f"assets/style_generated/{style}", exist_ok=True)
    query=f"building in '{style}' architecture style"
    if addarchitects:
        for arch in sinfo['architects']:
            # '::2' is Midjourney's multi-prompt separator carrying weight 2
            queryA=_append_terms(query+' by '+arch['name'], sinfo['terms'], '::2, ')
            if addartstyle: queryA+=artstyle
            queries.append(queryA)
    # Same '::2' separator as the per-architect prompts; a single ':' is not a
    # weight separator and would end up as literal prompt text.
    query=_append_terms(query, sinfo['terms'], '::2, ')
    if addartstyle: query+=artstyle
    queries.append(query)
    query=f"simple white icon of a building in '{style}' architecture style with black background::2"
    queries.append(query)
    # The icon prompt already ends in '::2', so the terms only need a comma.
    query=_append_terms(query, sinfo['terms'], ', ')
    queries.append(query)

In [None]:
# Export one prompt per line, without header or index column.
# quoting=3 is csv.QUOTE_NONE: fields are never wrapped in quotes, a ';' that
# occurs inside a prompt is backslash-escaped instead.
pd.DataFrame(queries).to_csv(
    "datasets/midjourney_queries.csv",
    sep=";",
    header=False,
    index=False,
    quoting=3,
    escapechar="\\",
)

## Compute Semantic Similarity

In [None]:
# Shared settings for the figures.
figfmt='svg' # None
figw = 650
figh = 600
# Margins; figmT differs from figm only in the larger top value.
figm=dict(l=10, r=10, b=10, t=10, pad=4)
figmT=dict(l=10, r=10, b=10, t=30, pad=4)
# Horizontal layout variants. NOTE(review): the suffixes presumably encode a
# legend placement (BC/BR/TL/TC = bottom/top + center/right/left) — confirm at
# the call sites.
figlBC=dict(orientation="h")
figlBR=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99)
figlTL=dict(orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.01)
figlTC=dict(orientation="h", yanchor="top", y=0.99)
figfnt=dict(size=12)
# Two-stop colour scale. An earlier variant ended in "Purple" (##512A8C); it was
# assigned and overwritten on the very next line, so only the effective value
# is kept.
ccscale = ["#E5ECF5", plotly.colors.carto.Bold[0]]

In [None]:
# cleanup terms
# Keys that are dropped from the term counters altogether.
_TERM_NOISE={"C","Err0","Err1","Err2","True"}
# Leading plural word -> singular form it is merged into.
_TERM_SINGULAR={
    "Temples":"Temple",
    "Palaces":"Palace",
    "Columns":"Column",
    "Courtyards":"Courtyard",
    "Pyramids":"Pyramid",
    "Domes":"Dome",
    "Gardens":"Garden",
    "Pilasters":"Pilaster",
    "Curves":"Curve",
    "Arches":"Arch",
    "Minarets":"Minaret",
    "Pillars":"Pillar",
}

def _normalize_term(term):
    """Return `term` capitalized, with a leading plural word made singular.

    `str.capitalize()` lowercases everything after the first character, so a
    capitalized plural from `_TERM_SINGULAR` can only match at the very start
    of the term. The plural is replaced only when it is a complete word (the
    next character is not a letter), so e.g. "Domestic" is left untouched
    instead of being cut down to "Dometic".
    """
    term=term.capitalize()
    for plural, singular in _TERM_SINGULAR.items():
        rest=term[len(plural):]
        if term.startswith(plural) and not rest[:1].isalpha():
            return singular+rest
    return term

# Rebuild every style's term counter: noise keys are dropped and the counts of
# terms that normalize to the same spelling are added up.
for styleA, istyleA in architects_by_style_sub.items():
    lc=Counter()
    for c,v in istyleA["termsV2"].items():
        c=_normalize_term(c)
        if c not in _TERM_NOISE:
            lc[c]+=v
    istyleA["termsV2"]=lc

In [None]:
# Empty accumulators for the pairwise records, the matrix rows and the
# per-style term records.
similarity, similarityM, termssim = [], [], []
# Style names in the iteration order of architects_by_style_sub.
similarityL=list(architects_by_style_sub)
# Term counts summed over all styles.
termsAll=Counter()
for istyleA in architects_by_style_sub.values():
    termsAll.update(istyleA["termsV2"])


In [None]:
# Print those of the 100 most frequent terms that end in "s", together with
# their counts (presumably to spot plural forms worth merging).
for term, count in termsAll.most_common(100):
    if term[-1:] == "s": print(term, count)

In [None]:

# Pairwise style similarity derived from the overlap of the styles' term counters.
#   unweighted: number of shared terms / number of terms in either style
#   weighted:   counts of the shared terms in both styles / all counts of both styles
# The accumulators are (re)created here so that re-running this cell neither
# appends to the results of a previous run nor trips over the DataFrames that
# replace the lists at the end.
similarity=[]
similarityM=[]
termssim=[]
# Counter(...) accepts the original Counter as well as the dict left behind by
# a previous run of this cell, which keeps the rebinding of `termsAll` re-runnable.
termsAll=dict(Counter(termsAll).most_common(len(similarityL)))
termsAllS=set(termsAll.keys())
for styleA, istyleA in architects_by_style_sub.items():
    row=[]
    similarityM.append(row)
    countsA=istyleA["termsV2"]
    termA=set(countsA.keys())
    totalA=sum(countsA.values())
    # A style is fully similar to itself; this also resets results of earlier runs.
    for target in (architects_by_style, architects_by_style_sub):
        target[styleA]['style_similarity']={styleA: {"unweighted": 1.0, "weighted": 1.0}}
    for styleB, istyleB in architects_by_style_sub.items():
        if styleA==styleB:
            row.append(0)  # the matrix diagonal is kept at 0
            continue
        countsB=istyleB["termsV2"]
        termB=set(countsB.keys())
        cut=termA.intersection(termB)
        cup=termA.union(termB)
        # weight
        cutw=sum([countsA[c] for c in cut])+sum([countsB[c] for c in cut])
        cupw=totalA+sum(countsB.values())
        # Two styles without any terms (or with all-zero counts) share nothing:
        # report 0.0 instead of dividing by zero.
        unweighted=len(cut)/len(cup) if cup else 0.0
        weighted=cutw/cupw if cupw else 0.0
        similarity.append({'styleA':styleA, 'styleB':styleB, "unweighted": unweighted, "weighted": weighted})
        row.append(weighted)
        for target in (architects_by_style, architects_by_style_sub):
            target[styleA]['style_similarity'][styleB]={"unweighted": unweighted, "weighted": weighted}
    # Record how strongly this style uses each of the globally most common terms.
    # NOTE(review): the /2 scaling is carried over unchanged — confirm its intent.
    cut=termA.intersection(termsAllS)
    for c in cut:
        cutw=istyleA["termsV2"][c]
        termssim.append({'style':styleA, 'term':c, "weighted":cutw/2})
similarity=pd.DataFrame(similarity)
termssim=pd.DataFrame(termssim)

In [None]:
# Persist both style dictionaries as JSON. ensure_ascii=False emits non-ASCII
# characters verbatim, so the files are opened explicitly as UTF-8 instead of
# with the platform's locale-dependent default encoding.
for json_path, payload in (
    ("datasets/architect_styles.json", architects_by_style),
    ("datasets/architect_styles_sub.json", architects_by_style_sub),
):
    with open(json_path, 'tw', encoding="utf-8") as fo:
        json.dump(payload, fo, indent=4, ensure_ascii=False)