In [2]:
import os
import openai
import pandas as pd
import json
from collections import Counter
import plotly.express as px
import plotly.graph_objs as go
import plotly
import math
from tqdm import tqdm
import ray
from ray.util.multiprocessing import Pool
from elevenlabs import generate, save
import uuid
from pydub import AudioSegment

In [3]:
# Abort early when a required API credential is absent from the environment.
for env_name, message in (
    ("OPENAI_API_KEY", "Missing OPENAI_API_KEY"),
    ("ELEVEN_API_KEY", "Missing ELEVEN_API_KEY"),
):
    if env_name not in os.environ:
        raise Exception(message)

In [4]:
# Hand the key from the environment to the openai client module.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Module-level request timeout; presumably seconds — TODO confirm against the installed openai version.
openai.request_timeout=30.0

In [5]:
def _year_to_int(label):
    """Convert a year label into a signed integer year.

    Labels containing 'BCE' become negative; otherwise a ' CE' suffix is
    stripped and the word 'present' is read as 2023.
    """
    if 'BCE' in label:
        return -int(label.replace(' BCE',''))
    return int(label.replace(' CE','').replace('present','2023'))

styles=pd.read_csv('architect_styles.csv')
for year_col in ('Start_Year','End_Year'):
    styles[year_col]=styles[year_col].apply(_year_to_int)
styles['Length']=styles['End_Year']-styles['Start_Year']
styles.head()

Unnamed: 0,style,Start_Year,End_Year,google,scholar,continent,parent,Length
0,Ancient Egyptian,-3100,-30,221000.0,469000.0,Africa,,3070
1,Ancient Greek,-900,-146,540000.0,985000.0,Europe,,754
2,Ancient Roman,-300,476,382000.0,1410000.0,Europe,,776
3,Byzantine,330,1453,878000.0,320000.0,Europe,,1123
4,Islamic,622,1500,4760000.0,1480000.0,Arabia,,878


In [6]:
# Load the cached per-style records (file opened in text-read mode).
with open("architect_styles.json", 'tr') as fi:
    architects_by_style=json.load(fi)

In [7]:
# Worker pool from ray.util.multiprocessing; used by the pool.starmap calls in later cells.
pool = Pool()

2023-12-12 18:21:36,255	INFO worker.py:1633 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


In [8]:
# Rebuild the dict with its keys in sorted order.
architects_by_style = dict(sorted(architects_by_style.items(), key=lambda item: item[0]))

In [9]:
# Load the cached per-region replies (file opened in text-read mode).
with open("architect_style_cnt.json", 'tr') as fi:
    architect_style_cnt=json.load(fi)

In [10]:
# Ask the model ten times per region for its top-10 architecture styles and
# keep every reply that parses as JSON. Regions already present in
# architect_style_cnt are skipped, so re-running the cell only fills gaps.
completed=False
while not completed:
    completed=True
    for continent in ["europe","america","asia","arabia","africa","australia","the world"]:
        if continent in architect_style_cnt:
            continue
        architect_style_cnt[continent]=[]
        for i in range(10):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f'List the name of the top 10 most important architecture styles in {continent} as JSON'}]
                )
                reply = completion.choices[0].message.content
            except Exception as e:
                # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
                print(f"Error processing {continent}-{i}", e)
                continue
            try:
                architect_style_cnt[continent].append(json.loads(reply))
            except Exception as e:
                print(f"Error decoding {continent}-{i}", e)
                print(reply)
            # A request went through: schedule one more pass (it will find this region filled in).
            completed=False
        # Persist after each region so an aborted run keeps the finished ones.
        with open("architect_style_cnt.json", 'tw') as fo:
            json.dump(architect_style_cnt, fo, indent=4, ensure_ascii=False)

In [11]:
# Write the per-region replies back to disk (text-write mode, non-ASCII kept as-is).
with open("architect_style_cnt.json", 'tw') as fo:
    json.dump(architect_style_cnt, fo, indent=4, ensure_ascii=False)

In [12]:
# Tally the replies per region and across all regions; the "ALL" entry is
# finally cut down to its 42 most common items.
overall_counts=Counter()
architect_style_cnt2={"ALL": overall_counts}
for region, replies in architect_style_cnt.items():
    region_counts=Counter()
    for parsed_reply in replies:
        region_counts.update(parsed_reply)
    overall_counts.update(region_counts)
    architect_style_cnt2[region]=dict(region_counts)

architect_style_cnt2["ALL"]=dict(overall_counts.most_common(42))

In [13]:
# Save the aggregated counts (text-write mode, non-ASCII kept as-is).
with open("architect_style_cnt2.json", 'tw') as fo:
    json.dump(architect_style_cnt2, fo, indent=4, ensure_ascii=False)

In [14]:
# For every style without an 'architects' field, ask the model for three
# architects as JSON and cache the answer. The loop repeats until a full pass
# issues no successful request.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'architects' in architects_by_style[style]:
            continue
        try:
            #print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[ {"role": "user", "content": f'List 3 famous architects with name, birth date, death date, nationality, description, and notable buildings for "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            continue
        # Merge into the existing record instead of replacing it, so fields
        # written by other cells for this style survive.
        entry=architects_by_style.setdefault(style, {})
        try:
            parsed=json.loads(reply)
            if isinstance(parsed, dict):
                entry.update(parsed)
            elif isinstance(parsed, list):
                # A bare JSON array is taken to be the architect list itself.
                entry['architects']=parsed
            else:
                raise ValueError(f"unexpected JSON type {type(parsed).__name__}")
        except Exception as e:
            print(f"Error decoding {style}", e)
            print(reply)
            entry['reply']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 285343.98it/s]


In [15]:
# Collect three yes/no votes per architect on whether they belong to the
# style, and give every architect a default 'quality' of -1.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        changed=False
        for arch in architects_by_style[style]['architects']:
            if 'quality' not in arch:
                arch['quality']=-1
                changed=True
            if 'valid' in arch:
                continue
            #print(f"Query {arch['name']}/{style}")
            arch['valid']=[]
            changed=True
            for i in range(3):
                try:
                    completion = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo",
                        messages=[ {"role": "user", "content": f"Was '{arch['name']}' an architect of the '{style}' architecture style, answer with yes or no?"}]
                    )
                    arch['valid'].append(completion.choices[0].message.content.replace(".",""))
                    completed=False
                except Exception as e:
                    # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
                    print(f"Error processing {style}", e)
        # Rewrite the cache only when this style's architects were touched,
        # instead of once per style on every pass.
        if changed:
            with open("architect_styles.json", 'tw') as fo:
                json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

100%|██████████| 113/113 [00:02<00:00, 53.45it/s]


In [16]:
def validate_example(style, example):
    """Ask the model once whether `example` represents the `style` architecture style.

    Returns a dict with keys 'style', 'example' and 'vote' (the reply text
    with every '.' removed), or None when the request or reply handling
    raises an exception.
    """
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[ {"role": "user", "content": f"Does '{example}' represent the '{style}' architecture style, answer with yes or no?"}]
        )
        return {'style':style, 'example':example, 'vote':completion.choices[0].message.content.replace(".","")}
    except Exception:
        # Ordinary failures are reported as None; KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were with a bare `except:`.
        return None

# Fill in default ratings for every style and, for styles that have no
# 'example_validity' yet, queue three validation jobs per example. The jobs
# run on the pool; the loop ends on the first pass that queues nothing.
while True:
    jobs=[]
    for style in tqdm(styles['style']):
        style_info=architects_by_style[style]['style']
        style_info.setdefault('quality', -1)
        style_info.setdefault('existence', -1)
        style_info.setdefault('example_quality', -1)
        if 'example_validity' not in style_info:
            style_info['example_validity']={}
            for example in style_info['examples']:
                jobs.extend([(style, example)]*3)
    if not jobs:
        break
    print(f"Query {len(jobs)} jobs")
    for res in pool.starmap(validate_example, jobs, chunksize=1):
        if not res:
            continue  # failed requests come back as None and are dropped
        votes_by_example=architects_by_style[res['style']]['style']['example_validity']
        votes_by_example.setdefault(res['example'], []).append(res['vote'])

100%|██████████| 113/113 [00:00<00:00, 445029.44it/s]


In [17]:
# Persist the per-style records (text-write mode, non-ASCII kept as-is).
with open("architect_styles.json", 'tw') as fo:
    json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [18]:
# For every style whose record lacks a 'style' dict with a 'period' key, ask
# the model for the style's description as JSON and cache it. The loop repeats
# until a full pass issues no successful request.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'style' in architects_by_style[style] and 'period' in architects_by_style[style]['style']:
            continue
        try:
            #print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'What is the time range, period, description, characteristics, examples, continent, and country of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            continue
        entry=architects_by_style.setdefault(style, {})
        try:
            entry['style']=json.loads(reply)
        except Exception as e:
            print(f"Error decoding {style}", e)
            print(reply)
            # Keep the raw text so the failed reply can be inspected later.
            entry['reply2']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 692918.64it/s]


In [19]:
# Collect three yes/no votes per style on whether it is an architecture style.
# Styles that already carry a 'valid' field, or have no record, are skipped.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style not in architects_by_style or 'valid' in architects_by_style[style]:
            continue
        #print(f"Query {style}")
        architects_by_style[style]['valid']=[]
        for i in range(3):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f"Was '{style}' an architecture style, answer with yes or no?"}]
                )
                architects_by_style[style]['valid'].append(completion.choices[0].message.content.replace(".",""))
                completed=False
            except Exception as e:
                # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
                print(f"Error processing {style}", e)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

100%|██████████| 113/113 [00:00<00:00, 1128467.50it/s]


In [20]:
# Score each style's description: the style name is blanked out of the text,
# the model is asked three times, and every reply starting with "Yes" adds
# one third to 'valid2'.
# NOTE(review): the prompt reads "Does this '...'  the '...' architecture
# style" — a verb seems to be missing; left unchanged so results stay comparable.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if not (style in architects_by_style and 'valid2' not in architects_by_style[style]):
            continue
        #print(f"Query {style}")
        record=architects_by_style[style]
        record['valid2']=0
        for attempt in range(3):
            try:
                text=record['style']['description'].replace(style," ")
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f"Does this '{text}'  the '{style}' architecture style, answer with yes or no?"}]
                )
                res=completion.choices[0].message.content.replace(".","")
                vote=1.0/3.0 if res.startswith("Yes") else 0.0
                record['valid2']+=vote
                completed=False
            except Exception as e:
                print(f"Error processing {style}", e)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

100%|██████████| 113/113 [00:00<00:00, 579408.74it/s]


In [21]:
# Persist the per-style records (text-write mode, non-ASCII kept as-is).
with open("architect_styles.json", 'tw') as fo:
    json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [22]:
# Second pass of the style-description query: any style whose record still
# lacks a 'style' dict with a 'period' key is asked again and cached.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'style' in architects_by_style[style] and 'period' in architects_by_style[style]['style']:
            continue
        try:
            print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'What is the time range, period, description, characteristics, examples, continent, and country of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            continue
        entry=architects_by_style.setdefault(style, {})
        try:
            entry['style']=json.loads(reply)
        except Exception as e:
            print(f"Error decoding {style}", e)
            print(reply)
            # Keep the raw text so the failed reply can be inspected later.
            entry['reply2']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 715946.15it/s]


In [23]:
# For every style without a 'terms' field, ask the model for a comma-separated
# noun/adjective summary and store the raw reply text.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'terms' in architects_by_style[style]:
            continue
        try:
            print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'summarize "{style}" architecture style with nouns and adjectives separated by ,'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            continue
        # The reply is stored verbatim, so there is no decoding step that could fail here.
        architects_by_style.setdefault(style, {})['terms']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 1028104.89it/s]


In [24]:
def query_character(style,i):
    """Ask the model for ten words characterizing `style` and return them as a list.

    Even `i` asks for nouns, odd `i` for adjectives. A JSON object reply is
    unwrapped to its single value, or to the list of its values when it has
    several. A list result is returned with the marker "c" appended; any other
    result yields ["err2"], and any exception yields ["err1"].
    """
    try:
        prompt = (
            f'List the 10 nouns that characterize the "{style}" architecture style as JSON'
            if i % 2 ==0 else
            f'List the 10 adjectives that characterize the "{style}" architecture style as JSON'
        )
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",#model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            request_timeout=20.0
        )
        res=json.loads(completion.choices[0].message.content)
        if isinstance(res, dict):
            values=list(res.values())
            if len(values)==1:
                res=values[0]
            elif len(values)>1:
                res=values
        if not isinstance(res, list):
            return ["err2"]
        return res + ["c"]
    except Exception:
        return ["err1"]

# For every style without 'termsV2', run 20 word queries on the pool and
# store how often each returned word (and each status marker) occurred.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'termsV2' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        characterset=Counter()
        for res in pool.starmap(query_character, [(style,i) for i in range(20)], chunksize=1):
            try:
                characterset.update(res)
            except Exception:
                # e.g. a result holding unhashable items; counted under its own marker
                characterset.update(["err3"])
        # setdefault: the skip condition allows styles that have no record yet,
        # and indexing such a style directly would raise KeyError.
        architects_by_style.setdefault(style, {})['termsV2']=dict(characterset)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 991540.49it/s]


In [25]:
# For every style without a 'character' field, ask the model for the top three
# characteristic elements as JSON. A reply that fails to parse is printed and
# the style is asked again on the next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'character' in architects_by_style[style]:
            continue
        try:
            print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'List as bullet points the top 3 characteristic elements of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            continue
        entry=architects_by_style.setdefault(style, {})
        try:
            entry['character']=json.loads(reply)
        except Exception as e:
            print(f"Error decoding {style}", e)
            print(reply)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 1190845.11it/s]


In [26]:
# For every style without a 'short' field, ask the model for a summary of
# fewer than 150 characters and store the raw reply text.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'short' in architects_by_style[style]:
            continue
        try:
            print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'summarize "{style}" architecture style in less than 150 characters'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            continue
        # The reply is stored verbatim, so there is no decoding step that could fail here.
        architects_by_style.setdefault(style, {})['short']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 882600.28it/s]


In [27]:
# For every style without a 'significance' field, ask the model for a 1-10
# rating as JSON. A failed request or an unparsable reply is recorded as -1.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'significance' in architects_by_style[style]:
            continue
        # Create the record up front: the failure paths below write into it,
        # and indexing a style without a record would raise KeyError.
        entry=architects_by_style.setdefault(style, {})
        try:
            print(f"Query {style}")
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'was the "{style}" architecture style significant on a scale 1 to 10 as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as e:
            # `except Exception` rather than a bare except, so interrupting the kernel still stops the cell.
            print(f"Error processing {style}", e)
            entry['significance']=-1
            continue
        try:
            entry['significance']=json.loads(reply)
        except Exception as e:
            print(f"Error decoding {style}", e)
            print(reply)
            entry['significance']=-1
            continue
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

100%|██████████| 113/113 [00:00<00:00, 947912.70it/s]


In [28]:
# Collect `poemcnt` poems for every style flagged as selected. A reply that
# contains the full style name is discarded and requested again.
poemcnt=3
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        record=architects_by_style[style]
        if not ("selected" in record and record["selected"]):
            continue
        if 'poems' not in record:
            record['poems']=[]
        while len(record['poems'])<poemcnt:
            try:
                print(f"Query {style}")
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": f'write a short poem describing the "{style}" architecture style without using the words "'+'" or "'.join(style.split())+'"'}]
                )
                reply = completion.choices[0].message.content
                if style in reply:# and not any([(s in reply) for s in style.split()]):
                    continue
                try:
                    if 'poems' not in record:
                        record['poems']=[]
                    record['poems'].append(reply)
                    completed=False
                except Exception as e:
                    print(f"Error decoding {style}", e)
                    print(reply)
            except Exception as e:
                print(f"Error processing {style}", e)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

100%|██████████| 113/113 [00:00<00:00, 175.48it/s]


In [29]:
# Shut the worker pool down; no later cell in this notebook section uses it.
pool.terminate()

In [30]:
# Read the architects table from the parent directory; header=None means the first row is treated as data.
architects=pd.read_csv('../architects.csv', header=None)
#for architect in architects:

In [31]:
# Drop cached records whose style name no longer appears in the styles table.
known_styles=set(styles['style'])
stale_styles=[cached for cached in architects_by_style if cached not in known_styles]
for style in stale_styles:
    print(f"Remove {style} styles")
    del architects_by_style[style]

In [32]:
def _yes_fraction(votes, label, error_prefix="Error validation"):
    """Collapse a list of "Yes"/"No" votes into the fraction of "Yes" answers.

    Values that are not lists are returned unchanged. A list holding any answer
    other than "Yes"/"No", or not holding exactly three answers, is reported
    with `label` and returned unchanged.
    """
    if not isinstance(votes, list):
        return votes
    if any(c!="No" and c!="Yes" for c in votes):
        print(f"{error_prefix} for {label}")
        return votes
    if len(votes)!=3:
        print(f"Incomplete validation for {label}")
        return votes
    return sum(1 for c in votes if c=="Yes")/3

# Report missing fields of every style record and normalise the ones present:
# vote lists become fractions, wrapped significance values are unwrapped, and
# comma-separated strings become lists.
for k, v in architects_by_style.items():
    if 'name' not in v: v['name']=k
    if 'architects' not in v: print("MISSING architects", k)
    # .get(): a record without architects was just reported and must not raise KeyError here.
    for a in v.get('architects', []):
        if 'name' not in a: print("MISSING architect name", k, a)
        arch_name=a.get('name', '?')
        if 'valid' not in a: print("MISSING architect validation", k, arch_name)
        else: a['valid']=_yes_fraction(a['valid'], f"{k}/{arch_name}")
    if 'significance' not in v: print("MISSING significance", k)
    elif isinstance(v['significance'],str):
        try:
            v['significance']=json.loads(v['significance'])
        except ValueError:
            pass  # not JSON; keep the string as it is
    elif isinstance(v['significance'],dict):
        # Take the first wrapper key that is present, in this priority order.
        for wrapper_key in ("score", "scale", "rating"):
            if wrapper_key in v['significance']:
                v['significance']=v['significance'][wrapper_key]
                break
    if 'terms' not in v: print("MISSING terms", k)
    else:
        if ':' in v['terms'] or '\n' in v['terms']: v['terms']=v['terms'].replace('Nouns:','').replace('Adjectives:','').replace('\n','')
    if 'style' not in v: print("MISSING style", k)
    else:
        s=v['style']
        if 'quality' not in s: print("MISSING style quality", k, s)
        if 'existence' not in s: print("MISSING existence", k, s)
        if 'example_quality' not in s: print("MISSING example_quality", k, s)
        if 'example_validity' not in s: print("MISSING example_validity", k, s)
        else:
            for ek in s['example_validity'].keys():
                s['example_validity'][ek]=_yes_fraction(s['example_validity'][ek], f"{k}/{ek}", "Error example validation")
        if 'time_range' not in s: print("MISSING time_range", k)
        if 'period' not in s: print("MISSING period", k)
        if 'description' not in s: print("MISSING description", k)
        if 'characteristics' not in s: print("MISSING characteristics", k)
        else:
            if isinstance(s['characteristics'],str):
                print("Characteristics should be a list", k)
                s['characteristics']=s['characteristics'].split(',')
        if 'examples' not in s: print("MISSING examples", k)
        else:
            if isinstance(s['examples'],str) and ',' in s['examples']:
                print("Examples should be a list", k)
                s['examples']=s['examples'].split(',')
        if 'continent' not in s: print("MISSING continent", k)
        else:
            if isinstance(s['continent'],str) and ',' in s['continent']:
                print("Continent should be a list", k)
                s['continent']=s['continent'].split(',')
        if 'country' not in s: print("MISSING country", k)
        else:
            if isinstance(s['country'],str) and ',' in s['country']:
                print("Country should be a list", k)
                s['country']=s['country'].split(',')
    if 'valid' not in v: print("MISSING valid", k)
    else:
        # Stored as a fraction like the architect and example scores (it used
        # to be the raw count of "Yes" votes).
        # NOTE(review): integer 'valid' values already saved by an earlier run
        # are not lists and stay untouched — confirm whether the cached file needs migrating.
        v['valid']=_yes_fraction(v['valid'], k)

MISSING time_range Indonesian
MISSING time_range Palladian
MISSING time_range Regency
MISSING time_range Sumerian
MISSING time_range Swahili
MISSING time_range Thai


In [33]:
# Copy each style's start and end year from the table into its cached record.
for style, start_year, end_year in zip(styles['style'], styles['Start_Year'], styles['End_Year']):
    record=architects_by_style[style]
    record['Start_Year']=start_year
    record['End_Year']=end_year

In [34]:
# Attach every PNG in styles120 to the style whose name matches the file name
# (extension dropped, underscores turned into spaces, title-cased).
png_files=[candidate for candidate in os.listdir('styles120') if candidate.endswith(".png")]
for fn in png_files:
    name=fn.replace('.png', '').replace('_', ' ').title()
    if name not in architects_by_style:
        print(f"Missing style for icon {name}")
        continue
    architects_by_style[name]["icon"]=fn

Missing style for icon Ranch-Style House


In [35]:
# Persist the per-style records (text-write mode, non-ASCII kept as-is).
with open("architect_styles.json", 'tw') as fo:
    json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

## Subselect

In [36]:
# Top five rows per continent by the `google` column, shown in descending `google` order.
styles_by_google=styles.sort_values(by="google",ascending=False)
top_per_continent=styles_by_google.groupby('continent').head(5).reset_index(drop=True)
topstyles=top_per_continent.sort_values(by="google",ascending=False)
topstyles

Unnamed: 0,style,Start_Year,End_Year,google,scholar,continent,parent,Length
0,Contemporary,1950,2023,8450000.0,,International,,73
1,Colonial,1600,1947,4950000.0,17600.0,International,,347
2,Gothic,1100,1500,4950000.0,398000.0,Europe,,400
3,Islamic,622,1500,4760000.0,1480000.0,Arabia,,878
4,Chinese Imperial,618,1912,2990000.0,13900.0,Asia,,1294
5,Baroque,1584,1750,2310000.0,182000.0,Europe,,166
6,Art Deco,1920,1939,2150000.0,103000.0,America,,19
7,Indian,-300,2023,2000000.0,,Asia,,2323
8,Victorian,1837,1901,1860000.0,478000.0,Europe,,64
9,Renaissance,1400,1600,1210000.0,1460000.0,Europe,,200


In [37]:
# Bar chart of the `google` column per style, one facet column per continent.
fig = px.bar(topstyles, x='style', y='google', facet_col="continent")#, text=styles['style'], textangle=90, textposition='outside',textfont={'family':"Courier New, monospace","size":18, "color":"RebeccaPurple"})
#fig.update_layout( autosize=False, width=800, height=800)
fig.show()

In [38]:
# Hand-picked styles grouped by area name; later cells flag these styles as
# selected and store the area name on each style record.
selstylesmap={
    "Central & South America": [
      "Mesoamerican",
      #"Stilt House"
    ],
    "Anglo World": [
      "Ranch-style house",
      "Victorian",
      "Art Deco"
    ],
    "Intercultural": [
      "Colonial",
      "Brutalism",
      "Contemporary"
    ],
    "Europe": [
      "Ancient Greek",
      "Ancient Roman",
      "Romanesque",
      "Gothic",
      "Baroque",
      "Renaissance",
      "Neoclassical",
      "Art Nouveau",
      "Bauhaus"
    ],
    "North Eurasia": [
      "Muscovite",
      "Constructivism"
    ],
    "Sahel & Sub-Saharan Africa": [
      "Mud Brick",
      "Swahili"
    ],
    "Middle East & North Africa": [
      "Ancient Egyptian",
      "Modern Islamic"
    ],
    "South Asia": [
      "Gupta",
      "Indo-Saracenic"
    ],
    "East Asia": [
      "Chinese Imperial",
      "Edo Period"
    ],
    "Southeast Asia & South Pacific": [
      "Khmer Empire",
      "Thai"
    ],
    "Central Asia": [
      "Timurid Period",
      "Persian"
    ]
  }

In [39]:
# Flatten the per-area selection into a list and a set, then keep the matching rows of `styles`.
selstyleslist=[]
for area_styles in selstylesmap.values():
    selstyleslist.extend(area_styles)
selstylesset=set(selstyleslist)
is_selected=styles['style'].isin(selstylesset)
selstyles=styles[is_selected]

In [40]:
# Selected style names that do not occur in the styles table, plus the size of the selection.
print("Missing selected styles", selstylesset-set(styles['style']), " of ", len(selstylesset))

Missing selected styles set()  of  30


In [41]:
# Mark every hand-picked style as selected and record the area it was listed under.
for area, stylesA in selstylesmap.items():
    for styleA in stylesA:
        architects_by_style[styleA].update({"selected": True, "style_area": area})

In [42]:
# Sub-dictionary holding only the hand-picked styles, in selection order.
architects_by_style_sub={}
for area_styles in selstylesmap.values():
    for selected_name in area_styles:
        architects_by_style_sub[selected_name]=architects_by_style[selected_name]

In [43]:
# Save the selected subset to its own file (text-write mode, non-ASCII kept as-is).
with open("architect_styles_sub.json", 'tw') as fo:
    json.dump(architects_by_style_sub, fo, indent=4, ensure_ascii=False)

In [44]:
# Persist the full set of per-style records (text-write mode, non-ASCII kept as-is).
with open("architect_styles.json", 'tw') as fo:
    json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [45]:
# Number of styles in the selected subset (the recorded output shows 30).
len(architects_by_style_sub)

30

In [46]:
# Rows of `styles` whose style name is a key of the selected subset.
styles_sub=styles[styles['style'].isin(architects_by_style_sub.keys())]

In [47]:
# Style names derived from the PNG file names in styles120; report every
# selected style that has no matching icon.
png_names=filter(lambda candidate: candidate.endswith(".png"), os.listdir('styles120'))
icons=[candidate.replace('.png', '').replace('_', ' ').title() for candidate in png_names]
for style in styles_sub['style']:
    if style in icons:
        continue
    print(f"Missing icon for style {style}")

Missing icon for style Ranch-style house


In [48]:
def dots(v):
    """Map a score to the LaTeX macro name written into the quality table.

    Exactly 1.0 gives '\\Dbbb', at least 0.66 gives '\\Dbb', at least 0.33
    gives '\\Db', a negative score gives 'ERR', and anything else '\\Dnb'.
    """
    if v==1.0:
        return '\\Dbbb'
    for threshold, macro in ((0.66, '\\Dbb'), (0.33, '\\Db')):
        if v>=threshold:
            return macro
    return 'ERR' if v<0 else '\\Dnb'

# Build one LaTeX table row per selected style and write them to
# style_quality.tex. The area name is printed only on the first row of each
# run of styles sharing that area.
oarea=""
out=""
for style, sinfo in architects_by_style_sub.items():
    architects=sinfo['architects']
    # -1 marks "no data"; dots() renders negative scores as 'ERR' instead of
    # this cell failing with ZeroDivisionError on an empty list.
    archC=sum(arch['valid'] for arch in architects)/len(architects) if architects else -1
    archQ=sum(arch['quality'] for arch in architects)/len(architects) if architects else -1
    example_scores=list(sinfo['style']['example_validity'].values())
    # Plain mean: the validation cell already stores each example score as a
    # fraction of "Yes" votes, so dividing by 3 again would cap this at 1/3.
    # NOTE(review): assumes the cached example scores were written by that cell — confirm.
    archE=sum(example_scores)/len(example_scores) if example_scores else -1
    styleC=sinfo['valid']
    styleC2=sinfo['valid2']
    escaped_area=sinfo['style_area'].replace('&','\\&')
    area=escaped_area if escaped_area != oarea else '         '
    oarea=escaped_area
    out+=f"{area} & {style} & {sinfo['Start_Year']} & {sinfo['End_Year']} & {dots(styleC)} & {dots(sinfo['style']['existence'])} & {dots(styleC2)} & {dots(sinfo['style']['quality'])} & {dots(archC)} & {dots(archQ)} & {dots(archE)} & {dots(sinfo['style']['example_quality'])} \\\\\n"
with open("style_quality.tex", 'tw') as fo:
    fo.write(out)

# Cache poems

In [49]:
# Generate one MP3 per poem paragraph for the selected styles, skipping
# paragraphs whose file already exists.
for style, sinfo in architects_by_style_sub.items():
    if "poems" not in sinfo:
        continue
    # Create the directory the files are actually written to; previously
    # "poems/<style>" was created while the files went to "assets/poems/<style>".
    poem_dir=os.path.join("assets", "poems", style)
    os.makedirs(poem_dir, exist_ok=True)
    for poem in sinfo['poems']:
        poemhash=str(uuid.uuid3(uuid.NAMESPACE_X500, poem)) # name-based UUID of the poem text, used as a stable file prefix
        for i, para in enumerate(poem.split("\n\n")):
            fn=os.path.join(poem_dir, poemhash+"_"+str(i))
            if os.path.exists(fn+".mp3"):
                continue
            print(f"Query {fn}")
            audio = generate( text=para, voice="MeAndMyself", model="eleven_multilingual_v2")
            if audio: save(audio, fn+".mp3")

## Calendar plot

In [None]:
# Start_Year, End_Year and Length are already numeric here: they were derived
# right after architect_styles.csv was loaded at the top of the notebook.
# Helper columns for the timeline bars:
#   Past   - distance of the style's end from the reference year 2025 (bar base)
#   startY - copy of Start_Year
#   styleL - style name with a trailing space, used as the bar label
styles['Past'] = 2025 - styles['End_Year']
styles['startY'] = styles['Start_Year']
styles['styleL'] = styles['style'] + " "
styles

In [None]:
# Chronological order: by start year, ties broken by end year.
styles = styles.sort_values(['Start_Year', 'End_Year'])

In [None]:
# Keep only the styles that are present in architects_by_style_sub
# (assign `styles` directly instead to plot every style).
in_subset = styles['style'].isin(architects_by_style_sub.keys())
styles_sub = styles[in_subset]

In [None]:
# Timeline on a linear axis: each bar starts at "years before 2025" of the
# style's end (Past) and extends by the style's Length.
fig = go.FigureWidget()
fig.add_bar(
    x=styles_sub['style'],
    y=styles_sub['Length'],
    base=styles_sub['Past'],
    text=styles_sub["styleL"],
)
fig.update_layout(
    yaxis={
        "tickmode": 'array',
        # Tick positions are "years before 2025"; the labels show the calendar year.
        "tickvals": [25, 125, 225, 325, 425, 525, 1025, 2025, 3025, 4025, 10025],
        "ticktext": ['2000', '1900', '1800', '1700', '1600', '1500', '1000', '0', '1000 BC', '2000 BC', '8000 BC'],
        "range": [1, 3025],
    }
)
fig.update_traces(textangle=90, textposition="outside", cliponaxis=False, width=.2)
fig.update_layout(
    uniformtext_minsize=8, uniformtext_mode='show',
    autosize=False, width=800, height=1200, template="none",
)
fig.update_xaxes(visible=False, showticklabels=False)
fig.show()

In [None]:
#fig.write_image("timeline_lin.png")
#fig.write_image("timeline_lin.svg")

In [None]:
# Major ticks of the logarithmic timeline. Positions are "years before 2025",
# labels show the corresponding calendar year.
tickvals2 = [5, 15, 25, 125, 225, 525, 1025, 2025, 4025]
ticktext2 = ['2020', '2010', '2000', '1900', '1800', '1500', '1000', '0', '2000 BC']

# Calendar-year ranges that receive minor ticks (denser towards the present).
_minor_year_ranges = [
    range(2000, 2025, 1),
    range(1800, 2000, 10),
    range(1000, 1800, 100),
    range(0, 1000, 100),
    range(-10000, 1000, 1000),
]
# Minor tick positions, excluding every position that already carries a major
# tick. The exclusion has to compare the converted position (2025 - year):
# the original compared the raw calendar year against tickvals2 and therefore
# never removed anything.
mticks = sorted({2025 - r for rr in _minor_year_ranges for r in rr} - set(tickvals2))

# Same timeline as the linear one, but on a logarithmic "years before 2025"
# axis with the major ticks (tickvals2/ticktext2) and minor grid (mticks)
# prepared beforehand.
fig = go.FigureWidget()
fig.add_bar(
    x=styles_sub['style'],
    y=styles_sub['Length'],
    base=styles_sub['Past'],
    text=styles_sub["styleL"],
)
log_axis = dict(
    type='log',
    tickangle=90,
    tickmode='array',
    tickvals=tickvals2,
    ticktext=ticktext2,
    # Minor ticks only contribute grid lines (tick length 0).
    minor=dict(ticks="inside", ticklen=0, showgrid=True, tickmode='array', tickvals=mticks),
)
fig.update_layout(yaxis=log_axis)
fig.update_traces(textangle=90, textposition="outside", cliponaxis=False, width=.1)
fig.update_layout(
    uniformtext_minsize=8, uniformtext_mode='show',
    autosize=False, width=800, height=1600, template="none",
)
fig.update_xaxes(visible=False, showticklabels=False)
fig.show()

In [None]:
# Export the logarithmic timeline as raster and vector image.
for ext in ("png", "svg"):
    fig.write_image("timeline_log." + ext)

## Midjourney query generator

In [None]:
# Collect the styles that already have at least one PNG in examples/<style>;
# the query generator skips them. The directory is created when missing.
ignorestyle = set()
for style, sinfo in architects_by_style_sub.items():
    example_dir = f"examples/{style}"
    os.makedirs(example_dir, exist_ok=True)
    for fn in os.listdir(example_dir):
        if not fn.endswith(".png"):
            continue
        ignorestyle.add(style)

In [None]:
# Switches for the Midjourney query generator.
addarchitects = True   # architect flag
addterms = False       # append the style terms as stored
addterms2 = True       # append the style terms with commas replaced by spaces
addartstyle = True     # append the photographic style suffix below
artstyle = (
    ", real photography, taken on a Canon EOS R5,"
    " shot on kodak portra 200, film grain"
)

In [None]:
# Build the Midjourney prompts for every style that has no example image yet:
#   * one photo prompt per architect of the style (when addarchitects is set),
#   * one photo prompt for the style itself,
#   * two icon prompts (without and with the style terms).
queries=[]
for style, sinfo in architects_by_style_sub.items():
    if style in ignorestyle: continue
    os.makedirs(f"style_generated/{style}", exist_ok=True)

    # Term fragments selected by the flags; sinfo['terms'] is only read when needed.
    termparts=[]
    if addterms: termparts.append(sinfo['terms'].replace('  ',' '))
    if addterms2: termparts.append(sinfo['terms'].replace(',',' ').replace('  ',' '))

    # Midjourney separates weighted prompt parts with a double colon ("::2").
    # The style-level prompt previously used a single colon (":2"), which is
    # not a weight separator; it now uses the same suffix as the architect prompts.
    suffix=''.join('::2, '+t for t in termparts)
    if addartstyle: suffix+=artstyle

    query=f"building in '{style}' architecture style"
    # Honour the addarchitects switch (it was defined but never consulted).
    if addarchitects:
        for arch in sinfo['architects']:
            queries.append(query+' by '+arch['name']+suffix)
    queries.append(query+suffix)

    # Icon prompts: the plain one first, then the variant with the terms appended.
    query=f"simple white icon of a building in '{style}' architecture style with black background::2"
    queries.append(query)
    query+=''.join(', '+t for t in termparts)
    queries.append(query)

In [None]:
# One prompt per line, no header or index; quoting=3 is csv.QUOTE_NONE, so
# separator characters inside a prompt are escaped with a backslash instead.
pd.DataFrame(queries).to_csv(
    "style_generated/queries.csv",
    sep=";",
    header=False,
    index=False,
    quoting=3,
    escapechar="\\",
)

## Similarity

In [None]:
# Shared figure settings for the similarity plots.
figfmt = 'svg'  # renderer passed to fig.show(); None is the alternative
figw, figh = 650, 600

# Margins: default and a variant with a larger top margin.
figm = {"l": 10, "r": 10, "b": 10, "t": 10, "pad": 4}
figmT = {**figm, "t": 30}

# Legend placements (BC/BR/TL/TC suffixes as named by the original author).
figlBC = {"orientation": "h"}
figlBR = {"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99}
figlTL = {"orientation": "h", "yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01}
figlTC = {"orientation": "h", "yanchor": "top", "y": 0.99}

figfnt = {"size": 12}

# Two-colour continuous scale ending in the first colour of the carto "Bold" palette.
ccscale = ["#E5ECF5", plotly.colors.carto.Bold[0]]

In [None]:
# Clean up the per-style term counters ("termsV2"):
#   * capitalize every term so differently-cased spellings merge,
#   * drop bookkeeping keys,
#   * fold selected plural forms into their singular.
# The result replaces istyleA["termsV2"] with a Counter.
_IGNORED_TERMS = {"C","Err0","Err1","Err2","True"}
_PLURAL_TO_SINGULAR = {
    "Temples": "Temple",
    "Palaces": "Palace",
    "Columns": "Column",
    "Courtyards": "Courtyard",
    "Pyramids": "Pyramid",
    "Domes": "Dome",
    "Gardens": "Garden",
    "Pilasters": "Pilaster",
    "Curves": "Curve",
    "Arches": "Arch",      # was mapped to the misspelling "Arche"
    "Arche": "Arch",       # presumably heals counters saved by earlier runs - TODO confirm
    "Minarets": "Minaret",
    "Pillars": "Pillar",
}
for styleA, istyleA in architects_by_style_sub.items():
    lc=Counter()
    for c,v in istyleA["termsV2"].items():
        c=c.capitalize()
        if c in _IGNORED_TERMS:
            continue
        # After capitalize() only the first character is upper case, so the
        # plural patterns can only ever match at the start of the term.
        # Replace the leading *word* only: the previous substring replace also
        # rewrote longer words sharing the prefix (e.g. "Domestic" -> "Dometic").
        head,sep,tail=c.partition(" ")
        c=_PLURAL_TO_SINGULAR.get(head,head)+sep+tail
        lc.update({c:v})
    istyleA["termsV2"]=lc

In [None]:
# Accumulators filled by the pairwise similarity computation.
similarity, similarityM, termssim = [], [], []
# Style names in dictionary order; used to index the similarity matrix.
similarityL = list(architects_by_style_sub)
# Term counts summed over all styles.
termsAll = Counter()
for styleA, istyleA in architects_by_style_sub.items():
    termsAll.update(istyleA["termsV2"])


In [None]:
# List the frequent terms that still end in "s" (candidates for plural folding).
for k, v in termsAll.most_common(100):
    if k.endswith("s"):
        print(k, v)

In [None]:

# Pairwise term similarity between styles.
#   unweighted: |A ∩ B| / |A ∪ B| on the term sets
#   weighted:   counts of the shared terms (both sides) / total counts (both sides)
# Results are stored per style under 'style_similarity' in both style
# dictionaries, as a long table (similarity), as a matrix with a zero diagonal
# (similarityM) and as per-style weights of the overall top terms (termssim).

# Start from empty accumulators so this cell can be re-run on its own: it
# rebinds similarity/termssim to DataFrames at the end, and appending to the
# leftovers of a previous run would fail or duplicate rows in similarityM.
similarity=[]
similarityM=[]
termssim=[]

def _ratio(num, den):
    """Return num/den, or 0.0 when den is zero (nothing to compare)."""
    return num/den if den else 0.0

# Keep the most common terms, as many as there are styles. Wrapping in
# Counter keeps this working when termsAll is already the truncated dict.
termsAll=dict(Counter(termsAll).most_common(len(similarityL)))
termsAllS=set(termsAll.keys())
for styleA, istyleA in architects_by_style_sub.items():
    row=[]
    similarityM.append(row)
    termA=set(istyleA["termsV2"].keys())
    # A style is fully similar to itself in the stored dictionaries.
    for target in (architects_by_style, architects_by_style_sub):
        target[styleA]['style_similarity']={styleA: {"unweighted": 1.0, "weighted": 1.0}}
    for styleB, istyleB in architects_by_style_sub.items():
        if styleA==styleB:
            # The matrix diagonal is kept at 0.
            row.append(0)
            continue
        termB=set(istyleB["termsV2"].keys())
        cut=termA.intersection(termB)
        cup=termA.union(termB)
        # weight
        cutw=sum(istyleA["termsV2"][c] for c in cut)+sum(istyleB["termsV2"][c] for c in cut)
        cupw=sum(istyleA["termsV2"].values())+sum(istyleB["termsV2"].values())
        unweighted=_ratio(len(cut), len(cup))
        weighted=_ratio(cutw, cupw)
        similarity.append({'styleA':styleA, 'styleB':styleB, "unweighted": unweighted, "weighted": weighted})
        row.append(weighted)
        for target in (architects_by_style, architects_by_style_sub):
            target[styleA]['style_similarity'][styleB]={"unweighted": unweighted, "weighted": weighted}
    # Weight of each overall top term within this style (count halved, as before).
    for c in termA.intersection(termsAllS):
        termssim.append({'style':styleA, 'term':c, "weighted":istyleA["termsV2"][c]/2})
similarity=pd.DataFrame(similarity)
termssim=pd.DataFrame(termssim)

In [None]:
# Persist both style dictionaries (now including 'style_similarity').
for path, payload in (
    ("architect_styles.json", architects_by_style),
    ("architect_styles_sub.json", architects_by_style_sub),
):
    with open(path, 'tw') as fo:
        json.dump(payload, fo, indent=4, ensure_ascii=False)

In [None]:
# Highest term weights first; show the first five rows.
termssim.sort_values("weighted", ascending=False).head(n=5)

In [None]:
# Summed similarity per style, most similar-to-everything first.
(
    similarity
    .groupby("styleA")
    .sum()
    .reset_index()
    .sort_values("weighted", ascending=False)
)

In [None]:
# Style-by-style heatmap of the weighted similarity, axes ordered by value.
fig = px.density_heatmap(
    similarity,
    x='styleA', y='styleB', z='weighted',
    labels={"weighted": ""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99, "title": "Count"},
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='max ascending', xaxis_tickmode='linear',
    yaxis_categoryorder='max ascending', yaxis_tickmode='linear',
)
fig.show(figfmt)
#fig.write_image("../images/style_similarity.png", width=figw, height=figh)
#fig.write_image("../images/style_similarity.pdf", width=figw, height=figh)

In [None]:
# Same heatmap with the notebook colour scale, labelled axes and a light-gray
# cell grid drawn as explicit shapes.
fig = px.density_heatmap(
    similarity,
    x='styleA', y='styleB', z='weighted',
    labels={"styleA": "Style A", "styleB": "Style B"},
    color_continuous_scale=ccscale,
)

# One vertical and one horizontal line per cell boundary.
ls = len(similarity.styleA.unique())
grid_line = dict(color="lightgray")
grid_min, grid_max = -0.5, ls-0.5
for i in range(ls+1):
    fig.add_shape(go.layout.Shape(type="line", x0=i-0.5, x1=i-0.5, y0=grid_min, y1=grid_max, line=grid_line))
    fig.add_shape(go.layout.Shape(type="line", x0=grid_min, x1=grid_max, y0=i-0.5, y1=i-0.5, line=grid_line))
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99},
    coloraxis_colorbar_title_text="Sim.",
    coloraxis_colorbar_thickness=12,
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='max ascending', xaxis_tickmode='linear',
    xaxis_showgrid=True, xaxis_side="top",
    yaxis_categoryorder='max ascending', yaxis_tickmode='linear',
    yaxis_showgrid=True,
)
fig.show(figfmt)
#fig.write_image("../images/style_similarity1.png", width=figw, height=figh)
#fig.write_image("../images/style_similarity1.pdf", width=figw, height=figh)

In [None]:
# Hand-curated axis order for the similarity heatmaps; the blank lines keep
# the author's grouping of the styles.
selstyleslist = [
    'Stilt House', 'Mud Brick', 'Swahili',

    'Muscovite', 'Renaissance', 'Colonial Revival', 'Colonial', 'Neoclassical',
    'Ancient Greek', 'Ancient Roman', 'Ancient Egyptian', 'Mesoamerican',

    'Victorian', 'Baroque', 'Gothic', 'Romanesque',

    'Art Nouveau', 'Art Deco', 'Brutalism', 'Contemporary', 'Bauhaus',
    'Constructivism', 'Modern Islamic',

    'Persian', 'Timurid Period', 'Gupta', 'Indo-Saracenic', 'Chinese Imperial',
    'Khmer Empire', 'Thai', 'Edo Period',
]

In [None]:
# Weighted similarity heatmap with both axes in the curated selstyleslist order.
fig = px.density_heatmap(
    similarity,
    x='styleA', y='styleB', z='weighted',
    labels={"weighted": ""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99, "title": "Count"},
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='array', xaxis_categoryarray=selstyleslist, xaxis_tickmode='linear',
    yaxis_categoryorder="array", yaxis_categoryarray=selstyleslist, yaxis_tickmode='linear',
)
fig.show(figfmt)
#fig.write_image("../images/style_similarity.png", width=figw, height=figh)
#fig.write_image("../images/style_similarity.pdf", width=figw, height=figh)

In [None]:
# Final style-similarity figure: notebook colour scale, curated axis order
# (selstyleslist), explicit light-gray cell grid; exported as PNG and PDF.
fig = px.density_heatmap(
    similarity,
    x='styleA', y='styleB', z='weighted',
    labels={"styleA": "Style A", "styleB": "Style B"},
    color_continuous_scale=ccscale,
)

# One vertical and one horizontal line per cell boundary.
ls = len(similarity.styleA.unique())
grid_line = dict(color="lightgray")
grid_min, grid_max = -0.5, ls-0.5
for i in range(ls+1):
    fig.add_shape(go.layout.Shape(type="line", x0=i-0.5, x1=i-0.5, y0=grid_min, y1=grid_max, line=grid_line))
    fig.add_shape(go.layout.Shape(type="line", x0=grid_min, x1=grid_max, y0=i-0.5, y1=i-0.5, line=grid_line))
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99},
    coloraxis_colorbar_title_text="Sim.",
    coloraxis_colorbar_thickness=12,
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='array', xaxis_categoryarray=selstyleslist, xaxis_tickmode='linear',
    xaxis_showgrid=True, xaxis_side="top",
    yaxis_categoryorder="array", yaxis_categoryarray=selstyleslist, yaxis_tickmode='linear',
    yaxis_showgrid=True,
)
fig.show(figfmt)
for ext in ("png", "pdf"):
    fig.write_image("../images/style_similarity." + ext, width=figw, height=figh)

In [None]:
# Style-by-term heatmap: weight of each of the overall top terms per style,
# terms ordered by value, styles in the curated selstyleslist order.
fig = px.density_heatmap(
    termssim,
    x='term', y='style', z='weighted',
    labels={"style": "Style", "term": "Characteristic Words"},
    color_continuous_scale=ccscale,
)

# Light-gray cell grid.
# NOTE(review): the grid extent is derived from the number of styles for both
# axes although x carries terms - confirm that the two counts match.
ls = len(similarity.styleA.unique())
grid_line = dict(color="lightgray")
grid_min, grid_max = -0.5, ls-0.5
for i in range(ls+1):
    fig.add_shape(go.layout.Shape(type="line", x0=i-0.5, x1=i-0.5, y0=grid_min, y1=grid_max, line=grid_line))
    fig.add_shape(go.layout.Shape(type="line", x0=grid_min, x1=grid_max, y0=i-0.5, y1=i-0.5, line=grid_line))
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99},
    coloraxis_colorbar_title_text="Count",
    coloraxis_colorbar_thickness=12,
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='max descending', xaxis_tickmode='linear',
    xaxis_showgrid=True, xaxis_side="top",
    yaxis_categoryorder='array', yaxis_categoryarray=selstyleslist, yaxis_tickmode='linear',
    yaxis_showgrid=True,
)
fig.show(figfmt)
for ext in ("png", "pdf"):
    fig.write_image("../images/style_term_similarity." + ext, width=figw, height=figh)

In [None]:
# Similarity between style areas (the groups in selstylesmap).
#   * different areas: the term counters of each area's styles are merged and
#     the two merged counters are compared;
#   * same area: overlap and totals are pooled over every ordered pair of
#     distinct styles inside the area.
noise_terms = {"c","err0","err1","err2"}
similarityA = []
for areaA, stylesA in selstylesmap.items():
    for areaB, stylesB in selstylesmap.items():
        if areaA == areaB:
            cutU = cupU = cutW = cupW = 0
            for styleA in stylesA:
                istyleA = architects_by_style_sub[styleA]["termsV2"]
                for styleB in stylesB:
                    istyleB = architects_by_style_sub[styleB]["termsV2"]
                    if styleA == styleB:
                        continue
                    termA = set(istyleA.keys()) - noise_terms
                    termB = set(istyleB.keys()) - noise_terms
                    cut = termA.intersection(termB)
                    cup = termA.union(termB)
                    # weight
                    cutU += len(cut)
                    cupU += len(cup)
                    cutW += sum(istyleA[c] for c in cut) + sum(istyleB[c] for c in cut)
                    cupW += sum(istyleA.values()) + sum(istyleB.values())
            entry = {'areaA': areaA, 'areaB': areaB, "unweighted": cutU/cupU, "weighted": cutW/cupW}
        else:
            # Merge the term counts of all styles of each area.
            termsA = Counter()
            termsB = Counter()
            for styleA in stylesA:
                termsA.update(architects_by_style_sub[styleA]["termsV2"])
            for styleB in stylesB:
                termsB.update(architects_by_style_sub[styleB]["termsV2"])
            termA = set(termsA.keys()) - noise_terms
            termB = set(termsB.keys()) - noise_terms
            cut = termA.intersection(termB)
            cup = termA.union(termB)
            # weight
            cutw = sum(termsA[c] for c in cut) + sum(termsB[c] for c in cut)
            cupw = sum(termsA.values()) + sum(termsB.values())
            entry = {'areaA': areaA, 'areaB': areaB, "unweighted": len(cut)/len(cup), "weighted": cutw/cupw}
        similarityA.append(entry)
similarityA = pd.DataFrame(similarityA)

In [None]:
# Area-by-area heatmap of the weighted similarity (similarityA); exported as
# PNG and PDF.
fig = px.density_heatmap(
    similarityA,
    x='areaA', y='areaB', z='weighted',
    labels={"weighted": ""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(
    legend={"orientation": "h", "yanchor": "bottom", "y": 0.01, "xanchor": "right", "x": 0.99, "title": "Count"},
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='array', xaxis_tickmode='linear',
    yaxis_categoryorder='array', yaxis_tickmode='linear',
)
fig.show(figfmt)
for ext in ("png", "pdf"):
    fig.write_image("../images/style_area_similarity." + ext, width=figw, height=figh)

In [None]:
# Second area-level similarity: for every pair of areas (including an area
# with itself) pool overlap and totals over all ordered pairs of distinct
# styles, one taken from each area.
similarityA2 = []
for areaA, stylesA in selstylesmap.items():
    for areaB, stylesB in selstylesmap.items():
        cutU = cupU = cutW = cupW = 0
        for styleA in stylesA:
            istyleA = architects_by_style_sub[styleA]["termsV2"]
            for styleB in stylesB:
                istyleB = architects_by_style_sub[styleB]["termsV2"]
                if styleA == styleB:
                    continue
                termA = set(istyleA.keys())
                termB = set(istyleB.keys())
                cut = termA.intersection(termB)
                cup = termA.union(termB)
                # weight
                cutU += len(cut)
                cupU += len(cup)
                cutW += sum(istyleA[c] for c in cut) + sum(istyleB[c] for c in cut)
                cupW += sum(istyleA.values()) + sum(istyleB.values())
        similarityA2.append({'areaA': areaA, 'areaB': areaB, "unweighted": cutU/cupU, "weighted": cutW/cupW})
similarityA2 = pd.DataFrame(similarityA2)

In [None]:
# Area-by-area heatmap of the pooled pairwise similarity (similarityA2);
# exported as style_area_similarity2.{png,pdf}.
# The original plotted similarityA here, so the "2" files duplicated the
# first area figure and similarityA2 was never shown.
fig=px.density_heatmap(
    similarityA2,
    x='areaA', y='areaB', z='weighted',
    labels={"weighted":""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99, title="Count"),
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='array', xaxis_tickmode='linear',
    yaxis_categoryorder="array", yaxis_tickmode='linear',
)
fig.show(figfmt)
fig.write_image("../images/style_area_similarity2.png", width=figw, height=figh)
fig.write_image("../images/style_area_similarity2.pdf", width=figw, height=figh)

In [None]:
# Reorder the style-similarity heatmap by spectral co-clustering of the
# weighted similarity matrix.
n_c=7  # number of co-clusters

import numpy as np
from sklearn.cluster import SpectralCoclustering

# similarityM holds one row per style (zero diagonal). Build a plain 2-D
# ndarray directly; np.matrix is no longer recommended by NumPy.
data=np.asarray(similarityM, dtype=float)

# Fixed random_state keeps the clustering reproducible between runs.
model = SpectralCoclustering(n_clusters=n_c, random_state=0)
model.fit(data)

# Permutations that group rows / columns of the same cluster together.
rowsort=np.argsort(model.row_labels_)
colsort=np.argsort(model.column_labels_)

# Matrix rearranged by cluster (kept for inspection; not used by the plot below).
fit_data = data[rowsort]
fit_data = fit_data[:,colsort]

# x shows styleA (matrix rows), y shows styleB (matrix columns), each in cluster order.
fig=px.density_heatmap(similarity, x='styleA', y='styleB', z='weighted', labels={"weighted":""}, color_continuous_scale=ccscale)
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99, title="Count"),
    width=figw, height=figh, margin=figm, font=figfnt,
    xaxis_categoryorder='array', xaxis_categoryarray=[similarityL[i] for i in rowsort], xaxis_tickmode='linear',
    yaxis_categoryorder="array", yaxis_categoryarray=[similarityL[i] for i in colsort], yaxis_tickmode='linear',
)
fig.show(figfmt)
#fig.write_image("../images/style_similarity3.png", width=figw, height=figh)
#fig.write_image("../images/style_similarity3.pdf", width=figw, height=figh)