In [None]:
import os
import openai
import pandas as pd
import json
from collections import Counter
import plotly.express as px
import plotly.graph_objs as go
import math
from tqdm import tqdm
import ray
from ray.util.multiprocessing import Pool

In [None]:
# Configure the OpenAI client: 30 s default request timeout, API key taken
# from the OPENAI_API_KEY environment variable (None when it is unset).
openai.request_timeout = 30.0
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
# Load the table of architecture styles. Later cells read its 'style',
# 'continent', 'google', 'Start_Year' and 'End_Year' columns.
styles=pd.read_csv('architect_styles.csv')
styles.head()

In [None]:
# Worker pool (ray.util.multiprocessing.Pool) used by the termsV2 cell to run
# query_character calls via starmap; it is shut down later with pool.terminate().
pool = Pool()

In [None]:
# Load the cached per-style records; the file must already exist on disk.
with open("architect_styles.json", 'tr') as cache_file:
    architects_by_style = json.loads(cache_file.read())

In [None]:
# Show which style names are already present in the cache.
architects_by_style.keys()

In [None]:
# Load the cached "top styles per region" replies; the file must already exist.
with open("architect_style_cnt.json", 'tr') as cache_file:
    architect_style_cnt = json.loads(cache_file.read())

In [None]:
# For every region not yet in architect_style_cnt, ask the model ten times for
# its "top 10" architecture styles and keep every reply that parses as JSON.
# The cache file is rewritten after each region, and regions already present
# are skipped, so the cell can be re-run without repeating finished work.
completed=False
while not completed:
    completed=True
    for continent in ["europe","america","asia","arabia","africa","australia","the world"]:
        if continent in architect_style_cnt:
            continue
        architect_style_cnt[continent]=[]
        for i in range(10):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f'List the name of the top 10 most important architecture styles in {continent} as JSON'}]
                )
                reply = completion.choices[0].message.content
            except Exception as err:
                # `Exception` instead of a bare except: KeyboardInterrupt and
                # SystemExit must still be able to stop the cell.
                print(f"Error processing {continent}-{i}")
                print(repr(err))
                continue
            try:
                architect_style_cnt[continent].append(json.loads(reply))
            except (TypeError, ValueError):
                # json.JSONDecodeError is a ValueError; TypeError covers a non-string reply.
                print(f"Error decoding {continent}-{i}")
                print(reply)
            # A request went through, so schedule one more pass over the regions.
            completed=False
        with open("architect_style_cnt.json", 'tw') as fo:
            json.dump(architect_style_cnt, fo, indent=4, ensure_ascii=False)

In [None]:
# Persist the per-region replies (pretty-printed, non-ASCII characters kept as-is).
with open("architect_style_cnt.json", 'tw') as cache_file:
    json.dump(architect_style_cnt, cache_file, ensure_ascii=False, indent=4)

In [None]:
# Tally the collected replies per region, plus a combined "ALL" tally that is
# finally cut down to its 42 most common entries.
architect_style_cnt2={"ALL": Counter()}
for region, replies in architect_style_cnt.items():
    region_tally=Counter()
    for reply_items in replies:
        region_tally.update(reply_items)
    architect_style_cnt2["ALL"].update(region_tally)
    # Store a plain dict so the result can be written out as JSON.
    architect_style_cnt2[region]=dict(region_tally)

architect_style_cnt2["ALL"]=dict(architect_style_cnt2["ALL"].most_common(42))

In [None]:
# Write the per-region and overall tallies to disk.
with open("architect_style_cnt2.json", 'tw') as tally_file:
    json.dump(architect_style_cnt2, tally_file, ensure_ascii=False, indent=4)

In [None]:
# Ask the model for three famous architects of every style that has no
# 'architects' list yet. The list is stored under the 'architects' key of the
# style's record; other keys already on the record are preserved.
# A reply that cannot be decoded into a list is kept under 'reply' and the style
# is queried again on the next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'architects' in architects_by_style[style]:
            continue
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[ {"role": "user", "content": f'List 3 famous architects with name, birth date, death date, nationality, description, and notable buildings for "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            continue
        entry = architects_by_style.setdefault(style, {})
        if not isinstance(entry, dict):
            # A record that is not a dict cannot hold the 'architects' key; start over.
            entry = architects_by_style[style] = {}
        try:
            parsed = json.loads(reply)
            # Accept both {"architects": [...]} and a bare [...] reply.
            if isinstance(parsed, dict) and 'architects' in parsed:
                parsed = parsed['architects']
            if not isinstance(parsed, list):
                raise ValueError("reply is not a list of architects")
            entry['architects'] = parsed
        except (TypeError, ValueError):
            print(f"Error decoding {style}")
            print(reply)
            entry['reply'] = reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        # A request went through, so run another pass to pick up what is still missing.
        completed=False

In [None]:
# Cross-check every architect: ask the model three times whether the architect
# belongs to the style and store the answers (periods removed) in arch['valid'].
# Architects that already carry a 'valid' list are skipped.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        # .get() guards: the style, or its architect list, may not be in the cache yet.
        for arch in architects_by_style.get(style, {}).get('architects', []):
            if 'valid' in arch:
                continue
            arch['valid']=[]
            for _attempt in range(3):
                try:
                    completion = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo",
                        messages=[ {"role": "user", "content": f"Was '{arch['name']}' an architect of the '{style}' architecture style, answer with yes or no?"}]
                    )
                    arch['valid'].append(completion.choices[0].message.content.replace(".",""))
                    completed=False
                except Exception as err:
                    # `Exception` instead of a bare except so the cell stays interruptible.
                    print(f"Error processing {style}")
                    print(repr(err))
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [None]:
# Fetch descriptive metadata (time range, period, description, ...) for every
# style whose record has no 'style' entry containing 'period'. The parsed reply
# is stored under 'style'; an undecodable reply is kept under 'reply2' and the
# style is queried again on the next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'style' in architects_by_style[style] and 'period' in architects_by_style[style]['style']:
            continue
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'What is the time range, period, description, characteristics, examples, continent, and country of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            continue
        entry = architects_by_style.setdefault(style, {})
        try:
            entry['style']=json.loads(reply)
        except (TypeError, ValueError):
            print(f"Error decoding {style}")
            print(reply)
            entry['reply2']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Ask the model three times whether each cached style really is an architecture
# style and store the answers (periods removed) in the record's 'valid' list.
# Styles missing from the cache, or already carrying 'valid', are skipped.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style not in architects_by_style or 'valid' in architects_by_style[style]:
            continue
        architects_by_style[style]['valid']=[]
        for _attempt in range(3):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[ {"role": "user", "content": f"Was '{style}' an architecture style, answer with yes or no?"}]
                )
                architects_by_style[style]['valid'].append(completion.choices[0].message.content.replace(".",""))
                completed=False
            except Exception as err:
                # `Exception` instead of a bare except so the cell stays interruptible.
                print(f"Error processing {style}")
                print(repr(err))
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)

In [None]:
# Checkpoint: write the current state of all style records back to the cache file.
with open("architect_styles.json", 'tw') as cache_file:
    json.dump(architects_by_style, cache_file, ensure_ascii=False, indent=4)

In [None]:
# Second pass of the style-metadata query: fills in the 'style' entry (time
# range, period, description, ...) for any style that still lacks one with a
# 'period' key. An undecodable reply is kept under 'reply2' and retried.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'style' in architects_by_style[style] and 'period' in architects_by_style[style]['style']:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'What is the time range, period, description, characteristics, examples, continent, and country of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            continue
        entry = architects_by_style.setdefault(style, {})
        try:
            entry['style']=json.loads(reply)
        except (TypeError, ValueError):
            print(f"Error decoding {style}")
            print(reply)
            entry['reply2']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Collect a comma-separated noun/adjective summary for every style that has no
# 'terms' entry yet. The reply is stored verbatim (it is not JSON), and the
# cache file is rewritten after every successful request.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'terms' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'summarize "{style}" architecture style with nouns and adjectives separated by ,'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            continue
        # Nothing is decoded here, so no decode-error handling is needed.
        architects_by_style.setdefault(style, {})['terms']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
def query_character(style,i):
    """Ask the chat model for ten terms that characterize an architecture style.

    Even values of ``i`` request nouns, odd values request adjectives.

    Returns:
        The decoded list of terms with the marker ``"c"`` appended
        (presumably so callers can count successful calls -- confirm).
        A dict reply is reduced first: a single-key dict to its only value,
        a multi-key dict to the list of its values.
        ``["err2"]`` when the decoded reply is not a list after that reduction.
        ``["err1"]`` when the request or the JSON decoding raises.
    """
    try:
        if i % 2 == 0:
            prompt = f'List the 10 nouns that characterize the "{style}" architecture style as JSON'
        else:
            prompt = f'List the 10 adjectives that characterize the "{style}" architecture style as JSON'
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            request_timeout=20.0,
        )
        parsed = json.loads(completion.choices[0].message.content)
        if isinstance(parsed, dict):
            values = list(parsed.values())
            if len(values) == 1:
                parsed = values[0]
            elif len(values) > 1:
                parsed = values
        if not isinstance(parsed, list):
            return ["err2"]
        return parsed + ["c"]
    except Exception:
        return ["err1"]

# For every style without a 'termsV2' entry, run 20 query_character calls on
# the worker pool and tally all returned items (terms as well as the "c" /
# "errN" markers) into one count dict stored under 'termsV2'.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'termsV2' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        characterset=Counter()
        for res in pool.starmap(query_character, [(style,i) for i in range(20)], chunksize=1):
            try:
                characterset.update(res)
            except Exception:
                # e.g. unhashable items in the reply; count it as a failed call.
                characterset.update(["err3"])
        # setdefault: the style may not have a record yet; indexing it directly
        # would raise KeyError.
        architects_by_style.setdefault(style, {})['termsV2']=dict(characterset)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Fetch the top three characteristic elements for every style that has no
# 'character' entry yet. Only replies that decode as JSON are stored; a style
# whose reply could not be decoded is queried again on the next pass.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'character' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'List as bullet points the top 3 characteristic elements of the "{style}" architecture style as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            continue
        entry = architects_by_style.setdefault(style, {})
        try:
            entry['character']=json.loads(reply)
        except (TypeError, ValueError):
            print(f"Error decoding {style}")
            print(reply)
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Fetch a short (<150 characters requested) summary for every style that has
# no 'short' entry yet. The reply is stored verbatim (it is not JSON).
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'short' in architects_by_style[style]:
            continue
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'summarize "{style}" architecture style in less than 150 characters'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            continue
        # Nothing is decoded here, so no decode-error handling is needed.
        architects_by_style.setdefault(style, {})['short']=reply
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# Ask for a 1-10 significance rating for every style without a 'significance'
# entry. A decoded reply is stored and the cache file rewritten; a failed
# request or an undecodable reply is recorded as -1 (in memory only) so the
# style is not queried again.
completed=False
while not completed:
    completed=True
    for style in tqdm(styles['style']):
        if style in architects_by_style and 'significance' in architects_by_style[style]:
            continue
        # Create the record up front so the error paths below can always write to it.
        entry = architects_by_style.setdefault(style, {})
        print(f"Query {style}")
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f'was the "{style}" architecture style significant on a scale 1 to 10 as JSON'}]
            )
            reply = completion.choices[0].message.content
        except Exception as err:
            # `Exception` instead of a bare except so the cell stays interruptible.
            print(f"Error processing {style}")
            print(repr(err))
            entry['significance']=-1
            continue
        try:
            entry['significance']=json.loads(reply)
        except (TypeError, ValueError):
            print(f"Error decoding {style}")
            print(reply)
            entry['significance']=-1
            continue
        with open("architect_styles.json", 'tw') as fo:
            json.dump(architects_by_style, fo, indent=4, ensure_ascii=False)
        completed=False

In [None]:
# All pooled queries are done; shut the worker pool down.
pool.terminate()

In [None]:
# Load the header-less architects table from the parent directory.
# NOTE(review): `architects` is not referenced again in the cells visible here,
# and the loop below was left commented out.
architects=pd.read_csv('../architects.csv', header=None)
#for architect in architects:

In [None]:
# Audit every style record: print what is missing and normalise a few fields
# in place (name, significance, terms, and list-valued 'style' attributes).
for k, v in architects_by_style.items():
    if 'name' not in v: v['name']=k
    if 'architects' not in v: print("MISSING architects", k)
    # .get(): a record without architects was reported above and must not
    # abort the whole audit with a KeyError.
    for a in v.get('architects', []):
        if 'name' not in a: print("MISSING architect name", k, a)
    if 'significance' not in v: print("MISSING significance", k)
    else:
        significance=v['significance']
        if isinstance(significance,str):
            try:
                significance=json.loads(significance)
            except ValueError:
                pass  # not valid JSON: keep the raw string
        elif isinstance(significance,dict):
            # Unwrap {"score": n} / {"scale": n} / {"rating": n} to the bare value.
            for wrapper in ("score","scale","rating"):
                if isinstance(significance,dict) and wrapper in significance:
                    significance=significance[wrapper]
        v['significance']=significance
    if 'terms' not in v: print("MISSING terms", k)
    else:
        # Strip the "Nouns:" / "Adjectives:" labels and line breaks from the summary.
        if ':' in v['terms'] or '\n' in v['terms']: v['terms']=v['terms'].replace('Nouns:','').replace('Adjectives:','').replace('\n','')
    style_info=v.get('style')
    if not isinstance(style_info,dict):
        # Without a 'style' dict none of the attribute checks below can run.
        print("MISSING style", k)
        continue
    for field in ('time_range','period','description'):
        if field not in style_info: print(f"MISSING {field}", k)
    if 'characteristics' not in style_info: print("MISSING characteristics", k)
    elif isinstance(style_info['characteristics'],str):
        print("Characteristics should be a list", k)
        style_info['characteristics']=style_info['characteristics'].split(',')
    # These attributes are only split when the string actually contains a comma.
    for field,label in (('examples','Examples'),('continent','Continent'),('country','Country')):
        if field not in style_info: print(f"MISSING {field}", k)
        elif isinstance(style_info[field],str) and ',' in style_info[field]:
            print(f"{label} should be a list", k)
            style_info[field]=style_info[field].split(',')

In [None]:
# Copy each style's period boundaries from the styles table onto its record.
for index, row in styles.iterrows():
    style=row['style']
    record=architects_by_style[style]
    for column in ('Start_Year','End_Year'):
        record[column]=row[column]

In [None]:
# Attach icon file names: "some_style.png" in styles120/ maps to the record
# named "Some Style"; icons without a matching record are reported.
for fn in os.listdir('styles120'):
    if not fn.endswith(".png"):
        continue
    name=fn.replace('.png', '').replace('_', ' ').title()
    if name not in architects_by_style:
        print(f"Missing style for icon {name}")
        continue
    architects_by_style[name]["icon"]=fn

In [None]:
# Save the audited and enriched style records.
with open("architect_styles.json", 'tw') as cache_file:
    json.dump(architects_by_style, cache_file, ensure_ascii=False, indent=4)

## Subselect

In [None]:
# Five highest-'google' styles per continent, listed from highest to lowest overall.
topstyles=(
    styles
    .sort_values(by="google",ascending=False)
    .groupby('continent')
    .head(5)
    .reset_index(drop=True)
    .sort_values(by="google",ascending=False)
)
topstyles

In [None]:
# Bar chart of the 'google' value per top style, one facet column per continent.
fig = px.bar(
    data_frame=topstyles,
    x='style',
    y='google',
    facet_col="continent",
)
fig.show()

In [None]:
# Hand-picked styles for the sub-selection, grouped by cultural area.
selstylesmap={
    "Anglo World": ["Colonial Revival", "Victorian", "Art Deco"],
    "Europe": [
        "Ancient Roman", "Romanesque", "Gothic", "Baroque",
        "Renaissance", "Neoclassical", "Art Nouveau", "Bauhaus",
    ],
    "Eurasia": ["Muscovite", "Constructivism"],
    "C. & S. America": ["Mesoamerican", "Stilt House"],
    "M. East & N. Africa": ["Ancient Egyptian", "Modern Islamic"],
    "C. & S. Africa": ["Mud Brick", "Swahili"],
    "S. Asia": ["Gupta", "Indo-Saracenic"],
    "C. Asia": ["Timurid Period", "Persian"],
    "E. Asia": ["Chinese Imperial", "Edo Period"],
    "SE. Asia": ["Khmer Empire", "Thai"],
    "Intercultural": ["Colonial", "Brutalism", "Contemporary"],
}

In [None]:
# Flatten the area map into one set of style names and filter the table by it.
selstylesset={style_name for area_styles in selstylesmap.values() for style_name in area_styles}
selstyles=styles.loc[styles['style'].isin(selstylesset)]

In [None]:
# Selected style names that do not occur in the styles table.
print(
    "Missing selected styles",
    selstylesset.difference(styles['style']),
    " of ",
    len(selstylesset),
)

In [None]:
# Records of the selected styles, in styles-table order. The values are the
# same dict objects as in architects_by_style, not copies.
architects_by_style_sub={
    style_name: architects_by_style[style_name]
    for style_name in styles['style']
    if style_name in selstylesset
}

In [None]:
# Tag every selected record with the cultural area it was listed under.
for area, area_styles in selstylesmap.items():
    for style_name in area_styles:
        architects_by_style_sub[style_name]["style_area"]=area

In [None]:
# Write the sub-selection to its own JSON file.
with open("architect_styles_sub.json", 'tw') as sub_file:
    json.dump(architects_by_style_sub, sub_file, ensure_ascii=False, indent=4)

In [None]:
# Number of selected styles that made it into the sub-selection.
len(architects_by_style_sub)

In [None]:
# Rows of the styles table that belong to the sub-selection.
styles_sub=styles.loc[styles['style'].isin(architects_by_style_sub.keys())]

In [None]:
# Report every selected style that has no icon file in styles120/
# ("some_style.png" stands for the style "Some Style").
icons=[fn.replace('.png', '').replace('_', ' ').title() for fn in os.listdir('styles120') if fn.endswith(".png")]
for styleA in styles_sub['style']:
    # Test the loop variable itself, not a `style` left over from an earlier cell.
    if styleA not in icons:
        print(f"Missing icon for style {styleA}")

In [None]:
# Report architects whose style membership was never checked, or for whom
# every recorded answer was "No".
for style, sinfo in architects_by_style_sub.items():
    for arch in sinfo['architects']:
        # The validation cell of this notebook stores its answers under 'valid';
        # 'cnt' is still honoured first for records that carry it.
        # NOTE(review): confirm that 'cnt' and 'valid' hold the same kind of answers.
        answers=arch['cnt'] if 'cnt' in arch else arch.get('valid')
        if not answers:
            # No key at all, or an empty list because every request failed.
            print(f"Missing validation for {style}/{arch['name']}")
        elif all(c=="No" for c in answers):
            print(f"Failed validation for {style}/{arch['name']}")

## Calendar plot

In [None]:
def _year_to_int(y):
    """Convert a year label to a signed integer year.

    "<n> BCE" becomes -n, "<n> CE" becomes n and "present" becomes 2023.
    Values that are not strings are returned unchanged, so re-running this
    cell on already-converted columns is harmless.
    """
    if not isinstance(y, str):
        return y
    if 'BCE' in y:
        return -int(y.replace(' BCE',''))
    return int(y.replace(' CE','').replace('present','2023'))

styles['Start_Year']=styles['Start_Year'].apply(_year_to_int)
styles['End_Year']=styles['End_Year'].apply(_year_to_int)
# Duration of each style and distance of its end from 2025; the timeline plots
# use 'Past' as the bar base and 'Length' as the bar height.
styles['Length']=styles['End_Year']-styles['Start_Year']
styles['Past']=2025-styles['End_Year']
styles['startY']=styles['Start_Year']
# Style name with a trailing space, used as the bar label text.
styles['styleL']=styles['style']+" "
styles

In [None]:
# Chronological order: by start year, ties broken by end year.
styles=styles.sort_values(['Start_Year','End_Year'])

In [None]:
# Re-derive the sub-selection so it picks up the new columns and sort order.
# (Use `styles_sub=styles` instead to plot every style.)
styles_sub=styles.loc[styles['style'].isin(architects_by_style_sub.keys())]

In [None]:
# Linear timeline: one bar per selected style, starting at 'Past' (years
# between the style's end and 2025) and extending by 'Length'. The tick labels
# translate "years before 2025" back into calendar years.
year_axis=dict(
    tickmode='array',
    tickvals=[    25,    125,    225,    325,    425,   525,   1025, 2025,      3025,      4025,     10025],
    ticktext=['2000', '1900', '1800', '1700', '1600', '1500', '1000',  '0', '1000 BC', '2000 BC', '8000 BC'],
    range=[1,3025],
)
fig = go.FigureWidget()
fig.add_bar(
    x=styles_sub['style'],
    y=styles_sub['Length'],
    base=styles_sub['Past'],
    text=styles_sub["styleL"],
)
fig.update_traces(textangle=90, textposition="outside", cliponaxis=False, width=.2)
fig.update_xaxes(visible=False, showticklabels=False)
fig.update_layout(
    yaxis=year_axis,
    uniformtext_minsize=8,
    uniformtext_mode='show',
    autosize=False,
    width=800,
    height=1200,
    template="none",
)
fig.show()

In [None]:
#fig.write_image("timeline_lin.png")
#fig.write_image("timeline_lin.svg")

In [None]:
# Tick positions are "years before 2025"; the labels show the calendar year.
#tickvals2 = [     1,      5,     15,     25,    125,    225,    525,   1025, 2025,      4025,     12025]
#ticktext2 = ['2024', '2020', '2010', '2000', '1900', '1800', '1500', '1000',  '0', '2000 BC', '10000 BC']
tickvals2 = [      5,     15,     25,    125,    225,    525,   1025, 2025,      4025]
ticktext2 = [ '2020', '2010', '2000', '1900', '1800', '1500', '1000',  '0', '2000 BC']
# Calendar years that get a minor grid line: yearly since 2000, every decade
# since 1800, every century since year 0, every millennium back to 10000 BC.
_minor_years = [
    range(2000,2025,1),
    range(1800,2000,10),
    range(1000,1800,100),
    range(   0,1000,100),
    range(-10000,1000,1000),
    #range(-2000,1000,1000)
]
# Convert to axis units first and only then drop the positions that already
# carry a major tick; comparing the raw calendar year against tickvals2 would
# filter out nothing.
mticks=sorted({2025-year for years in _minor_years for year in years} - set(tickvals2))

# Log-scaled timeline: same bars as the linear version, but recent centuries
# get more room. Major ticks come from tickvals2/ticktext2, minor grid lines
# from mticks.
log_year_axis=dict(
    type='log',
    tickangle=90,
    tickmode='array',
    tickvals=tickvals2,
    ticktext=ticktext2,
    minor=dict(ticks="inside", ticklen=0, showgrid=True, tickmode='array', tickvals=mticks),
)
fig = go.FigureWidget()
fig.add_bar(
    x=styles_sub['style'],
    y=styles_sub['Length'],
    base=styles_sub['Past'],
    text=styles_sub["styleL"],
)
fig.update_traces(textangle=90, textposition="outside", cliponaxis=False, width=.1)
fig.update_xaxes(visible=False, showticklabels=False)
fig.update_layout(
    yaxis=log_year_axis,
    uniformtext_minsize=8,
    uniformtext_mode='show',
    autosize=False,
    width=800,
    height=1600,
    template="none",
)
fig.show()

In [None]:
# Export the log timeline as a raster and a vector image.
for target in ("timeline_log.png", "timeline_log.svg"):
    fig.write_image(target)

## Midjourney query generator

#from PIL import Image
# Styles whose examples/<style>/ folder already holds at least one PNG are
# left out of query generation. The folder is created when it does not exist.
ignorestyle=set()
for style in architects_by_style_sub:
    example_dir=f"examples/{style}"
    os.makedirs(example_dir, exist_ok=True)
    if any(fn.endswith(".png") for fn in os.listdir(example_dir)):
        ignorestyle.add(style)

In [None]:
# Switches for the prompt generator below.
# NOTE(review): addarchitects is not read by the generator cell visible here.
addarchitects=True
# Append the style's 'terms' summary as-is (commas kept) after ':2, '.
addterms=False
# Append the style's 'terms' summary with commas replaced by spaces after ':2, '.
addterms2=True
# Append the photographic rendering hints in `artstyle` to every prompt.
addartstyle=True
artstyle=", real photography, taken on a Canon EOS R5, shot on kodak portra 200, film grain"

In [None]:
# Build the image prompts: for every style not in ignorestyle, one prompt per
# architect followed by one generic prompt, each decorated according to the
# addterms / addterms2 / addartstyle switches.
queries=[]
for style, sinfo in architects_by_style_sub.items():
    if style in ignorestyle:
        continue
    os.makedirs(f"style_generated/{style}", exist_ok=True)
    query=f"building in '{style}' architecture style"
    prompts=[query + ' by '+ arch['name'] for arch in sinfo['architects']]
    prompts.append(query)
    for prompt in prompts:
        if addterms:
            prompt+=':2, '+sinfo['terms'].replace('  ',' ')
        if addterms2:
            prompt+=':2, '+sinfo['terms'].replace(',',' ').replace('  ',' ')
        if addartstyle:
            prompt+=artstyle
        queries.append(prompt)

In [None]:
# One prompt per line, no header or index. quoting=3 is csv.QUOTE_NONE, so
# fields are never quoted; any ';' inside a prompt is escaped with a backslash.
pd.DataFrame(queries).to_csv("style_generated/queries.csv", index=False, quoting=3, header=False, sep=";", escapechar="\\")

## Similarity

In [None]:
# Pairwise style similarity from the 'termsV2' tallies.
#   unweighted: shared terms / all terms of the two styles
#   weighted:   counts of the shared terms (both styles) / all counts (both styles)
pair_rows=[]
for styleA, istyleA in architects_by_style_sub.items():
    countsA=istyleA["termsV2"]
    termA=set(countsA)
    for styleB, istyleB in architects_by_style_sub.items():
        if styleA==styleB:
            continue
        countsB=istyleB["termsV2"]
        termB=set(countsB)
        cut=termA & termB
        cup=termA | termB
        cutw=sum(countsA[c] for c in cut)+sum(countsB[c] for c in cut)
        cupw=sum(countsA.values())+sum(countsB.values())
        pair_rows.append({'styleA':styleA, 'styleB':styleB, "unweighted":len(cut)/len(cup),"weighted":cutw/cupw})
similarity=pd.DataFrame(pair_rows)
similarity

In [None]:
# Shared figure settings for the heatmap cells below.
figfmt='svg' # None  -- renderer name passed to fig.show()
# Figure size; also passed as width/height to write_image.
figw=600
figh=600
# Margins: figm is the one used by the heatmaps; figmT has a larger top margin.
figm=dict(l=10, r=10, b=10, t=10, pad=4)
figmT=dict(l=10, r=10, b=10, t=30, pad=4)
# Horizontal legend presets (the suffix letters appear to encode the anchor
# position, e.g. BR = bottom right). None is referenced by the cells visible here.
figlBC=dict(orientation="h")
figlBR=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99)
figlTL=dict(orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.01)
figlTC=dict(orientation="h", yanchor="top", y=0.99)
figfnt=dict(size=16)

In [None]:
# Heatmap of the weighted style-to-style similarity, shown and exported as PNG and PDF.
heatmap_layout=dict(
    legend=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99, title="Count"),
    width=figw,
    height=figh,
    margin=figm,
    yaxis_categoryorder='max ascending',
    yaxis_tickmode='linear',
    xaxis_categoryorder='max ascending',
    xaxis_tickmode='linear',
    font=figfnt,
)
fig=px.density_heatmap(
    data_frame=similarity,
    x='styleA',
    y='styleB',
    z='weighted',
    labels={"weighted":""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(**heatmap_layout)
fig.show(figfmt)
for target in ("../images/style_similarity.png", "../images/style_similarity.pdf"):
    fig.write_image(target, width=figw, height=figh)

In [None]:
# Area-to-area similarity.
#   Different areas: the 'termsV2' tallies of each area's styles are merged and
#   the two merged tallies are compared.
#   Same area: the overlaps of all ordered pairs of distinct styles inside the
#   area are summed before dividing.
area_rows=[]
for areaA, stylesA in selstylesmap.items():
    for areaB, stylesB in selstylesmap.items():
        if areaA==areaB:
            cutU=cupU=cutW=cupW=0
            for styleA in stylesA:
                istyleA=architects_by_style_sub[styleA]["termsV2"]
                for styleB in stylesB:
                    istyleB=architects_by_style_sub[styleB]["termsV2"]
                    if styleA==styleB:
                        continue
                    cut=set(istyleA) & set(istyleB)
                    cup=set(istyleA) | set(istyleB)
                    cutU+=len(cut)
                    cupU+=len(cup)
                    cutW+=sum(istyleA[c] for c in cut)+sum(istyleB[c] for c in cut)
                    cupW+=sum(istyleA.values())+sum(istyleB.values())
            area_rows.append({'areaA':areaA, 'areaB':areaB, "unweighted":cutU/cupU,"weighted":cutW/cupW})
            continue
        termsA=Counter()
        for styleA in stylesA:
            termsA.update(architects_by_style_sub[styleA]["termsV2"])
        termsB=Counter()
        for styleB in stylesB:
            termsB.update(architects_by_style_sub[styleB]["termsV2"])
        cut=set(termsA) & set(termsB)
        cup=set(termsA) | set(termsB)
        cutw=sum(termsA[c] for c in cut)+sum(termsB[c] for c in cut)
        cupw=sum(termsA.values())+sum(termsB.values())
        area_rows.append({'areaA':areaA, 'areaB':areaB, "unweighted":len(cut)/len(cup),"weighted":cutw/cupw})
similarityA=pd.DataFrame(area_rows)
similarityA

In [None]:
# Heatmap of the weighted area-to-area similarity, shown and exported as PNG and PDF.
heatmap_layout=dict(
    legend=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99, title="Count"),
    width=figw,
    height=figh,
    margin=figm,
    yaxis_categoryorder='max ascending',
    yaxis_tickmode='linear',
    xaxis_categoryorder='max ascending',
    xaxis_tickmode='linear',
    font=figfnt,
)
fig=px.density_heatmap(
    data_frame=similarityA,
    x='areaA',
    y='areaB',
    z='weighted',
    labels={"weighted":""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(**heatmap_layout)
fig.show(figfmt)
for target in ("../images/style_area_similarity.png", "../images/style_area_similarity.pdf"):
    fig.write_image(target, width=figw, height=figh)

In [None]:
# Area-to-area similarity, variant 2: for every pair of areas (including an
# area with itself) sum the overlaps of all pairs of distinct styles, one from
# each area, and divide the summed totals.
area_rows=[]
for areaA, stylesA in selstylesmap.items():
    for areaB, stylesB in selstylesmap.items():
        cutU=cupU=cutW=cupW=0
        for styleA in stylesA:
            istyleA=architects_by_style_sub[styleA]["termsV2"]
            for styleB in stylesB:
                istyleB=architects_by_style_sub[styleB]["termsV2"]
                if styleA==styleB:
                    continue
                cut=set(istyleA) & set(istyleB)
                cup=set(istyleA) | set(istyleB)
                cutU+=len(cut)
                cupU+=len(cup)
                cutW+=sum(istyleA[c] for c in cut)+sum(istyleB[c] for c in cut)
                cupW+=sum(istyleA.values())+sum(istyleB.values())
        area_rows.append({'areaA':areaA, 'areaB':areaB, "unweighted":cutU/cupU,"weighted":cutW/cupW})
similarityA=pd.DataFrame(area_rows)
similarityA

In [None]:
# Heatmap of the pairwise-summed area similarity, shown and exported as PNG and PDF.
heatmap_layout=dict(
    legend=dict(orientation="h", yanchor="bottom", y=0.01, xanchor="right", x=0.99, title="Count"),
    width=figw,
    height=figh,
    margin=figm,
    yaxis_categoryorder='max ascending',
    yaxis_tickmode='linear',
    xaxis_categoryorder='max ascending',
    xaxis_tickmode='linear',
    font=figfnt,
)
fig=px.density_heatmap(
    data_frame=similarityA,
    x='areaA',
    y='areaB',
    z='weighted',
    labels={"weighted":""},
    color_continuous_scale=px.colors.sequential.YlGnBu,
)
fig.update_layout(**heatmap_layout)
fig.show(figfmt)
for target in ("../images/style_area_similarity2.png", "../images/style_area_similarity2.pdf"):
    fig.write_image(target, width=figw, height=figh)