In [1]:
import holoviews as hv
from holoviews import opts, dim
from bokeh.sampledata.airport_routes import routes, airports
import numpy as np
import pandas as pd
import re
from itertools import combinations
hv.extension('bokeh')

In [46]:
# Count the routes between Airports
route_counts = routes.groupby(['SourceID', 'DestinationID']).Stops.count().reset_index()
nodes = hv.Dataset(airports, 'AirportID', 'City')
chord = hv.Chord((route_counts, nodes), ['SourceID', 'DestinationID'], ['Stops'])

# Select the 20 busiest airports
busiest = list(routes.groupby('SourceID').count().sort_values('Stops').iloc[-20:].index.values)
busiest_airports = chord.select(AirportID=busiest, selection_mode='nodes')

In [48]:
airports

Unnamed: 0,AirportID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,TZ,Type,source
0,3411,Barter Island LRRS Airport,Barter Island,United States,BTI,PABA,70.134003,-143.582001,2,-9,A,America/Anchorage,airport,OurAirports
1,3413,Cape Lisburne LRRS Airport,Cape Lisburne,United States,LUR,PALU,68.875099,-166.110001,16,-9,A,America/Anchorage,airport,OurAirports
2,3414,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.732903,-163.005005,22,-9,A,America/Anchorage,airport,OurAirports
3,3415,Hilo International Airport,Hilo,United States,ITO,PHTO,19.721399,-155.048004,38,-10,N,Pacific/Honolulu,airport,OurAirports
4,3416,Orlando Executive Airport,Orlando,United States,ORL,KORL,28.545500,-81.332901,113,-5,A,America/New_York,airport,OurAirports
5,3417,Bettles Airport,Bettles,United States,BTT,PABT,66.913902,-151.529007,647,-9,A,America/Anchorage,airport,OurAirports
6,3418,Clear Airport,Clear Mews,United States,Z84,PACL,64.301201,-149.119995,552,-9,A,America/Anchorage,airport,OurAirports
7,3419,Indian Mountain LRRS Airport,Indian Mountains,United States,UTO,PAIM,65.992798,-153.703995,1273,-9,A,America/Anchorage,airport,OurAirports
8,3420,Fort Yukon Airport,Fort Yukon,United States,FYU,PFYU,66.571503,-145.250000,433,-9,A,America/Anchorage,airport,OurAirports
9,3421,Sparrevohn LRRS Airport,Sparrevohn,United States,SVW,PASV,61.097401,-155.574005,1585,-9,A,America/Anchorage,airport,OurAirports


In [6]:
busiest_airports.opts(
    opts.Chord(cmap='Category20', edge_color=dim('SourceID').str(), 
               height=800, labels='City', node_color=dim('AirportID').str(), width=800))

In [74]:
airports = pd.DataFrame()
airports["genre"] = ["A","B","C"]
airports["genre2"] = ["A","B","C"]

routes = pd.DataFrame()
routes["gerne_source"] =      ["A","A","A","B","B","B","C","C","C"] * 20
routes["gerne_destination"] = ["B","B","B","A","A","A","A","B","B"] * 20
routes["Count"] = [1,1,1,1,1,1,1,1,1] *20

In [3]:
trans_dict= {"Liebes":"Love",
             "Erotik":"Erotic",
             "Adels":"Nobility",
            "Krimi":"Crime",
             "Western":"Western",
             "Arzt":"Doctoral",
              "Heimat":"Country",
              "Horror":"Horror",
              "SciFi":"SciFi",
               "Abenteuer":"Adventure",
              "Familien":"Family",
              "Geschichte":"History"}
col_dict= {"Liebes":"blue",
             "Erotik":"pink",
             "Adels":"skyblue",
            "Krimi":"red",
             "Western":"peru",
             "Arzt":"aquamarine",
              "Heimat":"navy",
              "Horror":"black",
              "SciFi":"silver",
               "Abenteuer":"green",
              "Familien":"cadetblue",
              "Geschichte":"olive"}

In [2]:
data = pd.read_csv("dash_input.tsv",sep="\t", index_col=0)

In [4]:
def rmv(x):
    
    return re.sub("^\s+|\s+$","",x)
def clean_authors(x,pattern):
    if len(pattern.findall(x)) > 0:
        x = re.sub("\[.*","",x)
        x = rmv(x)
        x = x.split(",")
        try:
            x = x[1] +" "+ x[0]
        except:
            x = x[0]
    else:
        x = re.sub("\,.*","",x)
        x = rmv(x)
    x = rmv(x)
    return x

In [6]:
data = pd.read_csv("dash_input.tsv",sep="\t", index_col=0)
pattern = re.compile("\[")
data.author = data.author.apply(lambda x: clean_authors(x,pattern) )

In [8]:
pattern = re.compile("\[")
pattern.findall('Hathaway, Andrew [Tn7]')

['[']

In [9]:
authors = np.unique(data.author)
full_frame = pd.DataFrame()
for author in authors:
    if author == "0":
        continue
        
    combos = np.unique(data[data.author==author]["label"])
    if "Western" in combos and "Adels" in combos:
        print(author)
    if len(combos) == 1:
        comb_frame = pd.DataFrame([combos[0],combos[0]]).T
    else:
        comb = list(combinations(combos,2))
        comb_frame = pd.DataFrame(comb)
        
    full_frame = pd.concat([full_frame, comb_frame], axis=0)

Friederike von Buchner


In [10]:
full_frame = full_frame.rename(columns={0:"soucre",1:"destination"})
full_frame = full_frame.reindex()

In [11]:
full_frame["Count"] = 1

In [13]:
genres = pd.DataFrame()
genres["genre"]=np.unique(data.label)
genres["genre"] = ["Liebes","Adels","Heimat","Arzt","Familien","Erotik","Geschichte",
                  "Abenteuer","Horror","Krimi","SciFi","Western"]
genres["genre2"] = genres["genre"] 
genres["color"] = genres["genre"].apply(lambda x: col_dict[x])
genres["label"] = genres["genre"].apply(lambda x: trans_dict[x])

In [14]:
# Count the routes between Airports
route_counts = full_frame.groupby(['soucre', 'destination']).Count.count().reset_index()
nodes = hv.Dataset(genres, 'genre', 'label')
chord = hv.Chord((route_counts, nodes), ['soucre', 'destination'], ['Count'])

# Select the 20 busiest airports
busiest = list(full_frame.groupby('soucre').count().sort_values('Count').index.values)
busiest_airports = chord.select(AirportID=busiest, selection_mode='nodes')

In [None]:
WARNING:param.ChordPlot08509: Popping unknown keys [''] from fontsize dictionary.
Valid keys: ['xlabel', 'ylabel', 'zlabel', 'labels', 'xticks', 'yticks', 'zticks', 'ticks', 'minor_xticks', 'minor_yticks', 'minor_ticks', 'title', 'legend', 'legend_title']

In [17]:
busiest_airports.opts(
    opts.Chord(cmap=list(genres["color"]), 
               edge_color=dim('soucre').str(), 
               height=800, labels=dim('genre'), node_color=dim('genre').str(), width=800,
               label_text_font_size="14pt"))


In [19]:
authors = np.unique(data.author)
full_frame = pd.DataFrame()
for author in authors:
    if author == "0":
        continue
        
    combos = np.unique(data[data.author==author]["publisher"])
    if "Western" in combos and "Adels" in combos:
        print(author)
    if len(combos) == 1:
        comb_frame = pd.DataFrame([combos[0],combos[0]]).T
    else:
        comb = list(combinations(combos,2))
        comb_frame = pd.DataFrame(comb)
        
    full_frame = pd.concat([full_frame, comb_frame], axis=0)

In [20]:
full_frame = full_frame.rename(columns={0:"soucre",1:"destination"})
full_frame = full_frame.reindex()

In [21]:
full_frame["Count"] = 1

In [25]:
genres = pd.DataFrame()
genres["publisher"]=np.unique(data.publisher)
#genres["color"] = genres["genre"].apply(lambda x: col_dict[x])
genres["label"] = genres["publisher"]

In [27]:
# Count the routes between Airports
route_counts = full_frame.groupby(['soucre', 'destination']).Count.count().reset_index()
nodes = hv.Dataset(genres, 'publisher', 'label')
chord = hv.Chord((route_counts, nodes), ['soucre', 'destination'], ['Count'])

# Select the 20 busiest airports
busiest = list(full_frame.groupby('soucre').count().sort_values('Count').index.values)
busiest_airports = chord.select(AirportID=busiest, selection_mode='nodes')

In [32]:
busiest_airports.opts(
    opts.Chord(edge_color=dim('soucre').str(), 
               height=800, labels=dim('publisher'), width=800,
               label_text_font_size="14pt"))

In [31]:
full_frame

Unnamed: 0,soucre,destination,Count
0,CORA Verlag,CORA Verlag,1
0,CORA Verlag,CORA Verlag,1
0,CORA Verlag,CORA Verlag,1
0,CORA Verlag,CORA Verlag,1
0,Bastei Lübbe,Bastei Lübbe,1
0,CORA Verlag,CORA Verlag,1
0,Martin Kelter Verlag,Martin Kelter Verlag,1
0,Martin Kelter Verlag,Martin Kelter Verlag,1
0,CORA Verlag,CORA Verlag,1
0,Martin Kelter Verlag,Martin Kelter Verlag,1
