In [110]:
from matplotlib import pyplot as plt
%matplotlib inline

In [111]:
from __future__ import print_function

import os
import subprocess

import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor, export_graphviz
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import Imputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [112]:
file = '20160301_TE_survey_cleaned.csv'

In [113]:
df = pd.read_csv(file)

In [114]:
df.columns.values

array(['ID', 'T(K)', 'Z*10^-4 reported', 'Resist(Ohm.cm)',
       'Seebeck(uV/K)', 'kappaZT', 'Unnamed: 6', 'Resist(400K)',
       'Seebeck(400K)', 'Pf(W/K^2/m)', 'ZT', 'kappa(W/mK)', 'x',
       'Formula', 'series', 'T_Max', 'family', 'Unnamed: 17',
       'Unnamed: 18', 'Conduct(S/cm)', 'Power_Factor*T_(W/mK)',
       'preparative_route', 'final_form', 'cell_volume(A^3)',
       'formula_units_per_cell', 'atoms_per_formula_unit',
       'total_atoms_per_unit_cell', 'average_atomic_volume',
       'ICSD_of_structure', 'temp of ICSD (K)', 'S^2', 'ke/ktotal',
       'space_group', 'number_symmetry_elements', 'Unnamed: 34',
       'Unnamed: 35'], dtype=object)

In [115]:
df['Seebeck(uV/K)'].describe()

count    1098.000000
mean      -40.155270
std       192.691410
min      -752.196000
25%      -163.359500
50%       -67.600000
75%        99.358325
max      1235.430000
Name: Seebeck(uV/K), dtype: float64

### For the decision tree we need to bin the target values, so that we create a mock classification problem.

In [116]:
df['bin'] = pd.cut(df['Seebeck(uV/K)'], 50) #Breaking the Seebeck column into 50
                                            #bins, this number is arbitraty.

In [117]:
df = df.sort_values(['bin'], ascending = True) #sort the dataframe by ascending order
                                                #of the bins.

In [118]:
df['bin'].head()

1098                     NaN
1031    (-754.184, -712.443]
328     (-672.691, -632.938]
1032    (-632.938, -593.186]
124     (-632.938, -593.186]
Name: bin, dtype: category
Categories (50, interval[float64]): [(-754.184, -712.443] < (-712.443, -672.691] < (-672.691, -632.938] < (-632.938, -593.186] ... (1076.42, 1116.172] < (1116.172, 1155.925] < (1155.925, 1195.677] < (1195.677, 1235.43]]

In [119]:
#Pulling the columns we're interested in using for our predictors.
reduced_df = df[['Resist(Ohm.cm)', 'T(K)', 'Seebeck(uV/K)', 'average_atomic_volume', 'space_group', 'bin']]

In [120]:
reduced_df.head()

Unnamed: 0,Resist(Ohm.cm),T(K),Seebeck(uV/K),average_atomic_volume,space_group,bin
1098,,,,,,
1031,2.91889,300.0,-752.196,13.52,139.0,"(-754.184, -712.443]"
328,0.302535,300.0,-650.91,10.3485,62.0,"(-672.691, -632.938]"
1032,2.16164,400.0,-618.425,13.52,139.0,"(-632.938, -593.186]"
124,0.460937,400.0,-600.164,10.3485,62.0,"(-632.938, -593.186]"


### I need to assign each bin a number.  I created a new column with these bin numbers.

In [121]:
def encode_target(df, target_column):
    """Add column to data that assigns integers to the binned Seebeck data"""
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod['Target'] = df_mod[target_column].replace(map_to_int)
    
    return (df_mod, targets)

In [122]:
df2, targets = encode_target(reduced_df, 'bin')

In [123]:
df2.head()

Unnamed: 0,Resist(Ohm.cm),T(K),Seebeck(uV/K),average_atomic_volume,space_group,bin,Target
1098,,,,,,,0
1031,2.91889,300.0,-752.196,13.52,139.0,"(-754.184, -712.443]",1
328,0.302535,300.0,-650.91,10.3485,62.0,"(-672.691, -632.938]",2
1032,2.16164,400.0,-618.425,13.52,139.0,"(-632.938, -593.186]",3
124,0.460937,400.0,-600.164,10.3485,62.0,"(-632.938, -593.186]",3


In [124]:
#Clean up the NA values
df3 = df2.dropna()

In [125]:
#Break data into x and y
df3_x = pd.DataFrame(df3[['Resist(Ohm.cm)', 'T(K)', 'average_atomic_volume', 'space_group']])
df3_y = pd.DataFrame(df3['Target'])
df3_reg_y = pd.DataFrame(df3['Seebeck(uV/K)'])

In [126]:
#this just makes it easy to call all x parameters later on.
features = list(df3_x.columns[:4])

In [127]:
print (features)

['Resist(Ohm.cm)', 'T(K)', 'average_atomic_volume', 'space_group']


In [128]:
#Split into test/train
x_train, x_test = train_test_split(df3_x, test_size=0.2)
y_train, y_test = train_test_split(df3_y, test_size=0.2)

In [129]:
#Fit the decision tree,
#1st iteration dt = DecisionTreeRegressor(min_samples_split = 20, random_state = 99)
#Predictor gets better when max depth is decreased, but you loose too much info after a certain point.
y_fit_cla = y_train['Target']
X_fit_cla = x_train[features]
dt_cla = DecisionTreeClassifier(max_depth = 10, random_state = 99)
dt_fit_cla = dt_cla.fit(X_fit_cla, y_fit_cla)

In [130]:
dt_cla_pred = dt_cla.predict(x_test[features])
score = accuracy_score(y_test, dt_pred)
print(score)

NameError: name 'dt_pred' is not defined

In [131]:
#Split into test/train
x_train_reg, x_test_reg = train_test_split(df3_x, test_size=0.2)
y_train_reg, y_test_reg = train_test_split(df3_reg_y, test_size=0.2)

In [132]:
#Fit the decision tree,
#1st iteration dt = DecisionTreeRegressor(min_samples_split = 20, random_state = 99)
#Predictor gets better when max depth is decreased, but you loose too much info after a certain point.
y_fit = y_train_reg['Seebeck(uV/K)']
X_fit = x_train[features]
dt = DecisionTreeRegressor(max_depth = 10, random_state = 99)
dt_fit = dt.fit(x_train_reg, y_train_reg)

In [133]:
#Check the predictor.
dt_pred = dt.predict(x_test[features])
score = dt.score(x_test_reg, y_test_reg)
print (score)

-0.3731405657918343


In [134]:
#Random Forest is a different call, lets  try this out.
rdt = RandomForestRegressor(random_state=1010, n_estimators=100, max_depth = 10)
rdt_fit = rdt.fit(x_train_reg[features], y_train_reg['Seebeck(uV/K)'])
score = rdt.score(x_test_reg, y_test_reg)
print (score)

-0.14139435059047512


In [135]:
#Random Forest is a different call, lets  try this out.
rdt = RandomForestClassifier(random_state=1010, n_estimators=100, max_depth = 10)
rdt_fit = rdt.fit(x_train[features], y_train['Target'])
score = rdt.score(x_test, y_test)
print (score)

0.07009345794392523


In [20]:
#I was following along with a demo and the author mentioned a way to visalize
#the tree.  I haven't been able to get this to work yet.
def visualize_tree(tree, feature_names):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecsisionTree.
    feature_names -- list of feature names.
    """
    with open("dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
    try:
        subprocess.check_call(command)
    except:
        exit("Could not run dot, ie graphviz, to "
             "produce visualization")

In [21]:
visualize_tree(dt, features)

In [136]:
def encode_columns(df, df_mod2, target_column):
    """Change unique column values to integers"""
    targets = df_mod2[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod2[target_column] = df_mod2[target_column].replace(map_to_int)
    
    return

In [137]:
df_mod2 = df.copy()
encode_columns(df, df_mod2, 'preparative_route')

In [138]:
df_mod2['preparative_route'].head()

1098    0
1031    1
328     2
1032    3
124     2
Name: preparative_route, dtype: int64

In [139]:
reduced_df2 = df_mod2[['T(K)', 'Resist(Ohm.cm)', 'Seebeck(uV/K)', 'Resist(400K)', 'Conduct(S/cm)', 'preparative_route', 
                    'cell_volume(A^3)', 'average_atomic_volume', 'space_group', 'number_symmetry_elements', 'bin']]

In [140]:
reduced_df3 = reduced_df2.dropna()

In [141]:
df4, targets = encode_target(reduced_df3, 'bin')

In [142]:
df4_x_1 = pd.DataFrame(df4[['T(K)', 'Resist(Ohm.cm)', 'Resist(400K)', 'Conduct(S/cm)', 'preparative_route', 
                    'cell_volume(A^3)', 'average_atomic_volume', 'space_group', 'number_symmetry_elements']])
df4_y_1 = pd.DataFrame(df4['Target'])
df4_reg_y_1 = pd.DataFrame(df4['Seebeck(uV/K)'])

In [143]:
x_train_1, x_test_1 = train_test_split(df4_x_1, test_size=0.2)
y_train_1, y_test_1 = train_test_split(df4_y_1, test_size=0.2)
y_train_reg_1, y_train_reg_2 = train_test_split(df4_reg_y_1, test_size=0.2)

In [144]:
features_1 = list(df4_x_1.columns[:9])

In [145]:
print (features_1)

['T(K)', 'Resist(Ohm.cm)', 'Resist(400K)', 'Conduct(S/cm)', 'preparative_route', 'cell_volume(A^3)', 'average_atomic_volume', 'space_group', 'number_symmetry_elements']


In [146]:
dt_1 = DecisionTreeClassifier(max_depth = 10, random_state = 99)
dt_fit_1 = dt_1.fit(x_train_1[features_1], y_train_1['Target'],)

In [147]:
dt_pred_1 = dt_fit_1.score(x_test_1, y_test_1)
print (dt_pred_1)

0.14705882352941177


In [148]:
rdt_1 = RandomForestRegressor(random_state=1010, n_estimators=100, max_depth = 10)
rdt_fit_1 = rdt_1.fit(x_train_1[features_1], y_train_reg_1['Seebeck(uV/K)'])

In [149]:
rdt_pred_1 = rdt_fit_1.score(x_test_1, y_train_reg_2)
print (rdt_pred)

NameError: name 'rdt_pred' is not defined

In [150]:
import plotly
plotly.__version__

'2.4.1'

In [151]:
import plotly.plotly as py
import plotly.figure_factory as ff
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap

In [257]:
elements.head()

Unnamed: 0,atomic number,symbol,name,atomic mass,CPK,electronic configuration,electronegativity,atomic radius,ion radius,van der Waals radius,...,EA,standard state,bonding type,melting point,boiling point,density,metal,year discovered,group,period
0,1,H,Hydrogen,1.00794,#FFFFFF,1s1,2.2,37.0,,120.0,...,-73.0,gas,diatomic,14.0,20.0,9e-05,nonmetal,1766,1,1
1,2,He,Helium,4.002602,#D9FFFF,1s2,,32.0,,140.0,...,0.0,gas,atomic,,4.0,0.0,noble gas,1868,18,1
2,3,Li,Lithium,6.941,#CC80FF,[He] 2s1,0.98,134.0,76 (+1),182.0,...,-60.0,solid,metallic,454.0,1615.0,0.54,alkali metal,1817,1,2
3,4,Be,Beryllium,9.012182,#C2FF00,[He] 2s2,1.57,90.0,45 (+2),,...,0.0,solid,metallic,1560.0,2743.0,1.85,alkaline earth metal,1798,2,2
4,5,B,Boron,10.811,#FFB5B5,[He] 2s2 2p1,2.04,82.0,27 (+3),,...,-27.0,solid,covalent network,2348.0,4273.0,2.46,metalloid,1807,13,2


In [269]:

periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = [str(x) for x in range(1, 19)]

df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str) 
df["group"] = df["group"].astype(str)
df["period"] = [periods[x-1] for x in df.period]
df = df[df.group != "-"] #cleaning elements dataframe?
df = df[df.symbol != "Lr"]
df = df[df.symbol != "Lu"]

cmap = {
    "alkali metal"         : "#a6cee3",
    "alkaline earth metal" : "#1f78b4",
    "metal"                : "#d93b43",
    "halogen"              : "#999d9a",
    "metalloid"            : "#e08d49",
    "noble gas"            : "#eaeaea",
    "nonmetal"             : "#f1d4Af",
    "transition metal"     : "#599d7A",
} # Creating a dictionary?

source = ColumnDataSource(df)

p = figure(title="Periodic Table (omitting LA and AC Series)", plot_width=1000, plot_height=450,
           tools="", toolbar_location=None,
           x_range=groups, y_range=list(reversed(periods)))

p.rect("group", "period", 0.95, 0.95, source=source, fill_alpha=0.6, legend="metal",
       color=factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap.keys())))

text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}

x = dodge("group", -0.4, range=p.x_range)

r = p.text(x=x, y="period", text="symbol", **text_props)
r.glyph.text_font_style="bold"

r = p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props)
r.glyph.text_font_size="8pt"

r = p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props)
r.glyph.text_font_size="5pt"

r = p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props)
r.glyph.text_font_size="5pt"

p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

p.add_tools(HoverTool(tooltips = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]))

p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location ="top_center"

show(p)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Count [renderer: GlyphRenderer(id='904ac113-7f3c-43c5-bc54-3c4bd1a85f77', ...)]
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='2bee0d6b-9bd6-4824-a2a3-a5ab31dfe7ff', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4897146f-7a74-44a1-8a40-d657bc732147', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='49f47169-00ac-4bd2-b4d4-59440095ce48', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4aaea623-ae2b-44a6-a9f6-0c5709e50511', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='ef037ddd-890d-42b3-990c-cfe1c40aa88e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout

In [254]:
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox, layout
from bokeh.models.widgets import Select

In [270]:
output_file("select.html")

temp_select = Select(title="Temperature:", value='300 K', options=["300 K", "400 K", "700 K", "1000 K"])

show(layout([[p],[temp_select]]))

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Count [renderer: GlyphRenderer(id='904ac113-7f3c-43c5-bc54-3c4bd1a85f77', ...)]
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='2bee0d6b-9bd6-4824-a2a3-a5ab31dfe7ff', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4897146f-7a74-44a1-8a40-d657bc732147', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='49f47169-00ac-4bd2-b4d4-59440095ce48', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4aaea623-ae2b-44a6-a9f6-0c5709e50511', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='ef037ddd-890d-42b3-990c-cfe1c40aa88e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout

In [226]:
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox, row
from bokeh.models.widgets import Dropdown
from bokeh.plotting import curdoc
from bokeh.models.widgets import Slider, TextInput

menu = [("Quaterly", "time_windows"), ("Half Yearly", "time_windows"), None, ("Yearly", "time_windows")]
dropdown = Dropdown(label="Time Period", button_type="warning", menu=menu)

def function_to_call(attr, old, new):
    print (dropdown.value)

dropdown.on_change('value', function_to_call)

curdoc().add_root(dropdown)
show(widgetbox(dropdown))

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Count [renderer: GlyphRenderer(id='904ac113-7f3c-43c5-bc54-3c4bd1a85f77', ...)]
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='2bee0d6b-9bd6-4824-a2a3-a5ab31dfe7ff', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4897146f-7a74-44a1-8a40-d657bc732147', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='49f47169-00ac-4bd2-b4d4-59440095ce48', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4aaea623-ae2b-44a6-a9f6-0c5709e50511', ...)

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



In [None]:
from os.path import join, dirname
import datetime

import pandas as pd
from scipy.signal import savgol_filter

from bokeh.io import curdoc
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, DataRange1d, Select
from bokeh.palettes import Blues4
from bokeh.plotting import figure

In [152]:
from temann.interpret import get_empirical_formula

In [153]:
periodic_df = df[['Seebeck(uV/K)', 'Pf(W/K^2/m)', 'Formula', 'T(K)']]

In [154]:
periodic_df.dropna()

Unnamed: 0,Seebeck(uV/K),Pf(W/K^2/m),Formula,T(K)
1031,-752.196,1.938400e-05,Nd2CuO4,300.0
328,-650.910,1.400450e-04,CaMnO3,300.0
1032,-618.425,1.769260e-05,Nd2CuO4,400.0
124,-600.164,7.814450e-05,CaMnO3,400.0
329,-587.185,1.426560e-04,CaMnO3,400.0
34,-572.037,6.534590e-06,CaMnO3,300.0
35,-549.121,1.030860e-05,CaMnO3,400.0
642,-476.683,3.257540e-06,CaMnO3,300.0
36,-477.482,3.544360e-05,CaMnO3,700.0
786,-436.926,6.427760e-06,ZnO,1000.0


In [155]:
periodic_df1 = periodic_df[periodic_df['Seebeck(uV/K)'] < -150] 

In [157]:
periodic_df1 = periodic_df1.append(periodic_df[periodic_df['Seebeck(uV/K)'] > 150])

In [39]:
periodic_df1['T(K)'].unique()

array([ 300.,  400.,  700., 1000.,   nan])

In [158]:
T_300 = pd.DataFrame(periodic_df1[periodic_df1['T(K)'] == 300])

In [159]:
T_400 = pd.DataFrame(periodic_df1[periodic_df1['T(K)'] == 400])

In [160]:
T_700 = pd.DataFrame(periodic_df1[periodic_df1['T(K)'] == 700])

In [161]:
T_1000 = pd.DataFrame(periodic_df1[periodic_df1['T(K)'] == 1000])

In [162]:
def parse_multiple_formulas(df, target_df):
    for idx in range(len(df['Formula'])):
        parsed_formula = get_empirical_formula(df['Formula'].iloc[idx])
        target_df.append(parsed_formula)
        idx = idx + 1
    return (target_df)

In [163]:
parsed_T_300 = []
parse_multiple_formulas(periodic_df1, parsed_T_300)

[{'Cu': 1, 'Nd': 2, 'O': 4},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Cu': 1, 'Nd': 2, 'O': 4},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'O': 1, 'Zn': 1},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'O': 1, 'Zn': 1},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'O': 1, 'Zn': 1},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Al': 0.0196, 'Ni': 0.02, 'O': 1, 'Zn': 0.9604},
 {'Al': 0.0196, 'Ni': 0.02, 'O': 1, 'Zn': 0.9604},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Fe': 1.98, 'O': 3, 'Sn': 0.02},
 {'Fe': 1.98, 'O': 3, 'Sn': 0.02},
 {'Fe': 1.98, 'O': 3, 'Ti': 0.02},
 {'Fe': 1.98, 'O': 3, 'Ti': 0.02},
 {'Ca': 0.9, 'In': 0.1, 'Mn': 1, 'O': 3},
 {'Al': 0.02, 'O': 1, 'Zn': 0.98},
 {'Ca': 1, 'Mn': 1, 'O': 3},
 {'Fe': 1.98, 'O': 3, 'Ti': 0.02},
 {'Fe': 1.96, 'O': 3, 'Ti': 0.04},
 {'Fe': 1.98, 'O': 3, 'Ti': 0.02},
 {'Fe': 1.98, 'O': 3, 'Sn': 0.02},
 {'Fe': 1.98, 'O': 3, 'Sn': 0.02},
 {'Fe': 1.96, 'O': 3, 'Sn': 0.04},
 {'Ca

In [69]:
parsed_T_300_df = pd.DataFrame(parsed_T_300)

In [70]:
parsed_T_300_df.head()

Unnamed: 0,Al,Ba,Bi,Ca,Co,Cu,Dy,Fe,Ga,Gd,...,Sn,Sr,Tb,Te,Ti,Tl,Y,Yb,Zn,Zr
0,,,,1.0,,,,,,,...,,,,,,,,,,
1,,,,1.0,,,,,,,...,,,,,,,,,,
2,,,,1.0,,,,,,,...,,,,,,,,,,
3,,,,1.0,,,,,,,...,,,,,,,,,,
4,,,,0.98,,,,,,,...,,,,,,,,,,


In [84]:
results_T_300 = parsed_T_300_df[parsed_T_300_df.notnull()].count()

In [201]:
results_T_300_df = pd.DataFrame(results_T_300)

In [202]:
results_T_300_df = results_T_300_df.rename(columns = {0 : 'Count'})

In [204]:
results_T_300_df['symbol'] = results_T_300_df.index

In [205]:
results_T_300_df.head()

Unnamed: 0,Count,symbol
Al,41,Al
Ba,13,Ba
Bi,17,Bi
Ca,62,Ca
Co,40,Co


In [207]:
atomic_num = pd.DataFrame(elements[['symbol', 'atomic number']])

In [209]:
atomic_num_concat = (pd.merge(atomic_num, results_T_300_df, on='symbol'))

In [210]:
atomic_num_concat.head()

Unnamed: 0,symbol,atomic number,Count
0,Li,3,1
1,O,8,175
2,Mg,12,15
3,Al,13,41
4,Si,14,27


In [183]:
from bokeh.io import output_file, show
from bokeh.models import BasicTicker, ColumnDataSource, HoverTool, LinearColorMapper, ColorBar, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap
from bokeh.palettes import Viridis5
from bokeh.transform import transform

In [267]:
output_file("periodic.html")

periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = [str(x) for x in range(1, 19)]

df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str) 
df["group"] = df["group"].astype(str)
df["period"] = [periods[x-1] for x in df.period]
df = df[df.group != "-"] #cleaning elements dataframe?
df = df[df.symbol != "Lr"]
df = df[df.symbol != "Lu"]


source = ColumnDataSource(df)

p = figure(title="Periodic Table (omitting LA and AC Series)", plot_width=1000, plot_height=450,
           tools="", toolbar_location=None,
           x_range=groups, y_range=list(reversed(periods)))

mapper = LinearColorMapper(palette=Viridis5, low=results_T_300_df.Count.min(), high=results_T_300_df.Count.max())

p.rect("group", "period", 0.95, 0.95, source=source, fill_alpha=0.6, legend="metal", fill_color=transform('Count', mapper))


color_bar = ColorBar(color_mapper=mapper, location=(0, 0))

p.add_layout(color_bar, 'right')

text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}

x = dodge("group", -0.4, range=p.x_range)

r = p.text(x=x, y="period", text="symbol", **text_props)
r.glyph.text_font_style="bold"

r = p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props)
r.glyph.text_font_size="8pt"

r = p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props)
r.glyph.text_font_size="5pt"

r = p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props)
r.glyph.text_font_size="5pt"

p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

p.add_tools(HoverTool(tooltips = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]))

p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location ="top_center"

show(p)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Count [renderer: GlyphRenderer(id='904ac113-7f3c-43c5-bc54-3c4bd1a85f77', ...)]
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='2bee0d6b-9bd6-4824-a2a3-a5ab31dfe7ff', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4897146f-7a74-44a1-8a40-d657bc732147', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='49f47169-00ac-4bd2-b4d4-59440095ce48', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4aaea623-ae2b-44a6-a9f6-0c5709e50511', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='ef037ddd-890d-42b3-990c-cfe1c40aa88e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout

In [266]:
periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = [str(x) for x in range(1, 19)]

df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str) 
df["group"] = df["group"].astype(str)
df["period"] = [periods[x-1] for x in df.period]
df = df[df.group != "-"] #cleaning elements dataframe?
df = df[df.symbol != "Lr"]
df = df[df.symbol != "Lu"]

cmap = {
    "alkali metal"         : "#a6cee3",
    "alkaline earth metal" : "#1f78b4",
    "metal"                : "#d93b43",
    "halogen"              : "#999d9a",
    "metalloid"            : "#e08d49",
    "noble gas"            : "#eaeaea",
    "nonmetal"             : "#f1d4Af",
    "transition metal"     : "#599d7A",
} # Creating a dictionary?

source = ColumnDataSource(df)

p = figure(title="Periodic Table (omitting LA and AC Series)", plot_width=1000, plot_height=450,
           tools="", toolbar_location=None,
           x_range=groups, y_range=list(reversed(periods)))

p.rect("group", "period", 0.95, 0.95, source=source, fill_alpha=0.6, legend="metal",
       color=factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap.keys())))

text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}

x = dodge("group", -0.4, range=p.x_range)

r = p.text(x=x, y="period", text="symbol", **text_props)
r.glyph.text_font_style="bold"

r = p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props)
r.glyph.text_font_size="8pt"

r = p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props)
r.glyph.text_font_size="5pt"

r = p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props)
r.glyph.text_font_size="5pt"

p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

p.add_tools(HoverTool(tooltips = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]))

p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location ="top_center"

show(p)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Count [renderer: GlyphRenderer(id='904ac113-7f3c-43c5-bc54-3c4bd1a85f77', ...)]
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='2bee0d6b-9bd6-4824-a2a3-a5ab31dfe7ff', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4897146f-7a74-44a1-8a40-d657bc732147', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='49f47169-00ac-4bd2-b4d4-59440095ce48', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4aaea623-ae2b-44a6-a9f6-0c5709e50511', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='ef037ddd-890d-42b3-990c-cfe1c40aa88e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout

In [265]:
from collections import OrderedDict

from bokeh.plotting import figure, show, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.sampledata import periodic_table
from bokeh.embed import components



elements = periodic_table.elements[periodic_table.elements["group"] != "-"]

group_range = [str(x) for x in range(1,19)]
period_range = [str(x) for x in reversed(sorted(set(elements["period"])))]

def electroneg_to_discrete(e_val):
    if e_val < 1.5:
        return '0_to_1.5'
    elif e_val < 1.9:
        return '1.5_to_1.9'
    elif e_val < 2.9:
        return '2.0_to_2.9'
    elif e_val <= 4.0:
        return '3.0_to_4.0'
    else:
        return 'NaN'

electroneg_discrete = map(lambda x: electroneg_to_discrete(x), elements['electronegativity'])


colormap = {
    '0_to_1.5': '#ffffcc',
    '1.5_to_1.9' : '#a1dab4',
    '2.0_to_2.9': '#41b6c4',
    '3.0_to_4.0': '#225ea8',
    'NaN': '#bdbdbd'


}

source = ColumnDataSource(
    data = dict(
    group=[str(x) for x in elements["group"]],
    period=[str(y) for y in elements["period"]],
    symx=[str(x)+":0.1" for x in elements["group"]],
    name = elements["name"],
    numbery=[str(x)+":0.8" for x in elements["period"]],
    electronegativity = elements["electronegativity"],
    symbol = elements["symbol"],
    namey=[str(x)+":0.3" for x in elements["period"]],
    atomic_number = elements['atomic number'],
    type_color=[colormap[x] for x in electroneg_discrete],
    electronegativityy = [str(x)+':0.1' for x in elements['period']]))

output_file("electronegativity_table.html")


p = figure(title="Electronegativity", tools="hover,save",
          x_range=group_range, y_range=period_range)
p.plot_width = 1200
p.toolbar_location = "left"

p.rect("group", "period", 0.9, 0.9, source=source,
      fill_alpha=0.6, color="type_color")

text_props = {
    "source": source,
    "angle": 0,
    "color": "black",
    "text_align": "left",
    "text_baseline": "middle"
}

p.text(x="symx", y="period", text="symbol",
    text_font_style="bold", text_font_size="15pt", **text_props)

p.text(x="symx", y = 'numbery', text='atomic_number',
      text_font_size='9pt', **text_props)

p.text(x="symx", y="namey", text="name",
      text_font_size="6pt", **text_props)

p.grid.grid_line_color = None

hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
        ("name", "@name"),
        ("atomic number", "@atomic_number"),
        ("electronegativity", "@electronegativity")
    ])

script_file = open('electronegativity_table_script.html', 'w+')
div_file = open('electronegativity_table_div.html', 'w+')

script, div = components(p)
print(script, file=script_file)
print(div, file=div_file)

show(p)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Count [renderer: GlyphRenderer(id='904ac113-7f3c-43c5-bc54-3c4bd1a85f77', ...)]
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='2bee0d6b-9bd6-4824-a2a3-a5ab31dfe7ff', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4897146f-7a74-44a1-8a40-d657bc732147', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='49f47169-00ac-4bd2-b4d4-59440095ce48', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Dropdown(id='4aaea623-ae2b-44a6-a9f6-0c5709e50511', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout box: Figure(id='ef037ddd-890d-42b3-990c-cfe1c40aa88e', ...)
W-1004 (BOTH_CHILD_AND_ROOT): Models should not be a document root if they are in a layout