In [2]:
import pandas as pd
import numpy as np
import os
import qgrid
import bokeh

from bokeh.plotting import figure, show, output_notebook, output_file
output_notebook()
from IPython.display import Image, HTML, Javascript

rootdir = '/Users/Imperssonator/Dropbox/OFET Fab Table/OFET-Database-master/Papers/'
exclude_papers = ['FullFact 2016', 'VarAge 2016', 'SprayCool 2016']


In [3]:
df = pd.DataFrame()

for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        if file[-4:] == 'xlsx':
            if file[:-5] not in exclude_papers:
                dfi = pd.read_excel(os.path.join(subdir,file),index_col=0,header=None)
                dfi = dfi.transpose()
                df = df.append(dfi)
                print(file)

# Get rid of trailing spaces
df['DOI']=df['DOI'].str.strip()
df.reset_index(inplace=True)

Aiyar 2011.xlsx
Aiyar 2013.xlsx
Bielecka 2011.xlsx
Chang 2004.xlsx
Chang 2006.xlsx
Chang 2013.xlsx
Chang 2014.xlsx
Chang 2016.xlsx
Cho 2006.xlsx
Choi 2014.xlsx
Chu 2016.xlsx
Jiang 2011.xlsx
Kleinhenz 2016.xlsx
Kline 2003.xlsx
Kline 2005.xlsx
Newbloom 2011.xlsx
Park 2014.xlsx
Park,Ko 2014.xlsx
Scharsich 2012.xlsx
Singh 2008.xlsx
Surin 2006.xlsx
Verilhac 2006.xlsx
Wang 2015.xlsx
Zen 2004.xlsx
Zhao 2013.xlsx


In [4]:
qgrid.show_grid(df,grid_options={'forceFitColumns':False, 'defaultColumnWidth':80})

In [5]:
top_mobs=df.groupby('DOI')['RTMob'].idxmax()
top_mobs.tolist()
table_cols = ['Author','Year','VarParam','Mw','RR','Solv1','Solv2','InitConc','SubsTreat','Depo','AnnTime','AnnTemp','ChanLen','RTMob']
qgrid.show_grid(df[table_cols].loc[top_mobs],grid_options={'forceFitColumns':False, 'defaultColumnWidth':80})

## Isolating only rows with AFM data

In [6]:
df_S = df[~df['Sfull'].isnull()]
qgrid.show_grid(df_S,grid_options={'forceFitColumns':False, 'defaultColumnWidth':80})

In [7]:
dalsu_relevant = ['RTMob','FilmAggFrac','FilmEB','Sfull','CorrLen','GrainSize100','Herman']

In [8]:
corr_mat = np.empty([len(df['DOI'].unique().tolist()), len(dalsu_relevant)-1])
for i,doi in enumerate(df['DOI'].unique().tolist()):
    df_doi = df[df['DOI']==doi]
    corr_doi = df_doi[dalsu_relevant].astype(float).corr()['RTMob'].as_matrix()[1:]
    corr_mat[i]=corr_doi
    
df_corr = pd.DataFrame(data=corr_mat,
                       columns=dalsu_relevant[1:],
                       index=df['DOI'].unique().tolist()
                       )

plot_order = ['10.1002/adfm.201403708 ',
              '10.1021/am506546ki',
              '10.1021/acs/chemmater.6b01163',
              '10.1021/acsnano.5b02582',
              '10.1021/acsami.6b02216',
              ]

df_corr=df_corr.reindex(plot_order).fillna(value=0).abs()
df_corr

Unnamed: 0,FilmAggFrac,FilmEB,Sfull,CorrLen,GrainSize100,Herman
10.1002/adfm.201403708,0.0,0.0,0.0,0.0,0.0,0.0
10.1021/am506546ki,0.887237,0.901804,0.0,0.0,0.0,0.0
10.1021/acs/chemmater.6b01163,0.000493,0.453844,0.272443,0.458978,0.68168,0.508212
10.1021/acsnano.5b02582,0.0,0.980642,0.895557,0.762817,0.0,0.951635
10.1021/acsami.6b02216,0.0,0.624677,0.782534,0.621492,0.724021,0.0


In [10]:
corr_data = {'Process': ['Son. + 2-MP']*6
             + ['Son. + UV']*6
             + ['Son. + Aging']*6
             + ['MF + UV']*6
             + ['UV + Slide']*6,
        'Structural Metric': ['% Aggregates',
                              'Exciton Bandwidth',
                              'Fiber Alignment (S2D)',
                              'Decay Length',
                              '(100) Grain Size',
                             """Herman's Orientation Factor"""
                             ]*5,
        'Correlation': df_corr.values.ravel().tolist()}

In [11]:
from bokeh.charts import HeatMap, output_file, show
from bokeh.palettes import BuGn9 as palette

output_notebook()

palette = palette[::-1]  # Reverse the color order so dark red is highest unemployment
hm = HeatMap(corr_data,
             x='Structural Metric',
             y='Process',
             values='Correlation',
             stat=None,
             width=700,
             height=500,
             legend=True
            )

show(hm)

In [10]:
from bokeh.charts import HeatMap, output_file, show

# (dict, OrderedDict, lists, arrays and DataFrames are valid inputs)
data = {'fruit': ['apples']*3 + ['bananas']*3 + ['pears']*3,
        'fruit_count': [4, 5, 8, 1, 2, 4, 6, 5, 4],
        'sample': [1, 2, 3]*3}

hm = HeatMap(data, x='fruit', y='sample', values='fruit_count',
             title='Fruits', stat=None)

show(hm)

In [None]:
hm.renderers[1].glyph.properties()

In [None]:
corr_global = df[dalsu_relevant].astype(float).corr().as_matrix()
df_corr_global = pd.DataFrame(data=corr_global,
                       columns=dalsu_relevant,
                        index=dalsu_relevant
                       )
df_corr_global

In [None]:
df_afm=df[df['DOI']==df['DOI'].unique()[2]]

qgrid.show_grid(df_afm[dalsu_relevant],grid_options={'forceFitColumns':False, 'defaultColumnWidth':80})

In [None]:
df_afm[dalsu_relevant].astype(float).corr()

In [None]:
df_ami=df[df['DOI']==df['DOI'].unique()[1]]

In [None]:
df_ami[dalsu_relevant].astype(float).corr()

In [None]:
df_am0=df[df['DOI']==df['DOI'].unique()[0]]

In [None]:
df_am0[dalsu_relevant].astype(float).corr()

In [None]:
df_nabil=df[df['DOI']==df['DOI'].unique()[3]]

In [None]:
df_nabil[dalsu_relevant].astype(float).corr()

In [None]:
df_gang=df[df['DOI']==df['DOI'].unique()[5]]

In [None]:
df_gang[dalsu_relevant].astype(float).corr()['RTMob'].as_matrix()[1:]

In [None]:
def plot_v_mob(df,col,color_col='Author'):
    colors = {}
    for c in df[color_col].unique():
        colors[c] = np.random.randint( 0,256, (1,3))
        df.loc[df[color_col]==c,'color'] = '#' + ''.join([ '%02x'%s for s in colors[c][0]] )
    df = df.loc[~df[col].isnull()]
    cds = bokeh.models.ColumnDataSource(df)
    plot = figure(width=500, height=500)
    plot.circle(source=cds,
                x=col,
                y='RTMob',
                fill_color=df['color'][~df[col].apply(lambda x: np.isnan(x))],
                size=10, 
                fill_alpha=0.6)
    plot.xaxis.axis_label = col
    plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'
    plot.add_tools(
        bokeh.models.HoverTool(
            tooltips=[
                ('Author','@Author')
            ]
        )
    )

    show(plot)

In [None]:
plot_v_mob(df,'Sfull')

In [None]:
x_var = 'FilmEB'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
x_var = 'Sfull'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
x_var = 'CorrLen'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
x_var = 'Herman'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
x_var = 'GrainSize100'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
x_var = 'Space010'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
x_var = 'SolAggFrac'

plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
plot = figure(width=500, height=500)
plot.circle(x=df[x_var][~df[x_var].apply(lambda x: np.isnan(x))],
            y=df['RTMob'][~df[x_var].apply(lambda x: np.isnan(x))],
            size=10, 
            fill_alpha=0.6)
plot.xaxis.axis_label = x_var
plot.yaxis.axis_label = 'Mobility (cm^2/Vs)'

show(plot)

In [None]:
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'NaN']

newdf = df.select_dtypes(include=numerics)
newdf

In [None]:
from scipy import io
a_dict = {col_name : df[col_name].values for col_name in df.columns.values}

## optional if you want to save the index as an array as well:
# a_dict[df.index.name] = df.index.values
scipy.io.savemat('ofet_db.mat', {'struct':a_dict})