In [1]:
import datetime
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [2]:
import sys
print(sys.version)

3.5.0 |Anaconda custom (64-bit)| (default, Dec  1 2015, 11:46:22) [MSC v.1900 64 bit (AMD64)]


<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Read data

In [3]:
# Load the raw CPU samples; the path is relative to the notebook's working directory.
cpu_raw_df = pd.read_csv('./data/CPU.csv')

In [4]:
cpu_raw_df.head()

Unnamed: 0,machine_id,date,timestamp,cpu_ghz,cpu_usage,idle,ip,total
0,t-657740490-UserCluster1-sysadmin,2017/8/8,2017-08-08 00:00:00+0000,1.2,0.087799,9278,192.168.41.16,10171
1,t-657740490-UserCluster1-sysadmin,2017/8/8,2017-08-08 00:00:02+0000,1.278,0.091266,9260,192.168.41.16,10190
2,t-657740490-UserCluster1-sysadmin,2017/8/8,2017-08-08 00:00:04+0000,1.224,0.092574,9263,192.168.41.16,10208
3,t-657740490-UserCluster1-sysadmin,2017/8/8,2017-08-08 00:00:06+0000,1.2,0.093147,9210,192.168.41.16,10156
4,t-657740490-UserCluster1-sysadmin,2017/8/8,2017-08-08 00:00:09+0000,1.2,0.089702,9255,192.168.41.16,10167


<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Formatting data (in this case, only time formatting)

In [6]:
class CpuDataFormat:
    """Normalise the time columns of a raw CPU-metrics dataframe.

    1. Parse the 'date' and 'timestamp' string columns into datetimes.
    2. Keep a copy of the parsed timestamp in a 'timestampp' column.
    3. Use the parsed timestamp as the index (needed for resampling/plotting).

    The frame passed to the constructor is stored by reference and modified
    in place; no copy is made.
    """

    # Layout of the raw string columns, e.g. '2017/8/8' and '2017-08-08 00:00:02+0000'.
    DATE_FORMAT = '%Y/%m/%d'
    TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S%z'

    def __init__(self, df):
        """Store `df` (a dataframe with string 'date' and 'timestamp' columns)."""
        self.cpu_df = df

    @staticmethod
    def _parse_naive_timestamp(text):
        """Parse one timestamp string and drop its UTC offset, keeping the wall-clock time."""
        parsed = datetime.datetime.strptime(text, CpuDataFormat.TIMESTAMP_FORMAT)
        return parsed.replace(tzinfo=None)

    def format_date_timestamp_as_index(self):
        """Convert the time columns of `self.cpu_df` in place and index by timestamp.

        After the call:
          * 'date' holds datetimes parsed with DATE_FORMAT,
          * 'timestampp' holds the timezone-naive parsed timestamp,
          * the index is the same timezone-naive timestamp, named 'timestamp'
            (the 'timestamp' column itself is consumed by the index).

        Returns None. Raises the usual parsing errors (ValueError/TypeError)
        when a value does not match the expected format.
        """
        frame = self.cpu_df

        # Whole-column conversion replaces the former per-row iterrows()/set_value()
        # loop: set_value() no longer exists in current pandas and the row loop
        # was the reason this step used to take minutes.
        frame['date'] = pd.to_datetime(frame['date'], format=self.DATE_FORMAT)
        print('date formatted.')

        # The offset is discarded rather than converted (tzinfo is simply removed),
        # so the wall-clock value written in the file is what ends up in the index.
        # pd.to_datetime() then guarantees a datetime64 column whatever dtype
        # Series.map() produced.
        naive_timestamps = frame['timestamp'].map(self._parse_naive_timestamp)
        frame['timestamp'] = pd.to_datetime(naive_timestamps)
        print('timestamp formatted.')

        # Keep the timestamp available as a regular column once it becomes the index.
        frame['timestampp'] = frame['timestamp']

        frame.set_index('timestamp', inplace=True)
        print('timestamp as index.')

In [7]:
# Format cpu_raw_df. CpuDataFormat keeps a reference to the frame it is given
# (no copy), so this call rewrites cpu_raw_df itself and moves 'timestamp' into
# the index; reload the CSV (In [3]) before re-running this cell.
cpu_a = CpuDataFormat(cpu_raw_df)
cpu_a.format_date_timestamp_as_index()

===Please be patient, it will cost few minutes.===
date formatted.
timestamp formatted.
timestamp as index.


In [8]:
# Show formatted data
cpu_a.cpu_df.head()

Unnamed: 0_level_0,machine_id,date,cpu_ghz,cpu_usage,idle,ip,total,timestampp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-08-08 00:00:00,t-657740490-UserCluster1-sysadmin,2017-08-08 00:00:00,1.2,0.087799,9278,192.168.41.16,10171,2017-08-08 00:00:00
2017-08-08 00:00:02,t-657740490-UserCluster1-sysadmin,2017-08-08 00:00:00,1.278,0.091266,9260,192.168.41.16,10190,2017-08-08 00:00:02
2017-08-08 00:00:04,t-657740490-UserCluster1-sysadmin,2017-08-08 00:00:00,1.224,0.092574,9263,192.168.41.16,10208,2017-08-08 00:00:04
2017-08-08 00:00:06,t-657740490-UserCluster1-sysadmin,2017-08-08 00:00:00,1.2,0.093147,9210,192.168.41.16,10156,2017-08-08 00:00:06
2017-08-08 00:00:09,t-657740490-UserCluster1-sysadmin,2017-08-08 00:00:00,1.2,0.089702,9255,192.168.41.16,10167,2017-08-08 00:00:09


<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Group Data by machine_id

In [9]:
def group_by_machine_id(cpu_main):
    """Split a CPU dataframe into one index-sorted dataframe per machine.

    Parameters
    ----------
    cpu_main : DataFrame
        Must contain a string 'machine_id' column whose values look like
        '<prefix>-<number>-...'.

    Returns
    -------
    machine_df_pair : dict
        Maps 'machine_<prefix>' to that machine's rows, sorted by index.
    machine_id_list : list
        The distinct machine_id values, in groupby (sorted) order.
    machine_variable_list : list
        The keys of `machine_df_pair`, in the same order as `machine_id_list`.

    Note: the key only uses the text before the first '-', so two machine ids
    sharing that prefix map to the same key and the later one replaces the
    earlier one in `machine_df_pair`.
    """
    machine_df_pair = {}
    machine_id_list = []
    machine_variable_list = []
    # One groupby pass yields both the id and its rows, instead of counting the
    # groups first and then re-filtering the whole frame once per machine.
    for machine_id, machine_rows in cpu_main.groupby('machine_id'):
        key = 'machine_{0}'.format(machine_id.split('-')[0])
        machine_id_list.append(machine_id)
        machine_variable_list.append(key)
        machine_df_pair[key] = machine_rows.sort_index()
    return machine_df_pair, machine_id_list, machine_variable_list

In [10]:
# Split cpu_a.cpu_df into one dataframe per machine_id.
# Returns, in order: the dict of per-machine frames, the machine_id values, and the dict keys.
cpu_machine_dict, cpu_machine_id_list, cpu_machine_variable_list = group_by_machine_id(cpu_a.cpu_df)

In [11]:
print(cpu_machine_variable_list)
print(cpu_machine_id_list)

['machine_a', 'machine_b', 'machine_c', 'machine_i', 'machine_t', 'machine_z']
['a-957043145-UserCluster1-sysadmin', 'b-956223090-UserCluster1-sysadmin', 'c-959255288-UserCluster1-sysadmin', 'i-325376172-UserCluster1-sysadmin', 't-657740490-UserCluster1-sysadmin', 'z-323389049-UserCluster1-sysadmin']


In [12]:
# Example to get machine_a 's dataframe
cpu_machine_dict['machine_a'].head()

Unnamed: 0_level_0,machine_id,date,cpu_ghz,cpu_usage,idle,ip,total,timestampp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-08-04 07:07:51,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,1.777,0.028664,4778,192.168.41.15,4919,2017-08-04 07:07:51
2017-08-04 07:07:53,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,2.2,0.072883,4554,192.168.41.15,4912,2017-08-04 07:07:53
2017-08-04 07:07:55,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,2.2,0.065041,4600,192.168.41.15,4920,2017-08-04 07:07:55
2017-08-04 07:07:57,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,2.2,0.082418,4509,192.168.41.15,4914,2017-08-04 07:07:57
2017-08-04 07:07:59,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,1.892,0.076251,4543,192.168.41.15,4918,2017-08-04 07:07:59


<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);">  Basic Info Check

In [13]:
## Check that the split dataframes still add up to the original row count.
# total_rows = 0
# for k, v in cpu_machine_dict.items():
#     total_rows += len(v)
# total_rows == len(cpu_a.cpu_df)  # True

In [14]:
## Check each machine's count   [ **Have to initialize the plot function below first.]
# P.pie_chart(label_list=cpu_machine_variable_list, 
#             value_list=[len(v) for k,v in cpu_machine_dict.items()]
#            )

# P.bar_chart(label_list=cpu_machine_variable_list, 
#             value_list=[len(v) for k,v in cpu_machine_dict.items()]
#            )

<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Select data by 'timeIndex'

In [15]:
def select_time_range_from_df(df, start_time, end_time):
    """
        ## Return the rows of df whose index falls inside a time window.
        1. Input: df is indexed by datetimes (compared directly against the parsed bounds).
        2. Input: start_time and end_time are strings formatted as '%Y-%m-%d %H:%M:%S'.
        3. Output: the rows with ** start_time <= index < end_time ** (end is exclusive).
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    lower_bound = datetime.datetime.strptime(start_time, time_format)
    upper_bound = datetime.datetime.strptime(end_time, time_format)

    # Half-open window: the start is included, the end is not.
    at_or_after_start = df.index >= lower_bound
    before_end = df.index < upper_bound
    return df[at_or_after_start & before_end]

In [16]:
## select_time_range_from_df Example
# select_time_range_from_df(cpu_machine_dict['machine_a'], '2017-08-04 07:08:00', '2017-08-04 07:09:00').head()

<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Resample(for cpu data using)

In [17]:
def resample_timeframe(machine_id_df, machine_id_df_string, timeframe='1Min',
                       columns=('cpu_ghz', 'cpu_usage', 'idle', 'total')):
    """
        ## Resample columns by a timeframe into 'open, high, low, close, mean, median, std, count'.
        1. Input: machine_id_df(dataframe type), indexed by datetimes, containing every name in `columns`.
        2. Input: machine_id_df_string, the machine's name; only used to build the result keys.
        3. Input: timeframe: the pandas resampling rule (bucket size). Check the reference to learn more.
        4. Input: columns: names of the columns to resample; defaults to the four CPU metrics.
        5. Output: machine_id_split(dict type), one summary dataframe per column, keyed
           '<machine_id_df_string>_<timeframe>_<column>_resample'.
        6. Output: machine_id_split.keys(), the keys of that dict.
        7. Reference: http://benalexkeen.com/resampling-time-series-data-with-pandas/
    """
    machine_id_split = {}

    for column in columns:
        key = '{0}_{1}_{2}_resample'.format(machine_id_df_string, timeframe, column)

        # Build the resampler once per column and reuse it for every statistic.
        resampler = machine_id_df[column].resample(timeframe)

        # ohlc() supplies the open/high/low/close columns; the remaining
        # statistics are appended in a fixed order.
        summary = resampler.ohlc()
        summary['mean'] = resampler.mean()
        summary['median'] = resampler.median()
        summary['std'] = resampler.std()
        summary['count'] = resampler.count()

        machine_id_split[key] = summary

    return machine_id_split, machine_id_split.keys()

<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Create resample

In [18]:
# Example: 1-minute resample of machine_a.
# The first result is a dict with one summary dataframe per metric; the second is that dict's keys.
machine_a_1m_resample, machine_a_1m_resample_keys = resample_timeframe(machine_id_df = cpu_machine_dict['machine_a'], 
                                                                       machine_id_df_string = 'machine_a', 
                                                                       timeframe='1Min')

In [19]:
# keys of machine_a_1m_resample 
machine_a_1m_resample_keys

dict_keys(['machine_a_1Min_idle_resample', 'machine_a_1Min_cpu_ghz_resample', 'machine_a_1Min_total_resample', 'machine_a_1Min_cpu_usage_resample'])

In [20]:
# Get specific one
machine_a_1m_resample['machine_a_1Min_cpu_usage_resample'].head()
# Below is machine_a's [1-Minute-resample-cpu_usage-data]. 

Unnamed: 0_level_0,open,high,low,close,mean,median,std,count
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-08-04 07:07:00,0.028664,0.082418,0.028664,0.076251,0.065051,0.072883,0.021286,5
2017-08-04 07:08:00,0.128606,0.207766,0.051945,0.207766,0.089384,0.072566,0.039618,29
2017-08-04 07:09:00,0.108887,0.108887,0.006188,0.027806,0.019476,0.017298,0.018195,28
2017-08-04 07:10:00,0.018175,0.030164,0.005802,0.015409,0.015619,0.015997,0.005951,29
2017-08-04 07:11:00,0.006805,0.031031,0.006805,0.008008,0.016537,0.016803,0.007315,29


In [21]:
# To verify the resample, use select_time_range_from_df to inspect the raw rows of the same window.
select_time_range_from_df(cpu_machine_dict['machine_a'], '2017-08-04 07:07:00', '2017-08-04 07:08:00')

Unnamed: 0_level_0,machine_id,date,cpu_ghz,cpu_usage,idle,ip,total,timestampp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-08-04 07:07:51,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,1.777,0.028664,4778,192.168.41.15,4919,2017-08-04 07:07:51
2017-08-04 07:07:53,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,2.2,0.072883,4554,192.168.41.15,4912,2017-08-04 07:07:53
2017-08-04 07:07:55,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,2.2,0.065041,4600,192.168.41.15,4920,2017-08-04 07:07:55
2017-08-04 07:07:57,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,2.2,0.082418,4509,192.168.41.15,4914,2017-08-04 07:07:57
2017-08-04 07:07:59,a-957043145-UserCluster1-sysadmin,2017-08-04 00:00:00,1.892,0.076251,4543,192.168.41.15,4918,2017-08-04 07:07:59


In [22]:
# More examples: 10-minute, hourly and daily resamples for machines a, b and c.
# These variables are used by the drawing examples further down.
machine_a_10m_resample, machine_a_10m_resample_keys = resample_timeframe(cpu_machine_dict['machine_a'], 'machine_a', timeframe='10Min')
machine_a_1H_resample, machine_a_1H_resample_keys = resample_timeframe(cpu_machine_dict['machine_a'], 'machine_a', timeframe='1H')
machine_a_1D_resample, machine_a_1D_resample_keys = resample_timeframe(cpu_machine_dict['machine_a'], 'machine_a', timeframe='1D')
machine_b_10m_resample, machine_b_10m_resample_keys = resample_timeframe(cpu_machine_dict['machine_b'], 'machine_b', timeframe='10Min')
machine_b_1H_resample, machine_b_1H_resample_keys = resample_timeframe(cpu_machine_dict['machine_b'], 'machine_b', timeframe='1H')
machine_b_1D_resample, machine_b_1D_resample_keys = resample_timeframe(cpu_machine_dict['machine_b'], 'machine_b', timeframe='1D')
machine_c_10m_resample, machine_c_10m_resample_keys = resample_timeframe(cpu_machine_dict['machine_c'], 'machine_c', timeframe='10Min')
machine_c_1H_resample, machine_c_1H_resample_keys = resample_timeframe(cpu_machine_dict['machine_c'], 'machine_c', timeframe='1H')
machine_c_1D_resample, machine_c_1D_resample_keys = resample_timeframe(cpu_machine_dict['machine_c'], 'machine_c', timeframe='1D')

<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);"> Drawing Function In Jupyter notebook

In [23]:
# plotly 
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
%load_ext autoreload
%autoreload 2

# offline plot
# Charts do not show up when plotly is called from inside a function, so the offline mode is used instead.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [24]:
# plotly reference https://plot.ly/python/
class PlotlyDraw:

    """
        1. A collection of Plotly chart helpers rendered with the offline `iplot`.
        2. Original author's note: please use python 3.5 to avoid problems (CANT be 3.6).

        Every method draws the chart as a side effect and returns None.
    """

    # Positions in `color_list` used by the (up to four) time-series line traces.
    _LINE_COLOR_POSITIONS = (0, 1, 3, 5)

    # (count, label, step) of the range-selector buttons shown on time-series charts.
    _RANGE_BUTTONS = (
        (1, '1min', 'minute'),
        (10, '10mins', 'minute'),
        (1, '1hour', 'hour'),
        (6, '6hours', 'hour'),
        (12, '12hours', 'hour'),
        (1, '1day', 'day'),
        (1, '1m', 'month'),
        (6, '6m', 'month'),
    )

    def __init__(self):
        # Default palette, shared by all chart types.
        self.color_list = ['#F38181', '#FCE38A', '#EAFFD0', '#95E1D3', '#A6D0E4', '#D5DEF5', '#D4A5A5']

    # ------------------------------------------------------------------ helpers

    def _palette(self, color_list):
        """Return `color_list`, or the default palette when it is None or empty."""
        if color_list is None or len(color_list) == 0:
            return self.color_list
        return color_list

    def _marker_trace(self, x, y, color, name=None):
        """Build a markers-only scatter trace with a black outline around each point."""
        options = dict(
            x=x,
            y=y,
            mode='markers',
            marker=dict(
                size=10,
                color=color,
                line=dict(width=2, color='rgb(0, 0, 0)')
            )
        )
        # Only name the trace when the caller asked for it.
        if name is not None:
            options['name'] = name
        return go.Scatter(**options)

    def _line_trace(self, x, y, name, color):
        """Build a slightly transparent line trace for the time-series charts."""
        return go.Scatter(x=x, y=y, name=name, line=dict(color=color), opacity=0.8)

    def _show_timeseries(self, traces, chart_name):
        """Render line traces on a date axis with range-selector buttons and a range slider."""
        title = chart_name if chart_name else "Time Series with Rangeslider"
        buttons = [dict(count=count, label=label, step=step, stepmode='backward')
                   for count, label, step in self._RANGE_BUTTONS]
        buttons.append(dict(step='all'))
        layout = dict(
            title=title,
            xaxis=dict(
                rangeselector=dict(buttons=buttons),
                rangeslider=dict(),
                type='date'
            )
        )
        iplot(dict(data=traces, layout=layout), filename=title)

    # ------------------------------------------------------------------- charts

    def bar_chart(self, label_list, value_list, color_list=None):
        """
            1. label_list <- list of labels (string type)
            2. value_list <- list of values (number type)
            3. color_list <- optional list of bar colors; default palette when omitted or empty.
        """
        trace = go.Bar(
            x=label_list,
            y=value_list,
            marker=dict(color=self._palette(color_list))
        )
        iplot([trace], filename='styled_bar_chart')

    def pie_chart(self, label_list, value_list, color_list=None):
        """
            1. label_list <- list of labels (string type)
            2. value_list <- list of values (number type)
            3. color_list <- optional list of slice colors; default palette when omitted or empty.
        """
        trace = go.Pie(labels=label_list,
                       values=value_list,
                       hoverinfo='label+percent',
                       textinfo='label+value+percent',
                       textfont=dict(size=20),
                       marker=dict(colors=self._palette(color_list),
                                   line=dict(color='#000000', width=2)))
        iplot([trace], filename='styled_pie_chart')

    def scatter_plot_oneDF(self, df, chart_name, trace_x_name, trace_y_name):
        """
            1. Data should be only one dataframe.
            2. chart_name <- chart title (string type)
            3. trace_x_name <- one of the df's column names (string type)
            4. trace_y_name <- one of the df's column names (string type)
        """
        trace = self._marker_trace(df[trace_x_name], df[trace_y_name], self.color_list[0])
        fig = dict(data=[trace], layout=dict(title=chart_name))
        # Plot the whole figure, not just the traces, so the title is actually shown.
        iplot(fig, filename='basic-scatter')

    def scatter_case(self, trace1_x, trace1_y, trace2_x, trace2_y, trace3_x, trace3_y,
                     trace1_title='trace1_title', trace2_title='trace2_title', trace3_title='trace3_title',
                     all_title='trace_compliation'):

        """
            0. Exactly three traces are required.
            1. Data could come from different dataframes.
            2. traceN_x, traceN_y <- should have the same length (both df['xxxx'] type).
            3. A dropdown switches between all traces and each single trace,
               updating the chart title accordingly.
        """
        titles = (trace1_title, trace2_title, trace3_title)
        points = ((trace1_x, trace1_y), (trace2_x, trace2_y), (trace3_x, trace3_y))
        data = [self._marker_trace(x, y, self.color_list[position], name=title)
                for position, ((x, y), title) in enumerate(zip(points, titles))]

        # One button showing everything, then one button per trace that hides the others.
        buttons = [dict(label='all',
                        method='update',
                        args=[{'visible': [True, True, True]},
                              {'title': all_title}])]
        for position, title in enumerate(titles):
            visible = [other == position for other in range(len(titles))]
            buttons.append(dict(label='trace_{0}'.format(position + 1),
                                method='update',
                                args=[{'visible': visible},
                                      {'title': title}]))
        updatemenus = [dict(active=-1, buttons=buttons)]

        layout = dict(title=all_title,
                      showlegend=True,
                      updatemenus=updatemenus)

        fig = dict(data=data, layout=layout)
        iplot(fig, filename='update_dropdown')

    def ohlc_chart(self, df_with_olhc):
        """
            0. Original author's note: py3.6 causes problems, please use python 3.5.
            1. df_with_olhc <- dataframe type.
            2. Needs a timestamp index plus open, high, low, close columns.
        """
        trace = go.Ohlc(x=df_with_olhc.index,
                        open=df_with_olhc.open,
                        high=df_with_olhc.high,
                        low=df_with_olhc.low,
                        close=df_with_olhc.close)
        iplot([trace], filename='simple_ohlc')

    def candlestick_chart(self, df_with_olhc):
        """
            0. Original author's note: py3.6 causes problems, please use python 3.5.
            1. df_with_olhc <- dataframe type.
            2. Needs a timestamp index plus open, high, low, close columns.
        """
        trace = go.Candlestick(x=df_with_olhc.index,
                               open=df_with_olhc.open,
                               high=df_with_olhc.high,
                               low=df_with_olhc.low,
                               close=df_with_olhc.close)
        iplot([trace], filename='simple_candlestick')

    def timeseries_chart_OneDF(self, df, chart_name ='', trace_1_name='', trace_2_name='', trace_3_name='', trace_4_name=''):
        """
            0. Original author's note: py3.6 causes problems, please use python 3.5.
            1. Data should be only one dataframe.
            2. The index (timestamps) is used as x.
            3. trace_N_name <- column name plotted as y; empty names are skipped.
            4. Could be 4 traces at the most.
            5. chart_name <- chart title; a default title is used when empty.
        """
        column_names = (trace_1_name, trace_2_name, trace_3_name, trace_4_name)
        data = [self._line_trace(df.index, df[column], column, self.color_list[position])
                for column, position in zip(column_names, self._LINE_COLOR_POSITIONS)
                if column]
        self._show_timeseries(data, chart_name)

    def timeseries_chart_MultiDF(self, chart_name ='',
                                 trace1_df='', trace1_y='', trace1_name='',
                                 trace2_df='', trace2_y='', trace2_name='',
                                 trace3_df='', trace3_y='', trace3_name='',
                                 trace4_df='', trace4_y='', trace4_name=''):
        """
            0. Original author's note: py3.6 causes problems, please use python 3.5.
            1. Data could come from different dataframes.
            2. Each traceN_df's index (timestamps) is used as x.
            3. traceN_df (dataframe type).
            4. traceN_y <- column name plotted as y (string type); a trace is skipped when it is empty.
            5. traceN_name <- legend label of the trace.
            6. Could be 4 traces at the most.
            7. chart_name <- chart title; a default title is used when empty.
        """
        requested = (
            (trace1_df, trace1_y, trace1_name),
            (trace2_df, trace2_y, trace2_name),
            (trace3_df, trace3_y, trace3_name),
            (trace4_df, trace4_y, trace4_name),
        )
        data = [self._line_trace(frame.index, frame[column], label, self.color_list[position])
                for (frame, column, label), position in zip(requested, self._LINE_COLOR_POSITIONS)
                if column]
        self._show_timeseries(data, chart_name)
P = PlotlyDraw()

<h3 style="background-image: linear-gradient( 135deg, #52E5E7 10%, #130CB7 100%);">Drawing example

In [25]:
# bar_chart example: three labelled bars, each with an explicit color.
label_list = ['a', 'b', 'c']
value_list = [3, 5, 5]
color_list = ['#95E1D3', '#A6D0E4', '#D5DEF5']
P.bar_chart(label_list=label_list,
            value_list=value_list,
            color_list=color_list)

In [26]:
# pie_chart example: same labels, values and colors as the bar chart above.
label_list = ['a', 'b', 'c']
value_list = [3, 5, 5]
color_list = ['#95E1D3', '#A6D0E4', '#D5DEF5']
P.pie_chart(label_list=label_list,
            value_list=value_list,
            color_list=color_list)

In [27]:
# candlestick_chart example: machine_a's daily (1D) cpu_usage open/high/low/close.
P.candlestick_chart(df_with_olhc=machine_a_1D_resample['machine_a_1D_cpu_usage_resample'])

In [28]:
# ohlc_chart example: the same daily cpu_usage data drawn as OHLC bars.
P.ohlc_chart(df_with_olhc=machine_a_1D_resample['machine_a_1D_cpu_usage_resample'])

In [29]:
# scatter_case example: cpu_ghz (x) against cpu_usage (y), using the 'close' value
# of each 10-minute bucket, with one trace per machine (a, b, c).
P.scatter_case(
    trace1_x=machine_a_10m_resample['machine_a_10Min_cpu_ghz_resample']['close'], 
    trace1_y=machine_a_10m_resample['machine_a_10Min_cpu_usage_resample']['close'], 
    trace2_x=machine_b_10m_resample['machine_b_10Min_cpu_ghz_resample']['close'], 
    trace2_y=machine_b_10m_resample['machine_b_10Min_cpu_usage_resample']['close'], 
    trace3_x=machine_c_10m_resample['machine_c_10Min_cpu_ghz_resample']['close'], 
    trace3_y=machine_c_10m_resample['machine_c_10Min_cpu_usage_resample']['close'],
    trace1_title='a 10Mins cpu_ghz(x) vs cpu_usage(y)', 
    trace2_title='b 10Mins cpu_ghz(x) vs cpu_usage(y)',
    trace3_title='c 10Mins cpu_ghz(x) vs cpu_usage(y)',
    all_title = 'machine_compliation - 10Mins cpu_ghz(x) vs cpu_usage(y)'
)

In [30]:
# timeseries_chart_OneDF example: the open, high, low and mean columns of
# machine_a's hourly cpu_usage resample, drawn as four lines on one chart.
P.timeseries_chart_OneDF(chart_name='testPlot', 
                         df=machine_a_1H_resample['machine_a_1H_cpu_usage_resample'],
                         trace_1_name='open',
                         trace_2_name='high',
                         trace_3_name='low',
                         trace_4_name='mean')

In [31]:
# timeseries_chart_MultiDF example: the hourly cpu_usage median of machine_a,
# split into four consecutive one-day windows (2017-08-04 .. 2017-08-07), one trace per day.
# Each window includes its start time and excludes its end time.
P.timeseries_chart_MultiDF(chart_name='machine_a_1H_cpu_usage', 
                           trace1_df=select_time_range_from_df(machine_a_1H_resample['machine_a_1H_cpu_usage_resample'], '2017-08-04 00:00:00', '2017-08-05 00:00:00'),
                           trace2_df=select_time_range_from_df(machine_a_1H_resample['machine_a_1H_cpu_usage_resample'], '2017-08-05 00:00:00', '2017-08-06 00:00:00'),
                           trace3_df=select_time_range_from_df(machine_a_1H_resample['machine_a_1H_cpu_usage_resample'], '2017-08-06 00:00:00', '2017-08-07 00:00:00'),
                           trace4_df=select_time_range_from_df(machine_a_1H_resample['machine_a_1H_cpu_usage_resample'], '2017-08-07 00:00:00', '2017-08-08 00:00:00'),
                           trace1_y='median', trace2_y='median', trace3_y='median', trace4_y='median',
                           trace1_name='0804(五)cpu usage', trace2_name='0805(六)cpu usage', trace3_name='0806(日)cpu usage',
                           trace4_name='0807(一)cpu usage'
                           )