In [27]:
import pandas as pd

import bokeh.charts
import bokeh.charts.utils
import bokeh.io
import bokeh.models
import bokeh.palettes
import bokeh.plotting

bokeh.io.output_notebook()

In [28]:
df_store = pd.read_csv('store_data.csv', encoding = "ISO-8859-1", parse_dates=['Order Date'])

In [29]:
df_store.head()

Unnamed: 0,Row ID,Order Priority,Discount,Unit Price,Shipping Cost,Customer ID,Customer Name,Ship Mode,Customer Segment,Product Category,...,Region,State or Province,City,Postal Code,Order Date,Ship Date,Profit,Quantity ordered new,Sales,Order ID
0,20847,High,0.01,2.84,0.93,3,Bonnie Potter,Express Air,Corporate,Office Supplies,...,West,Washington,Anacortes,98221,2015-01-07,1/8/15,4.56,4,13.01,88522
1,20228,Not Specified,0.02,500.98,26.0,5,Ronnie Proctor,Delivery Truck,Home Office,Furniture,...,West,California,San Gabriel,91776,2015-06-13,6/15/15,4390.3665,12,6362.85,90193
2,21776,Critical,0.06,9.48,7.29,11,Marcus Dunlap,Regular Air,Home Office,Furniture,...,East,New Jersey,Roselle,7203,2015-02-15,2/17/15,-53.8096,22,211.15,90192
3,24844,Medium,0.09,78.69,19.99,14,Gwendolyn F Tyson,Regular Air,Small Business,Furniture,...,Central,Minnesota,Prior Lake,55372,2015-05-12,5/14/15,803.4705,16,1164.45,86838
4,24846,Medium,0.08,3.28,2.31,14,Gwendolyn F Tyson,Regular Air,Small Business,Office Supplies,...,Central,Minnesota,Prior Lake,55372,2015-05-12,5/13/15,-24.03,7,22.23,86838


In [30]:
df_profit = pd.DataFrame(df_store.groupby(['Order Date'])['Profit'].sum())
df_profit = df_profit.reset_index(drop=False)
df_profit.head()

Unnamed: 0,Order Date,Profit
0,2015-01-01,-1957.0881
1,2015-01-02,4732.33432
2,2015-01-03,-1846.34822
3,2015-01-04,516.2219
4,2015-01-05,85.877312


In [31]:
p = bokeh.charts.Line(df_profit, x='Order Date', y='Profit', plot_width=400, plot_height=200)
bokeh.io.show(p)

In [32]:
df_profit_by_segment = pd.DataFrame(df_store.groupby(['Customer Segment'])['Profit'].sum())
df_profit_by_segment = df_profit_by_segment.reset_index(drop=False)
df_profit_by_segment.head()

Unnamed: 0,Customer Segment,Profit
0,Consumer,49894.990273
1,Corporate,54444.191002
2,Home Office,54433.520869
3,Small Business,65304.909691


In [33]:
df_date_profit_by_segment = pd.DataFrame(df_store.groupby(['Customer Segment', 'Order Date'])['Profit'].sum())
df_date_profit_by_segment = df_date_profit_by_segment.reset_index(drop=False)
df_date_profit_by_segment.head()

Unnamed: 0,Customer Segment,Order Date,Profit
0,Consumer,2015-01-01,-61.194
1,Consumer,2015-01-02,2628.05002
2,Consumer,2015-01-03,90.6246
3,Consumer,2015-01-05,346.5691
4,Consumer,2015-01-06,-733.7028


In [34]:
df_monthly_segment_profit = df_date_profit_by_segment.set_index('Order Date').groupby('Customer Segment').resample('M').sum()
df_monthly_segment_profit = df_monthly_segment_profit.reset_index(drop=False)
df_monthly_segment_profit.head()

Unnamed: 0,Customer Segment,Order Date,Profit
0,Consumer,2015-01-31,15638.10768
1,Consumer,2015-02-28,5722.565556
2,Consumer,2015-03-31,-12732.19664
3,Consumer,2015-04-30,19586.214664
4,Consumer,2015-05-31,8373.289823


In [35]:
from bokeh.charts import Area, show, output_file

# create some example data
data = dict(
    python=[2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    pypy=[12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    jython=[22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
)
area = Area(data, title="Area Chart", legend="top_left",
            xlabel='time', ylabel='memory')

show(area)

In [36]:
area = Area(df_profit, title='Profit Area Chart', legend='top_left',
           xlabel='time', ylabel='profit')
show(area)

In [37]:
area = Area(df_monthly_segment_profit, title='Profit Area Chart', legend='top_left',
           xlabel='time', ylabel='profit')
show(area)

In [38]:
from bokeh.charts import Bar
bar = Bar(df_monthly_segment_profit, xlabel = 'time', values = 'Profit', agg='sum', group='Customer Segment',
            title='Revenue by Customer Segment', legend='top_left', tools='crosshair')
show(bar)

In [39]:
data

{'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
 'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
 'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111]}

In [40]:
profit_dict = df_monthly_segment_profit.set_index('Customer Segment').to_dict()
profit_dict

{'Order Date': {'Consumer': Timestamp('2015-06-30 00:00:00'),
  'Corporate': Timestamp('2015-06-30 00:00:00'),
  'Home Office': Timestamp('2015-06-30 00:00:00'),
  'Small Business': Timestamp('2015-06-30 00:00:00')},
 'Profit': {'Consumer': 13307.009189999997,
  'Corporate': 16635.098344999999,
  'Home Office': 23036.523299999997,
  'Small Business': 13858.339775}}

In [41]:
from bokeh.charts import Histogram
hist = Histogram(df_monthly_segment_profit, values='Profit',
                 color='Customer Segment', bins=5)
show(hist)

In [42]:
df_monthly_segment_profit.head()

Unnamed: 0,Customer Segment,Order Date,Profit
0,Consumer,2015-01-31,15638.10768
1,Consumer,2015-02-28,5722.565556
2,Consumer,2015-03-31,-12732.19664
3,Consumer,2015-04-30,19586.214664
4,Consumer,2015-05-31,8373.289823


In [43]:
df_mo_seg = df_monthly_segment_profit.set_index(['Order Date']).resample('M').sum()
df_mo_seg.head()

Unnamed: 0_level_0,Profit
Order Date,Unnamed: 1_level_1
2015-01-31,1043.6775
2015-02-28,35944.658781
2015-03-31,103.159587
2015-04-30,53146.4125
2015-05-31,67002.732858


In [44]:
area3 = Area(df_mo_seg, title='Area Chart', legend='top_left',
            xlabel='time', ylabel='profit')
show(area3)

In [45]:
df_rearange = df_monthly_segment_profit[['Order Date', 'Customer Segment', 'Profit']].set_index('Order Date')
df_rearange.head()

Unnamed: 0_level_0,Customer Segment,Profit
Order Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-31,Consumer,15638.10768
2015-02-28,Consumer,5722.565556
2015-03-31,Consumer,-12732.19664
2015-04-30,Consumer,19586.214664
2015-05-31,Consumer,8373.289823


In [46]:
df_rearange.dtypes

Customer Segment     object
Profit              float64
dtype: object

In [47]:
from bokeh.plotting import figure
profit_plot = figure()
profit_plot.circle(x=df_rearange.Profit, y=df_rearange.Profit)
show(profit_plot)

In [48]:
profit_dict2 = df_rearange.set_index('Customer Segment').to_dict()
profit_dict2

{'Profit': {'Consumer': 13307.009189999997,
  'Corporate': 16635.098344999999,
  'Home Office': 23036.523299999997,
  'Small Business': 13858.339775}}

In [49]:
df_monthly_segment_profit.head()

Unnamed: 0,Customer Segment,Order Date,Profit
0,Consumer,2015-01-31,15638.10768
1,Consumer,2015-02-28,5722.565556
2,Consumer,2015-03-31,-12732.19664
3,Consumer,2015-04-30,19586.214664
4,Consumer,2015-05-31,8373.289823


In [64]:
df_seg_prof = df_monthly_segment_profit[['Segment', 'Profit']]
# df_seg_prof = df_seg_prof.set_index('Segment')
df_seg_prof.head()

Unnamed: 0,Segment,Profit
0,Consumer,15638.10768
1,Consumer,5722.565556
2,Consumer,-12732.19664
3,Consumer,19586.214664
4,Consumer,8373.289823


In [76]:
profit_dict3 = df_seg_prof.to_dict(orient='records')
profit_dict3

[{'Profit': 15638.107680000001, 'Segment': 'Consumer'},
 {'Profit': 5722.565555599999, 'Segment': 'Consumer'},
 {'Profit': -12732.196639999998, 'Segment': 'Consumer'},
 {'Profit': 19586.214664, 'Segment': 'Consumer'},
 {'Profit': 8373.289823000001, 'Segment': 'Consumer'},
 {'Profit': 13307.009189999997, 'Segment': 'Consumer'},
 {'Profit': -10171.133205000004, 'Segment': 'Corporate'},
 {'Profit': 11425.871436999998, 'Segment': 'Corporate'},
 {'Profit': -2404.7714060000053, 'Segment': 'Corporate'},
 {'Profit': 10885.992695999998, 'Segment': 'Corporate'},
 {'Profit': 28073.133135400003, 'Segment': 'Corporate'},
 {'Profit': 16635.098345, 'Segment': 'Corporate'},
 {'Profit': -6485.184702000003, 'Segment': 'Home Office'},
 {'Profit': 18223.8647709, 'Segment': 'Home Office'},
 {'Profit': 12486.9506, 'Segment': 'Home Office'},
 {'Profit': -1634.4059399999967, 'Segment': 'Home Office'},
 {'Profit': 8805.77284, 'Segment': 'Home Office'},
 {'Profit': 23036.523299999997, 'Segment': 'Home Office'},

In [77]:
area6 = Area(profit_dict3, title="Area Chart", legend="top_left",
            xlabel='time', ylabel='memory', color='Segment')

show(area6)

In [69]:
df_monthly_segment_profit.head()

Unnamed: 0,Segment,OrderDate,Profit
0,Consumer,2015-01-31,15638.10768
1,Consumer,2015-02-28,5722.565556
2,Consumer,2015-03-31,-12732.19664
3,Consumer,2015-04-30,19586.214664
4,Consumer,2015-05-31,8373.289823


In [83]:
from collections import OrderedDict

# Get the unordered dictionary
test_order = [OrderedDict(row) for i, row in df_monthly_segment_profit.iterrows()]

In [84]:
area7 = Area(test_order, title="Area Chart", legend="top_left",
            xlabel='time', ylabel='memory', color='Segment')

show(area7)

In [70]:
df_rename_cols = df_monthly_segment_profit
df_rename_cols.columns = ['Segment', 'OrderDate', 'Profit']
df_rename_cols.head()

Unnamed: 0,Segment,OrderDate,Profit
0,Consumer,2015-01-31,15638.10768
1,Consumer,2015-02-28,5722.565556
2,Consumer,2015-03-31,-12732.19664
3,Consumer,2015-04-30,19586.214664
4,Consumer,2015-05-31,8373.289823


In [71]:
profit_plot2 = figure()
profit_plot2.circle(x=df_rename_cols.Profit, y=df_rename_cols.OrderDate)
show(profit_plot2)

In [72]:
area5 = Area(df_rename_cols, title='Area Chart', legend='top_left',
            xlabel='time', ylabel='profit', color = 'Segment')
show(area5)

In [73]:
# grouping by date and segment creates series
df_seg_summary = df_monthly_segment_profit.groupby(['Order Date', 'Customer Segment'])['Profit'].sum()
type(df_seg_summary)

KeyError: 'Order Date'

In [74]:
# converting series to dataframe object
df_seg_summary2 = df_seg_summary.to_frame()
df_seg_summary2.head()

NameError: name 'df_seg_summary' is not defined

In [75]:
area4 = Area(df_seg_summary2, title='Area Chart', legend='top_left',
            xlabel='time', ylabel='profit')
show(area4)

NameError: name 'df_seg_summary2' is not defined