In [1]:
# General Dependencies

import numpy as np
import pandas as pd

# Warnings
# Install a blanket 'ignore' filter so no warning is shown in any later cell.
# NOTE(review): this also hides deprecation notices; consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the data
# Loads data/data_ready.csv (path relative to the notebook's working directory)
# and prints the (rows, columns) shape as a quick sanity check.
df_data = pd.read_csv('data/data_ready.csv')
print('Dataframe dimensions:', df_data.shape)

Dataframe dimensions: (1781, 18)


In [3]:
# Parse 'Order Date' into datetime values, then discard the rows in which
# every single column is empty; report the resulting shape.
df_data = (
    df_data
    .assign(**{'Order Date': pd.to_datetime(df_data['Order Date'])})
    .dropna(how='all')
)
print('Dataframe dimensions:', df_data.shape)

Dataframe dimensions: (1752, 18)


In [14]:
# Split the orders into one frame per calendar year using the 'Year' column.
df_2017 = df_data[df_data['Year'] == 2017]
df_2018 = df_data[df_data['Year'] == 2018]

# Preview only the first rows instead of rendering the whole 2017 frame.
df_2017.head(10)

Unnamed: 0,Stock ID,Cost of Goods,Unit Price,Shipping Cost,Customer ID,Vendor,Order Status,Product Category,Brand,Product Description,Product Base Margin,Order Date,Profit,Quantity,Sales,Order ID,Month,Year
0,171003,76.0,99.0,15.0,VVIP1017,DIRECT,finished,Clothing,Madewell,Madewell Trevi Drapey Blazer Suit Black 2 Last...,8%,2017-01-01,8.0,1.0,99.0,2017013,1.0,2017.0
1,171004,87.0,120.0,22.5,L1631,DIRECT,finished,Shoes,UGG,UGG Australia – Joey Leather & Genuine Shearli...,9%,2017-01-01,10.5,1.0,120.0,2017014,1.0,2017.0
2,171005,87.0,120.0,22.5,L1631,DIRECT,finished,Shoes,UGG,Ugg red short paragraph 7,9%,2017-01-01,10.5,1.0,120.0,2017015,1.0,2017.0
3,171006,113.0,193.0,15.0,H1270,DIRECT,finished,Clothing,Icebreaker,Set of icebreaker ladies zipper blouse grey s ...,34%,2017-01-01,65.0,1.0,193.0,2017016,1.0,2017.0
4,171007,33.0,107.0,15.0,L1609,DIRECT,finished,Clothing,Anthropologies,Anthropological powder purple sweater s,55%,2017-01-01,59.0,1.0,107.0,2017017,1.0,2017.0
5,171008,33.0,107.0,15.0,L1367,DIRECT,finished,Clothing,Anthropologies,Anthropological powder purple sweater m short ...,55%,2017-01-01,59.0,1.0,107.0,2017018,1.0,2017.0
6,171009,33.0,107.0,15.0,VVIP1017,DIRECT,finished,Clothing,Anthropologies,Anthropological powder purple sweater s,55%,2017-01-01,59.0,1.0,107.0,2017019,1.0,2017.0
7,171010,36.0,64.0,7.5,L1126,DIRECT,finished,Clothing,Tory Burch,Tory sports white top s number,32%,2017-01-01,20.5,1.0,64.0,20170110,1.0,2017.0
8,171011,33.0,107.0,15.0,L1288,DIRECT,finished,Clothing,Anthropologies,Anthropological powder purple sweater s,55%,2017-01-01,59.0,1.0,107.0,20170111,1.0,2017.0
9,171012,31.0,80.0,7.5,L1126,DIRECT,finished,Clothing,Scotch Soda,Scotch soda camel hat,52%,2017-01-01,41.5,1.0,80.0,20170112,1.0,2017.0


## Brand Analysis

Let's check which brands are the most profitable and which brands Lei sells the most.

In [18]:
# Building a Brand Dataset

def Brand(df):
    """Aggregate order-level rows into one summary row per brand.

    Parameters
    ----------
    df : pandas.DataFrame
        Order data containing the columns 'Brand', 'Order ID', 'Quantity',
        'Cost of Goods', 'Unit Price', 'Sales' and 'Profit'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'Brand' value (sorted by brand, default integer
        index) with the columns, in this order:
        'Brand', 'NumOrders' (count of non-null 'Order ID'),
        'TotalQuantity', 'TotalCost', 'TotalPrice', 'TotalSales',
        'TotalProfit' (sums of 'Quantity', 'Cost of Goods', 'Unit Price',
        'Sales' and 'Profit' respectively).

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.

    Notes
    -----
    'TotalPrice' is the plain sum of 'Unit Price' over a brand's rows; it is
    not weighted by 'Quantity'.
    """
    # (source column, aggregation, output column), listed in output order.
    # A single group-by pass replaces six separate group-bys and five merges
    # over the same 'Brand' key.
    spec = [
        ('Order ID', 'count', 'NumOrders'),
        ('Quantity', 'sum', 'TotalQuantity'),
        ('Cost of Goods', 'sum', 'TotalCost'),
        ('Unit Price', 'sum', 'TotalPrice'),
        ('Sales', 'sum', 'TotalSales'),
        ('Profit', 'sum', 'TotalProfit'),
    ]
    source_cols = [src for src, _, _ in spec]

    df_brand = (
        df.groupby(by='Brand', as_index=False)[source_cols]
          .agg({src: func for src, func, _ in spec})
    )

    # Pin the column order explicitly, then switch to the summary names.
    df_brand = df_brand[['Brand'] + source_cols]
    df_brand = df_brand.rename(columns={src: out for src, _, out in spec})

    return df_brand


In [19]:
# Build the per-brand summary table for each year.
df_brand17, df_brand18 = Brand(df_2017), Brand(df_2018)

In [20]:
# Summary statistics for the numeric columns of the 2017 per-brand table.
df_brand17.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalPrice,TotalSales,TotalProfit
count,120.0,120.0,120.0,120.0,120.0,120.0
mean,11.908333,12.033333,2144.156667,3015.383333,4576.216667,1354.180833
std,22.093003,22.308733,5482.949351,7751.12539,21932.2835,8483.535603
min,1.0,1.0,1.0,26.0,26.0,0.0
25%,1.0,1.0,136.75,225.0,225.0,47.625
50%,3.0,3.5,551.0,714.0,714.0,131.5
75%,11.25,12.0,1930.25,2765.25,2765.25,667.625
max,124.0,128.0,47673.8,62139.0,232090.0,92286.0


In [21]:
# The five 2017 brands with the highest total cost of goods, largest first.
df_brand17.sort_values('TotalCost', ascending=False).head(5)

Unnamed: 0,Brand,NumOrders,TotalQuantity,TotalCost,TotalPrice,TotalSales,TotalProfit
102,Stuart Weitzman,123,128.0,47673.8,62139.0,63925.0,12033.7
57,Lamer/La Prairie/Chantecaille,2,9.0,31014.0,51690.0,232090.0,92286.0
111,Tory Burch,79,79.0,12762.1,16788.0,16788.0,2740.9
21,Bvlgari,31,31.0,9291.0,13585.0,13585.0,4126.5
25,Canada Goose,12,12.0,9246.0,10573.0,10573.0,1002.0


#### These are the outlier brands for 2017
We remove the Stuart Weitzman and Lamer/La Prairie/Chantecaille brands to get a better forecast.

In [22]:
# Exclude the 2017 outlier brands by name instead of by a hard-coded
# TotalSales threshold, so the filter does not depend on the exact totals
# in the current data snapshot.
OUTLIER_BRANDS_2017 = ['Stuart Weitzman', 'Lamer/La Prairie/Chantecaille']
df_brand17 = df_brand17[~df_brand17['Brand'].isin(OUTLIER_BRANDS_2017)]
df_brand17.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalPrice,TotalSales,TotalProfit
count,118.0,118.0,118.0,118.0,118.0,118.0
mean,11.050847,11.076271,1513.652542,2101.838983,2145.177966,493.067797
std,19.732765,19.754544,2309.659829,3184.114698,3281.401086,897.899106
min,1.0,1.0,1.0,26.0,26.0,0.0
25%,1.0,1.0,136.25,223.0,223.0,46.875
50%,3.0,3.0,508.5,690.5,690.5,129.25
75%,11.0,11.75,1873.5,2548.5,2548.5,643.375
max,124.0,124.0,12762.1,16788.0,16788.0,5791.5


In [23]:
# Summary statistics for the numeric columns of the 2018 per-brand table.
df_brand18.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalPrice,TotalSales,TotalProfit
count,55.0,55.0,55.0,55.0,55.0,55.0
mean,5.872727,6.072727,1514.047273,2095.0,2134.472727,514.752727
std,7.620767,7.610554,2172.242152,2925.529784,2957.341836,799.596361
min,1.0,1.0,16.0,42.0,47.0,7.0
25%,1.0,1.0,139.0,229.0,269.5,68.0
50%,3.0,3.0,726.0,1066.0,1066.0,209.0
75%,6.0,7.0,1718.0,2158.0,2158.0,519.0
max,42.0,42.0,12315.0,15370.0,15370.0,3386.0


In [24]:
# The five 2018 brands with the highest total cost of goods, largest first.
df_brand18.sort_values('TotalCost', ascending=False).head(5)

Unnamed: 0,Brand,NumOrders,TotalQuantity,TotalCost,TotalPrice,TotalSales,TotalProfit
29,Lamer/La Prairie/Chantecaille,1,1.0,12315.0,15370.0,15370.0,3050.0
46,Stuart Weitzman,22,22.0,6492.0,10355.0,10355.0,3386.0
33,Macy Mcooy,3,3.0,5449.6,6812.0,6812.0,1302.4
23,Jcrew,1,1.0,4990.0,5870.0,5870.0,875.0
35,Maje,42,42.0,4588.0,7817.0,7817.0,2754.0


#### These are the outlier brands for 2018
We remove the Lamer/La Prairie/Chantecaille brand to get a better forecast.


In [25]:
# Exclude the 2018 outlier brand by name instead of by a hard-coded
# TotalSales threshold, so the filter does not depend on the exact totals
# in the current data snapshot.
OUTLIER_BRANDS_2018 = ['Lamer/La Prairie/Chantecaille']
df_brand18 = df_brand18[~df_brand18['Brand'].isin(OUTLIER_BRANDS_2018)]
df_brand18.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalPrice,TotalSales,TotalProfit
count,54.0,54.0,54.0,54.0,54.0,54.0
mean,5.962963,6.166667,1314.02963,1849.166667,1889.37037,467.803704
std,7.662609,7.649763,1601.799585,2309.463964,2354.657282,726.56634
min,1.0,1.0,16.0,42.0,47.0,7.0
25%,1.0,1.25,138.5,214.5,263.75,67.5
50%,3.0,3.5,717.0,1023.5,1023.5,178.0
75%,6.0,7.0,1672.75,2117.0,2117.0,502.0
max,42.0,42.0,6492.0,10355.0,10355.0,3386.0


In [26]:
# Save Brands datasets to csv file for Forecasting

# Write each filtered per-brand table to data/ without the index column;
# existing files at these paths are overwritten.
df_brand17.to_csv('data/brand17.csv',index=False)
df_brand18.to_csv('data/brand18.csv',index=False)