In [1]:
# General Dependencies

import numpy as np
import pandas as pd

# Warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the data
df_data = pd.read_csv('data/data_ready.csv')
print('Dataframe dimensions:', df_data.shape)

Dataframe dimensions: (1781, 18)


In [3]:
# Convert column 'Order Date' to datetime format, and remove empty rows

df_data['Order Date'] = pd.to_datetime(df_data['Order Date'])

df_data = df_data.dropna(how='all')
print('Dataframe dimensions:', df_data.shape)

Dataframe dimensions: (1752, 18)


In [4]:
df_2017 =  df_data[df_data['Year'] == 2017]
df_2018 =  df_data[df_data['Year'] == 2018]

## Brand Analysis

Let's check which brands are more profitable, and which brands Lei sells more.

In [5]:
# Building a Brand Dataset

def Brand(df):
    
    df_orders_brand = df.groupby(by='Brand', as_index=False)['Order ID'].count()
    df_orders_brand.columns = ['Brand', 'NumOrders']

    df_total_cost = df.groupby(by='Brand', as_index=False)['Cost of Goods'].sum()
    df_total_cost.columns = ['Brand', 'TotalCost']

    df_total_sales = df.groupby(by='Brand', as_index=False)['Sales'].sum()
    df_total_sales.columns = ['Brand', 'TotalSales']

    df_total_profit = df.groupby(by='Brand', as_index=False)['Profit'].sum()
    df_total_profit.columns = ['Brand', 'TotalProfit']

    df_total_qty = df.groupby(by='Brand', as_index=False)['Quantity'].sum()
    df_total_qty.columns = ['Brand', 'TotalQuantity']
    
    # Merge all the dataframes in one.

    df_brand = df_orders_brand.merge(df_total_qty, on="Brand")
    df_brand = df_brand.merge(df_total_cost, on="Brand")
    df_brand = df_brand.merge(df_total_sales, on="Brand" )
    df_brand = df_brand.merge(df_total_profit, on="Brand" )
    
    return(df_brand)


In [6]:
df_brand17 = Brand(df_2017)
df_brand18 = Brand(df_2018)

In [7]:
df_brand17.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalSales,TotalProfit
count,120.0,120.0,120.0,120.0,120.0
mean,11.908333,12.033333,2144.156667,4576.216667,1354.180833
std,22.093003,22.308733,5482.949351,21932.2835,8483.535603
min,1.0,1.0,1.0,26.0,0.0
25%,1.0,1.0,136.75,225.0,47.625
50%,3.0,3.5,551.0,714.0,131.5
75%,11.25,12.0,1930.25,2765.25,667.625
max,124.0,128.0,47673.8,232090.0,92286.0


In [8]:
df_brand17.sort_values(by=['TotalCost'], ascending=False ).head()

Unnamed: 0,Brand,NumOrders,TotalQuantity,TotalCost,TotalSales,TotalProfit
102,Stuart Weitzman,123,128.0,47673.8,63925.0,12033.7
57,Lamer/La Prairie/Chantecaille,2,9.0,31014.0,232090.0,92286.0
111,Tory Burch,79,79.0,12762.1,16788.0,2740.9
21,Bvlgari,31,31.0,9291.0,13585.0,4126.5
25,Canada Goose,12,12.0,9246.0,10573.0,1002.0


In [9]:
df_brand17 = df_brand17[(df_brand17.TotalSales < 232090 )]
df_brand17.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalSales,TotalProfit
count,119.0,119.0,119.0,119.0,119.0
mean,11.991597,12.058824,1901.552941,2664.336134,590.047899
std,22.167504,22.401307,4816.104262,6538.336298,1385.136131
min,1.0,1.0,1.0,26.0,0.0
25%,1.0,1.0,136.5,224.0,47.25
50%,3.0,3.0,520.0,692.0,131.0
75%,11.5,12.0,1917.5,2660.5,657.0
max,124.0,128.0,47673.8,63925.0,12033.7


In [10]:
df_brand18.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalSales,TotalProfit
count,55.0,55.0,55.0,55.0,55.0
mean,5.872727,6.072727,1514.047273,2134.472727,514.752727
std,7.620767,7.610554,2172.242152,2957.341836,799.596361
min,1.0,1.0,16.0,47.0,7.0
25%,1.0,1.0,139.0,269.5,68.0
50%,3.0,3.0,726.0,1066.0,209.0
75%,6.0,7.0,1718.0,2158.0,519.0
max,42.0,42.0,12315.0,15370.0,3386.0


In [11]:
df_brand18.sort_values(by=['TotalCost'], ascending=False ).head()

Unnamed: 0,Brand,NumOrders,TotalQuantity,TotalCost,TotalSales,TotalProfit
29,Lamer/La Prairie/Chantecaille,1,1.0,12315.0,15370.0,3050.0
46,Stuart Weitzman,22,22.0,6492.0,10355.0,3386.0
33,Macy Mcooy,3,3.0,5449.6,6812.0,1302.4
23,Jcrew,1,1.0,4990.0,5870.0,875.0
35,Maje,42,42.0,4588.0,7817.0,2754.0


In [12]:
df_brand18 = df_brand17[(df_brand17.TotalSales < 15370 )]
df_brand18.describe()

Unnamed: 0,NumOrders,TotalQuantity,TotalCost,TotalSales,TotalProfit
count,117.0,117.0,117.0,117.0,117.0
mean,10.470085,10.495726,1417.511966,2020.025641,473.855556
std,18.777542,18.801423,2068.921998,2999.372881,877.06472
min,1.0,1.0,1.0,26.0,0.0
25%,1.0,1.0,136.0,222.0,46.5
50%,3.0,3.0,497.0,689.0,127.5
75%,11.0,11.0,1767.0,2514.0,622.0
max,124.0,124.0,9291.0,15361.0,5791.5


In [13]:
# Save Brands datasets to csv file for Forecasting

df_brand17.to_csv('data/cust17.csv',index=False)
df_brand18.to_csv('data/cust18.csv',index=False)