# Step 1 - Import Data

In [42]:
import pandas as pd

# Read the 'Orders' worksheet of the Superstore workbook into a DataFrame.
# The path is relative, so the workbook is looked up in the current working directory.
df = pd.read_excel(
    '2 Sample - Superstore.xlsx',
    sheet_name='Orders',
)

In [43]:
# Print each column's name, non-null count and dtype to sanity-check the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Row ID         9994 non-null   int64         
 1   Order ID       9994 non-null   object        
 2   Order Date     9994 non-null   datetime64[ns]
 3   Ship Date      9994 non-null   datetime64[ns]
 4   Ship Mode      9994 non-null   object        
 5   Customer ID    9994 non-null   object        
 6   Customer Name  9994 non-null   object        
 7   Segment        9994 non-null   object        
 8   Country        9994 non-null   object        
 9   City           9994 non-null   object        
 10  State          9994 non-null   object        
 11  Postal Code    9994 non-null   int64         
 12  Region         9994 non-null   object        
 13  Product ID     9994 non-null   object        
 14  Category       9994 non-null   object        
 15  Sub-Category   9994 n

# Step 2 - Create Segment Summary (Problem Solving)

In [59]:
# Total Sales for each customer Segment.
# Selecting the single 'Sales' column before summing yields a Series
# indexed by Segment rather than a DataFrame.
df_segment_summ = (
    df.groupby(by='Segment')['Sales']
    .sum()
)
df_segment_summ

Segment
Consumer       1.161401e+06
Corporate      7.061464e+05
Home Office    4.296531e+05
Name: Sales, dtype: float64

In [61]:
# Confirm the container type: the single-column groupby sum above is a Series, not a DataFrame.
type(df_segment_summ)

pandas.core.series.Series

In [63]:
# Move the Segment labels out of the index into an ordinary column,
# turning the Series into a two-column DataFrame.
df_segment = df_segment_summ.reset_index(drop=False)
df_segment

Unnamed: 0,Segment,Sales
0,Consumer,1161401.0
1,Corporate,706146.4
2,Home Office,429653.1


In [65]:
# Relabel the two columns in place with generic names so that summaries
# built from different grouping columns share one layout.
df_segment.columns = ['Dimension', 'Value_Sales']
df_segment

Unnamed: 0,Dimension,Value_Sales
0,Consumer,1161401.0
1,Corporate,706146.4
2,Home Office,429653.1


# Step 2 - Create Segment Summary (Better)

In [70]:
# One-step version: group by Segment and sum Sales, keeping the group key as
# a regular column, then relabel to the generic summary layout.
df_segment_summ = df.groupby('Segment', as_index=False)['Sales'].sum()
df_segment_summ.columns = ['Dimension', 'Value_Sales']
df_segment_summ

Unnamed: 0,Dimension,Value_Sales
0,Consumer,1161401.0
1,Corporate,706146.4
2,Home Office,429653.1


# Step 3 - Create Region Summary

In [74]:
# Total Sales per Region, flattened to a two-column frame and relabelled
# to the generic ('Dimension', 'Value_Sales') layout.
df_region_summ = (
    df
    .groupby('Region')['Sales']
    .sum()
    .reset_index()
)
df_region_summ.columns = ['Dimension', 'Value_Sales']
df_region_summ

Unnamed: 0,Dimension,Value_Sales
0,Central,501239.8908
1,East,678781.24
2,South,391721.905
3,West,725457.8245


# Step 4 - Create Category Summary

In [78]:
# Total Sales per product Category, with the group key kept as a column,
# relabelled to the generic ('Dimension', 'Value_Sales') layout.
df_category_summ = df.groupby('Category', as_index=False)['Sales'].sum()
df_category_summ.columns = ['Dimension', 'Value_Sales']
df_category_summ

Unnamed: 0,Dimension,Value_Sales
0,Furniture,741999.7953
1,Office Supplies,719047.032
2,Technology,836154.033


# Step 5 - Create Sub-Category Summary

In [81]:
# Total Sales per product Sub-Category, flattened to a two-column frame and
# relabelled to the generic ('Dimension', 'Value_Sales') layout.
df_sub_category_summ = (
    df
    .groupby('Sub-Category')['Sales']
    .sum()
    .reset_index()
)
df_sub_category_summ.columns = ['Dimension', 'Value_Sales']
df_sub_category_summ

Unnamed: 0,Dimension,Value_Sales
0,Accessories,167380.318
1,Appliances,107532.161
2,Art,27118.792
3,Binders,203412.733
4,Bookcases,114879.9963
5,Chairs,328449.103
6,Copiers,149528.03
7,Envelopes,16476.402
8,Fasteners,3024.28
9,Furnishings,91705.164


# Steps 2 to 5 - Improved (Loop over the Dimensions)

In [91]:
# First draft of the loop: build and print the Sales summary for each grouping column.
# NOTE: 'dimention' is a misspelling of 'dimension'; the names are kept unchanged here.
dimention = ['Segment', 'Region', 'Category', 'Sub-Category']

for dim in dimention:
    df_dimention_summ = df.groupby(dim, as_index=False)['Sales'].sum()
    df_dimention_summ.columns = ['Dimension', 'Value_Sales']
    print(df_dimention_summ)

In [94]:
dimensions = ['Segment', 'Region', 'Category', 'Sub-Category']

# For every grouping column: print a heading, the summed-Sales table, and a
# 50-dash rule so consecutive tables are easy to tell apart.
for dim in dimensions:
    df_dimension_summ = df.groupby(dim, as_index=False)['Sales'].sum()
    df_dimension_summ.columns = ['Dimension', 'Value_Sales']
    print(f"Summary for Dimension: {dim}", df_dimension_summ, "-" * 50, sep="\n")

Summary for Dimension: Segment
     Dimension   Value_Sales
0     Consumer  1.161401e+06
1    Corporate  7.061464e+05
2  Home Office  4.296531e+05
--------------------------------------------------
Summary for Dimension: Region
  Dimension  Value_Sales
0   Central  501239.8908
1      East  678781.2400
2     South  391721.9050
3      West  725457.8245
--------------------------------------------------
Summary for Dimension: Category
         Dimension  Value_Sales
0        Furniture  741999.7953
1  Office Supplies  719047.0320
2       Technology  836154.0330
--------------------------------------------------
Summary for Dimension: Sub-Category
      Dimension  Value_Sales
0   Accessories  167380.3180
1    Appliances  107532.1610
2           Art   27118.7920
3       Binders  203412.7330
4     Bookcases  114879.9963
5        Chairs  328449.1030
6       Copiers  149528.0300
7     Envelopes   16476.4020
8     Fasteners    3024.2800
9   Furnishings   91705.1640
10       Labels   12486.3120
1

# Step 6 - Concatenate

In [107]:
# Stack the four per-dimension summaries vertically. Each frame keeps its own
# 0-based row labels, so index values repeat in the combined table.
pd.concat(
    objs=[df_segment_summ, df_region_summ, df_category_summ, df_sub_category_summ],
    axis='index',
)

Unnamed: 0,Dimension,Value_Sales
0,Consumer,1161401.0
1,Corporate,706146.4
2,Home Office,429653.1
0,Central,501239.9
1,East,678781.2
2,South,391721.9
3,West,725457.8
0,Furniture,741999.8
1,Office Supplies,719047.0
2,Technology,836154.0


# Step 6 - Concatenate (Improved)

In [122]:
# Create an empty frame that already carries the target column layout;
# the next cell concatenates per-dimension results onto a frame like this.
df_summary = pd.DataFrame(columns=['Dimension', 'Value_Sales'])
df_summary

Unnamed: 0,Dimension,Value_Sales


In [161]:
dimensions = ['Segment', 'Region', 'Category', 'Sub-Category']

# Collect one summary frame per dimension and concatenate them once at the end.
# Concatenating onto a growing frame inside the loop re-copies all earlier rows
# on every iteration, and seeding with an empty DataFrame makes pandas (>= 2.1)
# warn about empty entries influencing the result dtypes.
summary_parts = []
for dim in dimensions:
    df_result = df.groupby(dim)['Sales'].sum().reset_index()
    df_result.columns = ['Dimension', 'Value_Sales']
    summary_parts.append(df_result)

# Each part keeps its own 0-based row labels, so index values repeat across dimensions.
df_summary = pd.concat(summary_parts, axis=0)

df_summary

Unnamed: 0,Dimension,Value_Sales
0,Consumer,1161401.0
1,Corporate,706146.4
2,Home Office,429653.1
0,Central,501239.9
1,East,678781.2
2,South,391721.9
3,West,725457.8
0,Furniture,741999.8
1,Office Supplies,719047.0
2,Technology,836154.0


# Step 7 - User Defined Function
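- Input: a DataFrame (`df`) and a list of column names to summarise by
- Output: a summary table with one row per group, in the columns `Dimension` and `Value_Sales`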

In [167]:
def create_summary(input_df, dimensions, value_col='Sales'):
    """Build one long table of summed values for several grouping columns.

    For each column name in ``dimensions`` the rows of ``input_df`` are grouped
    by that column and ``value_col`` is summed. The per-dimension results are
    stacked vertically in the order the dimensions are given.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Source data; must contain ``value_col`` and every column named in
        ``dimensions``.
    dimensions : str or iterable of str
        Column name(s) to group by. A single string is treated as one column.
    value_col : str, default 'Sales'
        Column whose values are summed within each group.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``'Dimension'`` (the group label) and
        ``'Value_<value_col>'`` (``'Value_Sales'`` by default). Each
        dimension's rows keep their own 0-based row labels, so index values
        repeat across dimensions. Has zero rows when ``dimensions`` is empty.

    Raises
    ------
    KeyError
        Raised by pandas when a requested column is not present in ``input_df``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(dimensions, str):
        dimensions = [dimensions]

    columns = ['Dimension', f'Value_{value_col}']

    # Gather the per-dimension frames and concatenate once: growing a frame
    # with concat inside the loop re-copies earlier rows each iteration, and
    # seeding with an empty frame can affect the result dtypes.
    parts = []
    for dim in dimensions:
        df_result = input_df.groupby(dim)[value_col].sum().reset_index()
        df_result.columns = columns
        parts.append(df_result)

    # pd.concat rejects an empty list, so return the empty layout explicitly.
    if not parts:
        return pd.DataFrame(columns=columns)

    return pd.concat(parts, axis=0)

In [175]:
# Summarise Sales by three of the dimensions using the create_summary helper.
result_summary = create_summary(
    input_df=df,
    dimensions=['Segment', 'Region', 'Category'],
)

In [177]:
# Display the combined summary returned by create_summary.
result_summary

Unnamed: 0,Dimension,Value_Sales
0,Consumer,1161401.0
1,Corporate,706146.4
2,Home Office,429653.1
0,Central,501239.9
1,East,678781.2
2,South,391721.9
3,West,725457.8
0,Furniture,741999.8
1,Office Supplies,719047.0
2,Technology,836154.0
