# Commercial Store Data

In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [47]:
#load csv file
storeData=pd.read_csv("data.csv", low_memory=False)

In [48]:
# show data
storeData

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
5,536365,22752,SET 7 BABUSHKA NESTING BOXES,2,12/1/2010 8:26,7.65,17850.0,United Kingdom
6,536365,21730,GLASS STAR FROSTED T-LIGHT HOLDER,6,12/1/2010 8:26,4.25,17850.0,United Kingdom
7,536366,22633,HAND WARMER UNION JACK,6,12/1/2010 8:28,1.85,17850.0,United Kingdom
8,536366,22632,HAND WARMER RED POLKA DOT,6,12/1/2010 8:28,1.85,17850.0,United Kingdom
9,536367,84879,ASSORTED COLOUR BIRD ORNAMENT,32,12/1/2010 8:34,1.69,13047.0,United Kingdom


In [51]:
#create dataframe 
df = pd.DataFrame(storeData)

# Add new column 'Total Value' which is Quantity * Unit Price
df['TotalValue'] = df['Quantity'] * df['UnitPrice']
# Convert 'Invoice Date' to datetime format
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# Create new column Month and Year from Invoice Date
df['Month-Year'] = df['InvoiceDate'].dt.strftime('%m-%Y')
#show data
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,TotalValue,Month-Year
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,15.3,12-2010
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,12-2010
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,22.0,12-2010
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,12-2010
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,12-2010
5,536365,22752,SET 7 BABUSHKA NESTING BOXES,2,2010-12-01 08:26:00,7.65,17850.0,United Kingdom,15.3,12-2010
6,536365,21730,GLASS STAR FROSTED T-LIGHT HOLDER,6,2010-12-01 08:26:00,4.25,17850.0,United Kingdom,25.5,12-2010
7,536366,22633,HAND WARMER UNION JACK,6,2010-12-01 08:28:00,1.85,17850.0,United Kingdom,11.1,12-2010
8,536366,22632,HAND WARMER RED POLKA DOT,6,2010-12-01 08:28:00,1.85,17850.0,United Kingdom,11.1,12-2010
9,536367,84879,ASSORTED COLOUR BIRD ORNAMENT,32,2010-12-01 08:34:00,1.69,13047.0,United Kingdom,54.08,12-2010


In [52]:
# columns
df.columns


Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country', 'TotalValue', 'Month-Year'],
      dtype='object')

In [53]:
""" column information:
    InvoiceNo:invoice no: of customer
    StockCode:unique identifier assigned to each product 
    Description:product name
    Quantity:quantity of product 
    InvoiceDate:date of purchase
    UnitPrice:price per unit item
    CustomerID:unique ID of customer
    Country:country of customer
    TotalValue:total amount spend
    Month-year:year and month"""

# dataframe information
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 847 entries, 0 to 846
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   InvoiceNo    847 non-null    object        
 1   StockCode    847 non-null    object        
 2   Description  846 non-null    object        
 3   Quantity     847 non-null    int64         
 4   InvoiceDate  847 non-null    datetime64[ns]
 5   UnitPrice    847 non-null    float64       
 6   CustomerID   846 non-null    float64       
 7   Country      847 non-null    object        
 8   TotalValue   847 non-null    float64       
 9   Month-Year   847 non-null    object        
dtypes: datetime64[ns](1), float64(3), int64(1), object(5)
memory usage: 66.3+ KB


In [54]:

# Total rows and columnsd
df.shape

(847, 10)

In [55]:
"""Count: The number of non-null entries.
Mean: The average value.
Standard Deviation (std): The spread of the data.
Min: The minimum value.
25%: The 25th percentile (first quartile).
50%: The 50th percentile (median).
75%: The 75th percentile (third quartile).
Max: The maximum value."""
df.describe()

Unnamed: 0,Quantity,InvoiceDate,UnitPrice,CustomerID,TotalValue
count,847.0,847,847.0,846.0,847.0
mean,12.948052,2010-12-01 11:06:29.539551232,3.116423,15942.267139,24.489481
min,-24.0,2010-12-01 08:26:00,0.0,12431.0,-41.4
25%,2.0,2010-12-01 10:19:00,1.25,14307.0,4.225
50%,5.0,2010-12-01 11:33:00,2.1,16029.0,12.7
75%,12.0,2010-12-01 11:57:00,3.75,17908.0,20.34
max,600.0,2010-12-01 12:23:00,165.0,18085.0,733.44
std,37.283492,,6.336404,1922.284855,59.096466


In [56]:
#to find customer who has spend maximum money in single  item purchase
df.loc[df["TotalValue"].argmax()][["CustomerID","Description","TotalValue"]]

CustomerID           16029.0
Description    CHILLI LIGHTS
TotalValue            733.44
Name: 178, dtype: object

# Product List

In [57]:
#total number of uniqe products 
print(len(df.Description.unique()))
#product List
print("\n".join(map(str, df.Description.unique())))

522
WHITE HANGING HEART T-LIGHT HOLDER
WHITE METAL LANTERN
CREAM CUPID HEARTS COAT HANGER
KNITTED UNION FLAG HOT WATER BOTTLE
RED WOOLLY HOTTIE WHITE HEART.
SET 7 BABUSHKA NESTING BOXES
GLASS STAR FROSTED T-LIGHT HOLDER
HAND WARMER UNION JACK
HAND WARMER RED POLKA DOT
ASSORTED COLOUR BIRD ORNAMENT
POPPY'S PLAYHOUSE BEDROOM 
POPPY'S PLAYHOUSE KITCHEN
FELTCRAFT PRINCESS CHARLOTTE DOLL
IVORY KNITTED MUG COSY 
BOX OF 6 ASSORTED COLOUR TEASPOONS
BOX OF VINTAGE JIGSAW BLOCKS 
BOX OF VINTAGE ALPHABET BLOCKS
HOME BUILDING BLOCK WORD
LOVE BUILDING BLOCK WORD
RECIPE BOX WITH METAL HEART
DOORMAT NEW ENGLAND
JAM MAKING SET WITH JARS
RED COAT RACK PARIS FASHION
YELLOW COAT RACK PARIS FASHION
BLUE COAT RACK PARIS FASHION
BATH BUILDING BLOCK WORD
ALARM CLOCK BAKELIKE PINK
ALARM CLOCK BAKELIKE RED 
ALARM CLOCK BAKELIKE GREEN
PANDA AND BUNNIES STICKER SHEET
STARS GIFT TAPE 
INFLATABLE POLITICAL GLOBE 
VINTAGE HEADS AND TAILS CARD GAME 
SET/2 RED RETROSPOT TEA TOWELS 
ROUND SNACK BOXES SET OF4 WOODLAND 

# Countries List

In [31]:
#total number of uniqe countries 
print(len(df.Country.unique()))
#Country List
print("\n".join(df.Country.unique()))

4
United Kingdom
France
Australia
Netherlands


In [None]:
# Total money spend by customers with respect to each country
salesByCountery=df.groupby('Country')['TotalValue'].sum().sort_values(ascending=False)
# show data
salesByCountery

In [58]:
#  total quantity of products sold over all
totalProductsSold = df.groupby('Description')['Quantity'].sum().sort_values(ascending=False).head(522)
# show data
pd.set_option('display.max_rows', None)
popularProducts


Description
NAMASTE SWAGAT INCENSE                 600
RED TOADSTOOL LED NIGHT LIGHT          463
FAIRY TALE COTTAGE NIGHTLIGHT          432
DISCO BALL CHRISTMAS DECORATION        312
JUMBO BAG RED RETROSPOT                203
CHILLI LIGHTS                          200
RED RETROSPOT OVEN GLOVE               200
PACK OF 72 RETROSPOT CAKE CASES        197
LIGHT GARLAND BUTTERFILES PINK         192
WOODEN OWLS LIGHT GARLAND              192
HAND WARMER SCOTTY DOG DESIGN          163
SMALL POPCORN HOLDER                   148
HAND WARMER BIRD DESIGN                147
HANGING HEART ZINC T-LIGHT HOLDER      144
POLKADOT RAIN HAT                      144
WHITE HANGING HEART T-LIGHT HOLDER     132
PAPER CHAIN KIT 50'S CHRISTMAS         132
HOMEMADE JAM SCENTED CANDLES           132
SET/5 RED RETROSPOT LID GLASS BOWLS    128
HAND WARMER UNION JACK                 127
HAND WARMER RED RETROSPOT              123
ASSORTED COLOUR BIRD ORNAMENT          120
HAND WARMER OWL DESIGN                 111

In [62]:
CustomerTotalSpending = df.groupby('CustomerID')['TotalValue'].sum().sort_values(ascending=False)
# show data
#pd.set_option('display.max_rows', None)
CustomerTotalSpending

CustomerID
16029.0    3702.12
17511.0    1825.74
17850.0    1499.34
13408.0    1024.68
12583.0     855.86
13694.0     842.12
14307.0     783.11
17920.0     514.41
13767.0     507.88
18074.0     489.60
16218.0     471.30
15311.0     445.33
14688.0     444.98
13448.0     443.96
15983.0     440.89
16098.0     430.60
12838.0     390.79
13047.0     366.63
13758.0     362.45
12431.0     358.25
15513.0     357.00
15862.0     354.23
15100.0     350.40
15291.0     328.80
14045.0     326.40
13705.0     318.14
14849.0     312.94
18085.0     303.90
17924.0     279.00
17908.0     243.28
16583.0     233.45
16250.0     226.14
13255.0     225.70
17968.0     210.30
13748.0     204.00
12791.0     192.60
17420.0     130.85
13747.0      79.60
17809.0      34.80
14527.0     -27.50
17548.0    -141.48
Name: TotalValue, dtype: float64

# Basic visualizations

<h3>products</h3>