# Adidas Sales Analysis

In [32]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

import warnings
# Blanket filter: suppresses every warning category for the rest of the
# session, which also hides any deprecation notices pandas may emit.
warnings.filterwarnings('ignore')

## Loading & Cleaning the Data

In [33]:
# Read the raw sales export (path is relative to the working directory)
# and preview its first ten rows.
df = pd.read_csv("Adidas US Sales Datasets.csv")
df.head(n=10)

Unnamed: 0,Retailer,Retailer ID,Invoice Date,Region,State,City,Product,Price per Unit,Units Sold,Total Sales,Operating Profit,Operating Margin,Sales Method
0,Foot Locker,1185732,1/1/2020,Northeast,New York,New York,Men's Street Footwear,$50.00,1200,"$600,000","$300,000",50%,In-store
1,Foot Locker,1185732,1/2/2020,Northeast,New York,New York,Men's Athletic Footwear,$50.00,1000,"$500,000","$150,000",30%,In-store
2,Foot Locker,1185732,1/3/2020,Northeast,New York,New York,Women's Street Footwear,$40.00,1000,"$400,000","$140,000",35%,In-store
3,Foot Locker,1185732,1/4/2020,Northeast,New York,New York,Women's Athletic Footwear,$45.00,850,"$382,500","$133,875",35%,In-store
4,Foot Locker,1185732,1/5/2020,Northeast,New York,New York,Men's Apparel,$60.00,900,"$540,000","$162,000",30%,In-store
5,Foot Locker,1185732,1/6/2020,Northeast,New York,New York,Women's Apparel,$50.00,1000,"$500,000","$125,000",25%,In-store
6,Foot Locker,1185732,1/7/2020,Northeast,New York,New York,Men's Street Footwear,$50.00,1250,"$625,000","$312,500",50%,In-store
7,Foot Locker,1185732,1/8/2020,Northeast,New York,New York,Men's Athletic Footwear,$50.00,900,"$450,000","$135,000",30%,Outlet
8,Foot Locker,1185732,1/21/2020,Northeast,New York,New York,Women's Street Footwear,$40.00,950,"$380,000","$133,000",35%,Outlet
9,Foot Locker,1185732,1/22/2020,Northeast,New York,New York,Women's Athletic Footwear,$45.00,825,"$371,250","$129,938",35%,Outlet


In [34]:
# Summarise column dtypes and non-null counts of the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9648 entries, 0 to 9647
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Retailer          9648 non-null   object
 1   Retailer ID       9648 non-null   int64 
 2   Invoice Date      9648 non-null   object
 3   Region            9648 non-null   object
 4   State             9648 non-null   object
 5   City              9648 non-null   object
 6   Product           9648 non-null   object
 7   Price per Unit    9648 non-null   object
 8   Units Sold        9648 non-null   object
 9   Total Sales       9648 non-null   object
 10  Operating Profit  9648 non-null   object
 11  Operating Margin  9648 non-null   object
 12  Sales Method      9648 non-null   object
dtypes: int64(1), object(12)
memory usage: 980.0+ KB


In [35]:
# Missing-value count per column (isnull is the pandas alias of isna).
df.isnull().sum()

Retailer            0
Retailer ID         0
Invoice Date        0
Region              0
State               0
City                0
Product             0
Price per Unit      0
Units Sold          0
Total Sales         0
Operating Profit    0
Operating Margin    0
Sales Method        0
dtype: int64

In [36]:
# Number of rows that exactly duplicate an earlier row.
df.duplicated().sum()

0

In [37]:
# Convert the invoice date column to datetime. The raw values are
# month/day/year strings (e.g. 1/21/2020), so the format is given explicitly
# instead of relying on pandas' format inference for ambiguous dates.
df['Invoice Date'] = pd.to_datetime(df['Invoice Date'], format='%m/%d/%Y')

In [38]:
# Price per Unit, Units Sold, Total Sales and Operating Profit load as strings
# (e.g. $50.00 or $600,000): strip the dollar sign and comma separators, then
# cast each column to a numeric dtype.
# regex=False keeps '$' a literal character (as a regex it is the
# end-of-string anchor) regardless of the pandas version's default. The
# character class is a raw string and needs no backslash, because '$' is
# already literal inside [...].
df['Price per Unit'] = df['Price per Unit'].str.replace('$', '', regex=False).astype(float)
df['Units Sold'] = df['Units Sold'].str.replace(',', '', regex=False).astype(int)
df['Total Sales'] = df['Total Sales'].str.replace(r'[$,]', '', regex=True).astype(int)
df['Operating Profit'] = df['Operating Profit'].str.replace(r'[$,]', '', regex=True).astype(int)

In [39]:
# Turn the percentage strings (e.g. 50%) into fractional floats (0.50):
# remove the percent sign, cast to float, then scale down by 100.
df['Operating Margin'] = (
    df['Operating Margin']
    .str.replace('%', '')
    .astype(float)
    .div(100)
)

In [41]:
# Derive the calendar month name and weekday name of each invoice date
# as two new columns (Month Name first, then Day Name).
df['Month Name'], df['Day Name'] = (
    df['Invoice Date'].dt.month_name(),
    df['Invoice Date'].dt.day_name(),
)

In [43]:
# Drop the retailer ID column. Rebinding df with errors='ignore' (rather than
# inplace=True) keeps the cell safe to re-run: a second execution is a no-op
# instead of raising KeyError for the already-removed column.
df = df.drop(columns='Retailer ID', errors='ignore')

In [45]:
# Distinct retailer names present in the data.
df['Retailer'].unique()

array(['Foot Locker', 'Walmart', 'Sports Direct', 'West Gear', "Kohl's",
       'Amazon'], dtype=object)

In [46]:
# Distinct sales channels present in the data.
df['Sales Method'].unique()

array(['In-store', 'Outlet', 'Online'], dtype=object)

In [47]:
# Distinct region labels present in the data.
df['Region'].unique()

array(['Northeast', 'South', 'West', 'Midwest', 'Southeast'], dtype=object)

In [48]:
# Number of distinct states covered.
df['State'].nunique()

50

In [49]:
# Number of distinct cities covered.
df['City'].nunique()

52

In [51]:
# Distinct product categories present in the data.
df['Product'].unique()

array(["Men's Street Footwear", "Men's Athletic Footwear",
       "Women's Street Footwear", "Women's Athletic Footwear",
       "Men's Apparel", "Women's Apparel"], dtype=object)

In [44]:
# Display the cleaned frame; pandas truncates the view to the leading and
# trailing rows rather than rendering every row.
df

Unnamed: 0,Retailer,Invoice Date,Region,State,City,Product,Price per Unit,Units Sold,Total Sales,Operating Profit,Operating Margin,Sales Method,Month Name,Day Name
0,Foot Locker,2020-01-01,Northeast,New York,New York,Men's Street Footwear,50.0,1200,600000,300000,0.50,In-store,January,Wednesday
1,Foot Locker,2020-01-02,Northeast,New York,New York,Men's Athletic Footwear,50.0,1000,500000,150000,0.30,In-store,January,Thursday
2,Foot Locker,2020-01-03,Northeast,New York,New York,Women's Street Footwear,40.0,1000,400000,140000,0.35,In-store,January,Friday
3,Foot Locker,2020-01-04,Northeast,New York,New York,Women's Athletic Footwear,45.0,850,382500,133875,0.35,In-store,January,Saturday
4,Foot Locker,2020-01-05,Northeast,New York,New York,Men's Apparel,60.0,900,540000,162000,0.30,In-store,January,Sunday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9643,Foot Locker,2021-01-24,Northeast,New Hampshire,Manchester,Men's Apparel,50.0,64,3200,896,0.28,Outlet,January,Sunday
9644,Foot Locker,2021-01-24,Northeast,New Hampshire,Manchester,Women's Apparel,41.0,105,4305,1378,0.32,Outlet,January,Sunday
9645,Foot Locker,2021-02-22,Northeast,New Hampshire,Manchester,Men's Street Footwear,41.0,184,7544,2791,0.37,Outlet,February,Monday
9646,Foot Locker,2021-02-22,Northeast,New Hampshire,Manchester,Men's Athletic Footwear,42.0,70,2940,1235,0.42,Outlet,February,Monday


## Exploratory Data Analysis