# Sales Data Analysis with Python

### (A part of Big Data Analysis)

-------------

## Restaurant Sales Dataset
***
Here we have the sales data of a restaurant company from different cities (in different countries).

This data is available in Excel file format. We are going to analyze and visualize this data.

### Importing the Required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Importing the dataset

In [None]:
# Load the sales workbook; the bare file name is resolved relative to the current working directory.
data = pd.read_excel(r"Sales_Dataset.xlsx")

In [None]:
# Display the DataFrame exactly as it was read from the file.
data

In [None]:
data.info()                                      # print column names, non-null counts and dtypes

#### Drop Unwanted Column

In [None]:
data.drop(columns='Unnamed: 0', inplace=True)         # to drop the column from the dataframe

In [None]:
data.head()                                           # to show top 5 records of the dataframe

#### Changing the column names

In [None]:
data.loc[0]                               # using the loc function to show Ist row values

In [None]:
data.columns                              # to show all the column names

In [None]:
data.columns = data.loc[0]                # changing all the column names

In [None]:
data.head()

#### Drop the unwanted first row

In [None]:
data.drop(0, inplace=True)                     # to delete the first row from the dataframe

In [None]:
data

In [None]:
data.info()                               # to show some basic informations about the dataset

#### Consider the Manager column

In [None]:
data.head()

In [None]:
data.Manager

In [None]:
data.Manager.unique()                     # to show all the unique values of the column

In [None]:
data.Manager.nunique()                      # to show count of unique values present in the column

In [None]:
data['Manager'] = data['Manager'].str.strip().str.replace(r'\s+', ' ', regex=True)   # to remove extra spaces in names

In [None]:
data.head(20)

In [None]:
data['Manager'].unique()             # to show all the unique values of the column

In [None]:
data['Manager'].nunique()             # to show count of unique values present in the column

#### Removing the duplicate records

In [None]:
data.head()                     # to show top 5 records of the dataframe

In [None]:
data.describe()                            # to show some summary about the columns

In [None]:
data[data.duplicated()]                    # to show all the duplicate records

In [None]:
data.drop_duplicates(inplace=True)         # to remove the duplicate records

In [None]:
data[data.duplicated()]                    # to check the duplicate records

In [None]:
data.describe()                            # to show some summary about the columns

In [None]:
data[data['Order ID'].duplicated()]        # rows whose 'Order ID' already appeared in an earlier row

In [None]:
data[data['Order ID'] == 10483]            # inspect every row with Order ID 10483

In [None]:
data[data['Order ID'] == 10484]            # inspect every row with Order ID 10484

In [None]:
data[data['Order ID'] == 10485]           # inspect every row with Order ID 10485

In [None]:
# NOTE(review): 32 is a hard-coded index label, presumably chosen by eye from the
# output above as the unwanted copy of order 10483 - confirm against the data.
# Re-running this cell raises KeyError because the label is already gone.
data.drop(32, inplace=True)               # remove the row with index label 32

In [None]:
data[data['Order ID'] == 10483]            # re-check order 10483 after the drop

In [None]:
# NOTE(review): labels 33 and 34 are hard-coded as well, presumably the unwanted
# copies of orders 10484 and 10485 - confirm against the data.
data.drop([33, 34], inplace=True)         # remove several rows at once by index label

In [None]:
data[data['Order ID'] == 10484]            # re-check order 10484 after the drop

In [None]:
data[data['Order ID'] == 10485]            # re-check order 10485 after the drop

In [None]:
data[data['Order ID'].duplicated()]             # list any rows that still repeat an earlier 'Order ID'

In [None]:
data

#### Converting the datatype of columns

In [None]:
data.info()                               # to get some basic info about the dataframe

In [None]:
data.Quantity = data.Quantity.astype(float)  # to change the datatype of a column

In [None]:
data.info()                                  # to get some basic info about the dataframe

In [None]:
data.Quantity = data.Quantity.round()              # to round-off the values of a column

In [None]:
data.Quantity

In [None]:
data.Quantity = data.Quantity.astype(int)             # to change the datatype of a column

In [None]:
data['Quantity']

In [None]:
data['Order ID'] = data['Order ID'].astype(int)        # to change the datatype of a column
data['Price'] = data['Price'].astype(float)            # to change the datatype of a column

In [None]:
data.info()                                       # to get some basic info about the dataframe

In [None]:
data.Date = pd.to_datetime(data.Date)          # to convert the datatype into datetime format

In [None]:
data.Date.dtype                                # to check the datatype of any column

In [None]:
data.info()

In [None]:
data

----------

# Analyzing the Data

#### Q.1) Most Preferred Payment Method ?

In [None]:
data.head()                       # to show top 5 records of the dataframe

In [None]:
data['Payment Method'].unique()                       # to show the unique values of a column

In [None]:
data['Payment Method'].nunique()                      # to show the count of unique values in a column

In [None]:
data['Payment Method'].value_counts()                 # to show the unique values of a column with their counts

In [None]:
data['Payment Method'].value_counts(normalize=True)*100    # to get the result in percentage

In [None]:
data['Payment Method'].value_counts().plot(kind='bar') ;   # to draw the bar graph

#### Q.2) Most Selling Product ?
- By Quantity
- By Revenue


#### By Quantity

In [None]:
data.head()

In [None]:
data.groupby('Product')['Quantity'].sum()            # to make the group of unique values of a column

In [None]:
data.groupby('Product')['Quantity'].sum().sort_values(ascending=False)   # to sort the result

In [None]:
most_quantity = data.groupby('Product')['Quantity'].sum().sort_values(ascending=False)
most_quantity

In [None]:
type(most_quantity)                   # to the type of the variable

In [None]:
most_quantity = most_quantity.reset_index()     # To convert the index of a Series into a column to form a DataFrame
most_quantity

In [None]:
type(most_quantity)                   # to the type of the variable

In [None]:
plt.figure(figsize = (9,4))
plt.bar(most_quantity['Product'], most_quantity['Quantity'], color = ['red', 'black', 'green', 'yellow', 'cyan'], width=0.4)
plt.title("Most Selling Product - By Quantity")              
plt.xlabel("Product")
plt.ylabel("Quantity");

#### By Revenue

In [None]:
data.head()

In [None]:
data['Revenue'] = data['Price'] * data['Quantity']       # to create a new column 'Revenue'

In [None]:
data

In [None]:
data.groupby('Product')['Revenue'].sum().sort_values(ascending = False) # to make the group of unique values of a column

In [None]:
most_revenue = data.groupby('Product')['Revenue'].sum().sort_values(ascending = False)
most_revenue

In [None]:
most_revenue = most_revenue.reset_index()
most_revenue

In [None]:
plt.figure(figsize=(9, 4))
plt.bar(most_revenue['Product'], most_revenue['Revenue'], color = ['green', 'red', 'black', 'yellow', 'cyan'], width = 0.3);
plt.title("Most Selling Product - By Revenue")       # setting the title       
plt.xlabel("Product")                                # setting the x-axis label
plt.ylabel("Revenue");                               # setting the y-axis label

#### Q.3) Which city had the maximum revenue?
##### or
##### Which manager earned the maximum revenue?

In [None]:
data

In [None]:
data.City.unique()                       # to check the unique values of the column 'City'

In [None]:
data.City.nunique()                      # to check the count of unique values of the column 'City'

In [None]:
data.groupby('City')['Revenue'].sum().sort_values(ascending=False)   # using groupby on 'City' & 'Revenue' column

In [None]:
data.Manager.nunique()                   # to check the count of unique values of the column 'Manager'

In [None]:
data.Manager.unique()                   # to check the unique values of the column 'Manager'

In [None]:
data.groupby('Manager')['Revenue'].sum().sort_values(ascending=False)   # using groupby on 'Manager' & 'Revenue' column

#### Q.4) Date wise revenue

In [None]:
data.head()

In [None]:
data.Date.dtype

In [None]:
data.info()

In [None]:
data.plot('Date', 'Revenue', color='red', linewidth=2, figsize=(9,4)) 
plt.title("Date wise Revenue")                        # setting the title
plt.xlabel("Date")                                    # setting the x-axis label
plt.ylabel("Revenue")                                 # setting the y-axis label
plt.show()

#### Q.5) Average Revenue

In [None]:
data.head()

In [None]:
data['Revenue'].mean()

#### Q.6) Average Revenue for the months of November & December

In [None]:
data.head()

In [None]:
data['Month'] = data['Date'].dt.month              # creating a new column showing Month only

In [None]:
data

In [None]:
m11 = data[data['Month'] == 11]          # filtering the records with month '11'
m11

In [None]:
m11.Revenue.mean()                           # to show the mean revenue

In [None]:
m12 = data[data.Month == 12]                  # filtering the records with month '12'
m12

In [None]:
m12.Revenue.mean()                                  # to show the mean revenue

#### Q.7) Standard Deviation of Revenue and Quantity ?

In [None]:
data['Quantity'].std()

In [None]:
data['Revenue'].std()

#### Q.8) Variance of Revenue and Quantity ?

In [None]:
data['Quantity'].var()

In [None]:
data['Revenue'].var()

#### Q.9) Is revenue increasing or decreasing over time?

In [None]:
data.head()

In [None]:
m11 = data[data['Month'] == 11]          # filtering the records with month '11'
m11

In [None]:
m11['Revenue'].sum()

In [None]:
m12 = data[data.Month == 12]                  # filtering the records with month '12'
m12

In [None]:
m12.Revenue.sum()

#### Q.10) Average 'Quantity Sold' & 'Average Revenue' for each product ?

In [None]:
data.head()

In [None]:
data.groupby('Product').agg({'Quantity':'mean', 'Revenue':'mean'})