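### *Run the commands below in a code cell if these packages are not available in your environment*
* ! pip install pandas
* ! pip install matplotlib
* ! pip install openpyxl (used by pandas to read `.xlsx` files)
* `os` and `datetime` are part of the Python standard library and do not need to be installed with pip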

### *Loading Libraries*

In [None]:
import pandas as pd
import os
import datetime

#### Import Sales Data

In [None]:
# List the worksheets available in the workbook.
# The context manager closes the underlying file handle as soon as the sheet
# names have been read; `xlsx` stays bound afterwards but refers to a closed
# file, so later cells should read from the path instead.
with pd.ExcelFile('Sales-Data.xlsx') as xlsx:
    sheet_names = xlsx.sheet_names
sheet_names

In [None]:
# Load the 'Sales-Data' worksheet into a DataFrame.
sales_data = pd.read_excel('Sales-Data.xlsx', sheet_name='Sales-Data')
# Add a 'Month' column holding the abbreviated month name ('%b') of each
# invoice date. The vectorised .dt accessor avoids one Python call per row,
# and missing dates become NaN instead of raising inside strftime.
sales_data['Month'] = pd.to_datetime(sales_data['InvoiceDate']).dt.strftime('%b')
sales_data.head()

### *Sorting a dataframe*

In [None]:
### Sorting on a single variable - ascending order
# The first call reorders sales_data itself; the second returns a separately
# sorted DataFrame and leaves sales_data untouched.
sales_data.sort_values('InvoiceDate', ascending=True, inplace=True)
sorted_sales_data = sales_data.sort_values('InvoiceDate', ascending=True)

In [None]:
### Sorting on a single variable - descending order
# Reorders sales_data in place so the largest InvoiceDate values come first.
sales_data.sort_values('InvoiceDate', ascending=False, inplace=True)
sales_data.head()

In [None]:
### Sorting on multiple variables - InvoiceDate ascending, then InvoiceValue descending
sorted_df_multi = sales_data.sort_values(
    ['InvoiceDate', 'InvoiceValue'],
    ascending=[True, False],
)
sorted_df_multi.head()

In [None]:
### Sorting on multiple variables - InvoiceDate ascending, then InvoiceValue ascending
sorted_df_multi = sales_data.sort_values(
    ['InvoiceDate', 'InvoiceValue'],
    ascending=[True, True],
)
sorted_df_multi.head()

### *Data Aggregation in Pandas*

#### Aggregate functions used with .agg
* https://sparkbyexamples.com/pandas/pandas-aggregate-functions-with-examples/

In [None]:
# Single aggregate: total of the InvoiceValue column.
sales_data['InvoiceValue'].agg('sum')

In [None]:
# Aggregating numeric values: several summary statistics of InvoiceValue at once.
Aggregated_df = sales_data['InvoiceValue'].aggregate(
    ['sum', 'mean', 'count', 'max', 'min']
)
# Label the index so it becomes a 'Measure' column once the index is reset.
Aggregated_df.index.rename('Measure', inplace=True)
Aggregated_df.reset_index(name='Statistics Value')

In [None]:
# Aggregating a non-numeric column: number of non-missing InvoiceDate entries.
sales_data['InvoiceDate'].agg('count')

In [None]:
# Frequency table: number of invoices for each distinct InvoiceDate.
# rename_axis pins the first column's label to 'InvoiceDate'; without it the
# label depends on the pandas version ('index' in 1.x, 'InvoiceDate' in 2.x).
frq_date = (
    sales_data['InvoiceDate']
    .value_counts()
    .rename_axis('InvoiceDate')
    .reset_index(name='Number of Invoices')
)
frq_date

### Grouped Aggregation in Pandas
* https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html

In [None]:
# One-variable grouping: per Month, the number of rows and the largest and
# smallest InvoiceValue.
# Aggregations are named by string instead of passing the builtins
# len/max/min: recent pandas versions emit a FutureWarning for builtin
# max/min, and 'size' gives the same row count as len.
invoices_by_month = (
    sales_data.groupby(['Month'])
    .agg(
        Count=('InvoiceDate', 'size'),
        Maximum_Invoice_Value=('InvoiceValue', 'max'),
        Minimum_Invoice_Value=('InvoiceValue', 'min'),
    )
    .reset_index()
)
invoices_by_month

In [None]:
# Multiple-variable grouping: per (CustomerID, Month) pair, the number of rows
# and the largest and smallest InvoiceValue.
# Aggregations are named by string instead of passing the builtins
# len/max/min: recent pandas versions emit a FutureWarning for builtin
# max/min, and 'size' gives the same row count as len.
invoices_by_month = (
    sales_data.groupby(['CustomerID', 'Month'])
    .agg(
        Count=('InvoiceDate', 'size'),
        Maximum_Invoice_Value=('InvoiceValue', 'max'),
        Minimum_Invoice_Value=('InvoiceValue', 'min'),
    )
    .reset_index()
)
invoices_by_month

In [52]:
### Pivot Table in Python
# Rows are CustomerID values and columns are Month values. InvoiceDate cells
# hold a count, InvoiceValue cells hold a mean, and empty cells become 0.
invoices_by_month = pd.pivot_table(
    sales_data,
    values=['InvoiceDate', 'InvoiceValue'],
    index='CustomerID',
    columns='Month',
    aggfunc={'InvoiceDate': 'count', 'InvoiceValue': 'mean'},
    fill_value=0,
)
invoices_by_month

In [54]:
### Pivot Table in Python - format a specific column
# Custom aggregation function that both aggregates and formats its result.
def custom_mean(series: pd.Series) -> str:
    """Return the mean of *series* formatted as a string with two decimals.

    Intended as an ``aggfunc`` for ``pivot_table``/``agg``. The result is a
    string (e.g. ``'3.00'``), so the aggregated column is no longer numeric.
    An empty series yields ``'nan'``.
    """
    # The function is named for, and stands in for, the 'mean' aggregation;
    # the earlier version summed the values instead of averaging them.
    return f'{series.mean():.2f}'

# Same pivot layout as a plain count/mean table, except that InvoiceValue is
# aggregated with the custom_mean function defined in this cell.
invoices_by_month = pd.pivot_table(
    sales_data,
    values=['InvoiceDate', 'InvoiceValue'],
    index='CustomerID',
    columns='Month',
    aggfunc={'InvoiceDate': 'count', 'InvoiceValue': custom_mean},
    fill_value=0,
)
invoices_by_month