### Importing Relevant Libraries

In [38]:
import pandas as pd
from pandasql import sqldf

import gsheets
import gspread
from google.auth import default

### Pre-requisites

- download service account JSON key
- enable Google Sheets API in Google Cloud
- give the service account email view access to the google sheet 

### Import Data from Google Sheet

In [40]:
# Authenticate with your Google account
gc = gspread.service_account(filename="16 - Reading from and Writing to BigQuery/JSON_KEY.json")

# Get the sheet URL
url = 'https://docs.google.com/spreadsheets/d/1Nutd-OMiLR4acHaUYYcdp2AN3KLPEX3wr12LFjWuk-0/edit?gid=2060066405#gid=2060066405'

# Sheet the sheet key
sheet_key = '1Nutd-OMiLR4acHaUYYcdp2AN3KLPEX3wr12LFjWuk-0'

# Sheet the sheet name
sheet_name = 'Superstore'

# Open the sheet 
# (note: the tab name in the Google Sheet should be lower case for this to work)
sheet = gc.open_by_url(url).sheet1
# sheet = gc.open_by_key(sheet_key).sheet1
# sheet = gc.open(sheet_name).sheet1 

# Get the data from the sheet
data = sheet.get_all_values()

# Convert the data to a Pandas DataFrame
df = pd.DataFrame(data[1:], columns=data[0])

In [41]:
df.head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2013-152156,2013-11-09,2013-11-12,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2013-152156,2013-11-09,2013-11-12,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2013-138688,2013-06-13,2013-06-17,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2012-108966,2012-10-11,2012-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2012-108966,2012-10-11,2012-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


## Calculating Key Monthly Metrics 

In [42]:
# using the raw orders data to compute some key metrics on a monthly level

query = """
SELECT
  DATE(strftime('%Y', `Order Date`) || '-' || strftime('%m', `Order Date`) || '-' || '01') AS month,
  COUNT(*) AS num_of_rows,
  COUNT(DISTINCT `Order ID`) AS num_of_orders,
  ROUND(SUM(`Sales`)) AS revenue,
  ROUND(SUM(`Profit`)) AS profit,
  ROUND(SUM(`Sales`) / COUNT(DISTINCT `Order ID`)) AS avg_order_value,
  COUNT(DISTINCT `Customer ID`) AS active_customers,
  ROUND(COUNT(DISTINCT `Order ID`) / COUNT(DISTINCT `Customer ID`), 1) AS orders_per_active_customer,
  ROUND(SUM(`Sales`) / COUNT(DISTINCT `Customer ID`)) AS revenue_per_active_customer,
  ROUND(SUM(`Profit`) / COUNT(DISTINCT `Customer ID`)) AS profit_per_active_customer

FROM df
GROUP BY month
ORDER BY month DESC
"""

monthly_metrics = sqldf(query)
monthly_metrics.head()

Unnamed: 0,month,num_of_rows,num_of_orders,revenue,profit,avg_order_value,active_customers,orders_per_active_customer,revenue_per_active_customer,profit_per_active_customer
0,2014-12-01,477,235,90475.0,8533.0,385.0,203,1.0,446.0,42.0
1,2014-11-01,447,252,112326.0,9683.0,446.0,211,1.0,532.0,46.0
2,2014-10-01,302,150,77794.0,9441.0,519.0,139,1.0,560.0,68.0
3,2014-09-01,463,229,90489.0,11395.0,395.0,200,1.0,452.0,57.0
4,2014-08-01,218,110,61516.0,8894.0,559.0,103,1.0,597.0,86.0
