# Task 2: Generating a table of transaction data of a sample of owners.

In [1]:
import os
import re
import datetime 
from zipfile import ZipFile
import pandas as pd
import numpy as np
import pandas_gbq
import janitor
import shutil
import glob
from google.cloud import bigquery
from google.oauth2 import service_account

In [2]:
# Building the private key.
service_path = "C:\\Users\\rsmcd\\OneDrive\\Desktop\\MSBA Fall 2022\\" # Path to json file.
service_file = 'reese-msba-9558fdd20984.json' # Name of json file.
gbq_proj_id = 'reese-msba' # Name of project.

# Creates single variable that leads to json file.
private_key =service_path + service_file  

In [3]:
# Now we pass in our credentials so that Python has permission to access our project.
credentials = service_account.Credentials.from_service_account_file(service_path + service_file)

In [4]:
# And finally we establish our connection.
client = bigquery.Client(credentials = credentials, project=gbq_proj_id)

In [5]:
# Look at list of data sets in client.
for item in client.list_datasets() : 
    print(item.full_dataset_id)

reese-msba:dram_shop
reese-msba:wedge_transactions


The code in the next few cells builds the randomized owner table and writes it to csv format.

In [6]:
# A query that grabs a list of owners, randomizes the order, selects 650 of them, and returns all transactions for them.
query = """
    WITH cnd AS(
    SELECT DISTINCT card_no
    FROM `reese-msba.wedge_transactions.transArchive*`
    WHERE card_no != 3
    ORDER BY rand()
    LIMIT 650)
    SELECT *
    FROM `reese-msba.wedge_transactions.transArchive*` as rsmba
    INNER JOIN cnd
    ON cnd.card_no = rsmba.card_no
"""

random_owners = pandas_gbq.read_gbq(query,project_id = gbq_proj_id)

Downloading: 100%|███████████████████████████████████████████████████████| 1521335/1521335 [08:32<00:00, 2967.12rows/s]


In [7]:
random_owners

Unnamed: 0,datetime,register_no,emp_no,trans_no,upc,description,trans_type,trans_subtype,trans_status,department,...,local,organic,display,receipt,card_no,store,branch,match_id,trans_id,card_no_1
0,2014-11-06 19:31:28+00:00,7.0,76.0,95.0,0055474911182,* Manufacturers Coupon,T,CP,C,11.0,...,0.0,,,0.0,48312.0,1.0,0.0,0.0,8.0,48312.0
1,2014-11-06 19:31:32+00:00,7.0,76.0,95.0,0055474911182,* Manufacturers Coupon,T,CP,C,11.0,...,0.0,,,0.0,48312.0,1.0,0.0,0.0,9.0,48312.0
2,2014-10-04 14:20:11+00:00,6.0,33.0,41.0,TAX,Tax,A,,,0.0,...,0.0,,,0.0,48436.0,1.0,0.0,0.0,16.0,48436.0
3,2014-10-04 17:14:32+00:00,4.0,84.0,69.0,TAX,Tax,A,,,0.0,...,0.0,,,0.0,48436.0,1.0,0.0,0.0,17.0,48436.0
4,2014-10-08 10:36:19+00:00,7.0,29.0,71.0,TAX,Tax,A,,,0.0,...,0.0,,,0.0,48436.0,1.0,0.0,0.0,18.0,48436.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1521330,2012-12-13 10:16:26+00:00,8.0,46.0,1.0,0005844977901,O.Mesa Sunrise Cereal 26.4oz,I,,,1.0,...,0.0,1.0,,0.0,16050.0,1.0,0.0,0.0,5.0,16050.0
1521331,2012-12-23 09:24:51+00:00,7.0,70.0,14.0,0073291322151,F&C Auto Dish Powder 75oz 7thG,I,,,1.0,...,0.0,0.0,,0.0,14850.0,1.0,0.0,0.0,8.0,14850.0
1521332,2012-12-23 13:50:58+00:00,2.0,10.0,36.0,0005844977094,O.Honeyd Corn Flakes 26.5oz NP,I,,,1.0,...,0.0,1.0,,0.0,24473.0,1.0,0.0,0.0,35.0,24473.0
1521333,2012-12-23 18:59:15+00:00,8.0,16.0,77.0,0005844977094,O.Honeyd Corn Flakes 26.5oz NP,I,,,1.0,...,0.0,1.0,,0.0,22983.0,1.0,0.0,0.0,15.0,22983.0


In [8]:
# Deletes the csv that will be created if it already exists in the directory.
filePath = 'sample_owner_table.csv';
    
# Check if Folder exists or not.
if os.path.exists(filePath):
    os.remove(filePath)
  
    print("The folder has been deleted successfully!")
else:
    print("Cannot delete the folder as it doesn't exists")

The folder has been deleted successfully!


In [9]:
random_owners.to_csv("sample_owner_table.csv", index=False)