In [1]:
import os
import time

import cudf
import mysql.connector
import pandas as pd
import psutil

from extract_load_functions import extraction, loading

# Directory where the performance-metric CSVs are written (note the trailing slash).
exportLocation = r'/home/jeff/'

# Number of benchmark samples to collect for each ETL stage.
iterations = 31

# Connect to the MySQL source database.
# Each connection setting can be overridden through an environment variable so that
# credentials do not have to live in the notebook. The previous hard-coded values are
# kept as fallbacks, so a run without these variables behaves exactly as before.
# NOTE(review): the fallback root password is still committed in source -- rotate it
# and drop the default once ETL_DB_PASSWORD is set wherever this notebook runs.
mydb = mysql.connector.connect(host=os.environ.get("ETL_DB_HOST", "127.0.0.1"),
                               user=os.environ.get("ETL_DB_USER", "root"),
                               passwd=os.environ.get("ETL_DB_PASSWORD", "0861137MySQL!"),
                               database=os.environ.get("ETL_DB_NAME", "project1"))
print("Your current DB connection is with: ",mydb)


# Mark the start of the end-to-end ETL run.
etlTimerStart = time.perf_counter()

#### Light Extraction ####
###################################################################################################################
# Pull the whole item table from MySQL through the project's extraction helper.
# NOTE(review): extraction() lives in extract_load_functions and is not visible here;
# it presumably repeats the query `iterations` times and writes its metrics to
# exportLocation + "lightExtractionPrfm.csv" -- confirm against that module.
itemQuery = "select * from item"
lightExtraction = extraction(
    mydb,
    itemQuery,
    iterations,
    exportLocation,
    "lightExtractionPrfm.csv",
)


#### LIGHT TRANSFORMATION ####
###################################################################################################################
# Perform the light transformation workload: replace "Blue" with "Navy" in every item
# description. The workload is benchmarked twice -- first with pandas, then with cuDF --
# and the per-sample timing / utilization metrics of each run are exported as CSV.

column_names = ["CPU_utilization", "RAM_utilization", "elapsed_time"]


def _benchmarkBlueToNavy(prepareFrame, runs):
    """Time the 'Blue' -> 'Navy' replacement on ``item_desc`` over ``runs`` samples.

    Args:
        prepareFrame: Zero-argument callable returning a fresh, untransformed frame
            with an ``item_desc`` string column (pandas or cuDF). It is called before
            the timer starts, so its cost is not part of ``elapsed_time``.
        runs: Number of samples to collect.

    Returns:
        A ``(metrics, lastFrame)`` tuple. ``metrics`` is a pandas DataFrame with one
        row per sample and the columns listed in ``column_names``. ``lastFrame`` is
        the transformed frame produced by the final sample, or ``None`` when ``runs``
        is 0.
    """
    samples = []
    lastFrame = None
    for _ in range(runs):
        # Every sample must start from untransformed data; otherwise only the first
        # sample would actually find any "Blue" to replace.
        frame = prepareFrame()

        # Timed region: only the string replacement itself.
        start = time.perf_counter()
        frame["item_desc"] = frame["item_desc"].str.replace('Blue', 'Navy', regex=True)
        stop = time.perf_counter()

        # Utilization is read right after the timed region, as one row per sample.
        # Rows are collected in a list and turned into a DataFrame once at the end
        # (DataFrame.append was removed in pandas 2.0 and copies the frame each call).
        samples.append({'CPU_utilization': psutil.cpu_percent(),
                        'RAM_utilization': psutil.virtual_memory().percent,
                        'elapsed_time': stop - start})
        lastFrame = frame

        # Fixed pause between samples, kept from the original benchmark.
        # NOTE(review): presumably gives psutil.cpu_percent() a window to measure over
        # -- confirm. It dominates the "total" wall-clock time printed below.
        time.sleep(2)
    return pd.DataFrame(samples, columns=column_names), lastFrame


print("STARTING TRANSFORMATION...")
print("Base Transformation CPU utilization: ", psutil.cpu_percent())
print("Base Transformation RAM utilization: ", psutil.virtual_memory().percent)

# Collect `iterations` pandas samples. Each one works on its own copy of the extracted
# table, so lightExtraction itself stays untouched for the cuDF run below.
# NOTE(review): assumes extraction() returns a pandas DataFrame -- confirm.
transTimerStart = time.perf_counter()
lightTransPrfm, lightTrans = _benchmarkBlueToNavy(lightExtraction.copy, iterations)
transTimerEnd = time.perf_counter()
if lightTrans is None:
    # No samples requested: fall back to the extracted frame so the load step below
    # still receives a DataFrame.
    lightTrans = lightExtraction

print("Pandas transformation metrics captured.\n")
print("Total transformation time: ", transTimerEnd - transTimerStart, "s")
print("Iterations performed: ", iterations)
print("Average transformation iteration time: ", lightTransPrfm.elapsed_time.mean(), "s")
print("Average CPU utilization: ", lightTransPrfm.CPU_utilization.mean())
print("Average RAM utilization: ", lightTransPrfm.RAM_utilization.mean(), "\n\n")

# The extraction connection is no longer needed; loading() is not handed this
# connection, so it is closed here.
mydb.close()

# Export pandas transformation performance next to the other metric files.
lightTransPrfm.to_csv(os.path.join(exportLocation, 'lightTransPrfm.csv'), index = False, header=True)


#### cuDF TRANSFORMATION ####
###################################################################################################################

print("Starting cuDF TRANSFORMATION...")
print("Base cuDF TRANSFORMATION CPU utilization: ", psutil.cpu_percent())
print("Base cuDF TRANSFORMATION RAM utilization: ", psutil.virtual_memory().percent)

# Collect `iterations` cuDF samples. The pandas -> cuDF conversion happens before the
# timer starts in each sample, so only the replacement itself is timed; converting from
# the untouched lightExtraction guarantees there is "Blue" text left to replace.
transCuTimerStart = time.perf_counter()
lightTransPrfmCU, lightTransCU = _benchmarkBlueToNavy(
    lambda: cudf.DataFrame.from_pandas(lightExtraction), iterations)
transCuTimerEnd = time.perf_counter()

print("cuDF transformation metrics captured.\n")
print("Total cuDF TRANSFORMATION time: ", transCuTimerEnd - transCuTimerStart, "s")
print("Iterations performed: ", iterations)
print("Average cuDF TRANSFORMATION iteration time: ", lightTransPrfmCU.elapsed_time.mean(), "s")
print("Average cuDF TRANSFORMATION CPU utilization: ", lightTransPrfmCU.CPU_utilization.mean())
print("Average cuDF TRANSFORMATION RAM utilization: ", lightTransPrfmCU.RAM_utilization.mean(),"\n\n")

# Export cuDF transformation performance next to the other metric files.
lightTransPrfmCU.to_csv(os.path.join(exportLocation, 'lightTransPrfmCUPrfm.csv'), index = False, header=True)

#### Load data ####
###################################################################################################################
# Load the pandas-transformed table (item descriptions already switched to "Navy").
# NOTE(review): loading() comes from extract_load_functions and is not visible here;
# 'lightTransformation' is presumably the destination table name -- confirm.
loading(lightTrans,'lightTransformation',iterations,exportLocation,"lightExport.csv")

# Stop the end-to-end timer and report the whole ETL run's duration in minutes.
etlTimerEnd = time.perf_counter()
etlElapsedMinutes = (etlTimerEnd - etlTimerStart) / 60
print('ETL is complete')
print('Elapsed ETL time is: ', etlElapsedMinutes, ' minutes')

Your current DB connection is with:  <mysql.connector.connection_cext.CMySQLConnection object at 0x7f3e940f9dd0>

STARTING EXTRACTION...
Base EXTRACTION CPU utilization:  5.7
Base EXTRACTION RAM utilization:  15.4
RUNNING...
Data frame loading complete.

Total EXTRACTION time:  62.22990511899843 s
Iterations performed:  31
Average EXTRACTION iteration time:  0.004340671161762364 s
Average CPU utilization:  2.358064516129033
Average RAM utilization:  15.322580645161297 


STARTING TRANSFORMATION...
Base Transformation CPU utilization:  2.4
Base Transformation RAM utilization:  15.3
Pandas transformation metrics captured.

Total extraction time:  62.12514808800188 s
Iterations performed:  31
Average extraction iteration time:  0.0009712968068572903 s
Average CPU utilization:  2.564516129032258
Average RAM utilization:  15.374193548387089 


Starting cuDF TRANSFORMATION...
Base cuDF TRANSFORMATION CPU utilization:  2.1
Base cuDF TRANSFORMATION RAM utilization:  15.4
cuDF performance metri