## Importing The Required Modules And Functions

In [2]:
import glob
import pandas as pd
from datetime import datetime

## Setting The Path For The Target Files

In [3]:
tmpfile    = "exchangerate_temp.tmp"               # scratch file name for extracted data (NOTE(review): not referenced by the visible cells)

logfile    = "exchangerate_logfile.txt"            # text file that receives the timestamped event log lines

targetfile = "bank_market_cap_gbp.csv"   # CSV file the transformed data is written to

## JSON Extract Function

In [4]:
def extract_from_json(file_to_process):
    """Load a single JSON file into a pandas DataFrame.

    Parameters
    ----------
    file_to_process : str or path-like
        Location of the JSON document to parse.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``pd.read_json`` for that file.
    """
    return pd.read_json(file_to_process)

## Extract Function

In [5]:
def extract():
    """Combine every JSON file under ``exchange_rate/`` into one DataFrame.

    Each ``*.json`` file in the ``exchange_rate`` folder (relative to the
    current working directory) is parsed once with ``extract_from_json`` and
    the resulting frames are concatenated with a fresh 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        The concatenated rows. The columns ``'Name'`` and
        ``'Market Cap (US$ Billion)'`` always come first (and are created
        empty if no file supplies them); any other columns found in the
        files follow. When no JSON file is found, an empty frame with just
        those two columns is returned.
    """
    expected_columns = ['Name', 'Market Cap (US$ Billion)']

    # Sorted so the row order does not depend on the filesystem's listing order.
    json_paths = sorted(glob.glob("exchange_rate/*.json"))

    # Parse each file exactly once and concatenate in a single step;
    # DataFrame.append was removed in pandas 2.0.
    frames = [extract_from_json(jsonfile) for jsonfile in json_paths]
    if not frames:
        return pd.DataFrame(columns=expected_columns)

    combined = pd.concat(frames, ignore_index=True)

    # Keep the expected columns in front, as appending to a pre-shaped
    # empty frame did.
    extra_columns = [col for col in combined.columns if col not in expected_columns]
    return combined.reindex(columns=expected_columns + extra_columns)

## Reading CSV Datafiles

In [6]:
# Read the exchange-rate table, indexed by currency code (first CSV column).
# The path is relative to the working directory, matching the "exchange_rate/"
# folder that extract() globs; this avoids a machine-specific absolute path
# and the invalid backslash escapes a non-raw Windows path string contains.
df = pd.read_csv('exchange_rate/exchange_rates.csv', index_col=0)
df.head()

Unnamed: 0,Rates
AUD,1.297088
BGN,1.608653
BRL,5.409196
CAD,1.271426
CHF,0.886083


In [7]:
# Direct label lookup of the GBP rate. A missing 'GBP' row raises KeyError
# here, instead of leaving exchange_rate unset (or stale from an earlier run
# of the kernel) as a scan over the index would.
exchange_rate = df.loc['GBP', 'Rates']
exchange_rate

0.7323984208000001

In [8]:
# Direct label lookup of the USD rate; a missing 'USD' row raises KeyError.
# The table is quoted per 1 USD, so 1 USD = 0.73239842... GBP.
usd = df.loc['USD', 'Rates']
usd

1.0

## Transform Data

In [9]:
def transform(data, rate=None):
    """Convert the USD market-cap column with an exchange rate.

    The input frame is left untouched: the conversion is done on a copy, so
    the cell that calls this function can be re-run without a KeyError from
    an already-renamed column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``'Market Cap (US$ Billion)'`` column.
    rate : float, optional
        Multiplier applied to the market-cap column. When omitted, the
        notebook-level ``exchange_rate`` variable is used.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the market-cap column has been multiplied by
        ``rate``, rounded to 3 decimals and renamed to
        ``'Market Cap (GBP$ Billion)'``.

    Raises
    ------
    KeyError
        If ``data`` has no ``'Market Cap (US$ Billion)'`` column.
    """
    if rate is None:
        rate = exchange_rate  # fall back to the rate looked up earlier in the notebook

    print(data.columns)
    marketcapcol = 'Market Cap (US$ Billion)'

    converted = data.copy()
    converted[marketcapcol] = (rate * converted[marketcapcol]).round(3)
    return converted.rename(columns={marketcapcol: 'Market Cap (GBP$ Billion)'})

## Load

In [10]:
def loadtocsv(targetfile, data_to_load):
    """Write a DataFrame to a CSV file, leaving out the row index.

    Parameters
    ----------
    targetfile : str or path-like
        Destination of the CSV output.
    data_to_load : pandas.DataFrame
        Frame whose columns and rows are written.
    """
    data_to_load.to_csv(path_or_buf=targetfile, index=False)

In [11]:
from sqlalchemy import create_engine
def loadtodatabase(data_to_load,
                   connection_url="mysql+mysqldb://root:@localhost/mydatabase",
                   table_name='exchange_rate'):
    """Write a DataFrame to a database table, replacing any existing table.

    Parameters
    ----------
    data_to_load : pandas.DataFrame
        Frame to store; its row index is not written.
    connection_url : str, optional
        SQLAlchemy connection URL. Defaults to the local MySQL database the
        notebook was written against.
        NOTE(review): the default embeds the user name in the code — consider
        supplying the URL from configuration instead.
    table_name : str, optional
        Name of the destination table (dropped and recreated on every call).

    Notes
    -----
    A failure while writing is reported with ``print`` and not re-raised, so
    the caller continues either way.
    """
    my_conn = create_engine(connection_url)
    try:
        data_to_load.to_sql(table_name, con=my_conn, index=False, if_exists='replace')
    except Exception as err:
        # Best-effort load: report the problem and let the notebook continue.
        print(f"Error: '{err}'")
    finally:
        # Release pooled connections; the engine is not reused after this call.
        my_conn.dispose()

## Log

In [12]:
def log(message, log_path=None):
    """Append one timestamped line to the event log.

    Each call writes ``<timestamp>,<message>`` followed by a newline, so every
    event sits on its own line.

    Parameters
    ----------
    message : str
        Text to record.
    log_path : str or path-like, optional
        File to append to. When omitted, the notebook-level ``logfile``
        setting is used.
    """
    if log_path is None:
        log_path = logfile  # notebook-level configuration value

    # Hour:Minute:Second-MonthName-Day-Year. %b is the portable spelling of the
    # abbreviated month name (%h is not accepted on every platform).
    timestamp_format = '%H:%M:%S-%b-%d-%Y'
    timestamp = datetime.now().strftime(timestamp_format)

    with open(log_path, "a") as f:
        f.write(timestamp + ',' + message + '\n')

## Running The ETL Process

In [13]:
# Record the start of the pipeline run in the event log.
log("ETL Job Started")

In [14]:
# Extract: build the combined frame with extract(), logging the phase boundaries.
log("Extract phase Started")
extracted_data = extract()
log("Extract phase Ended")
extracted_data  # shown as the cell output

Unnamed: 0,Name,Market Cap (US$ Billion)
0,JPMorgan Chase,390.934
1,Industrial and Commercial Bank of China,345.214
2,Bank of America,325.331
3,Wells Fargo,308.013
4,China Construction Bank,257.399
...,...,...
100,Ping An Bank,37.993
101,Standard Chartered,37.319
102,United Overseas Bank,35.128
103,QNB Group,33.560


In [15]:
# Transform: pass the extracted frame through transform(), logging the phase boundaries.
log("Transform phase Started")
transformed_data = transform(extracted_data)
log("Transform phase Ended")
transformed_data  # shown as the cell output

Index(['Name', 'Market Cap (US$ Billion)'], dtype='object')


Unnamed: 0,Name,Market Cap (GBP$ Billion)
0,JPMorgan Chase,286.319
1,Industrial and Commercial Bank of China,252.834
2,Bank of America,238.272
3,Wells Fargo,225.588
4,China Construction Bank,188.519
...,...,...
100,Ping An Bank,27.826
101,Standard Chartered,27.332
102,United Overseas Bank,25.728
103,QNB Group,24.579


In [16]:
# Display the transformed frame again (same variable the previous cell displayed).
transformed_data

Unnamed: 0,Name,Market Cap (GBP$ Billion)
0,JPMorgan Chase,286.319
1,Industrial and Commercial Bank of China,252.834
2,Bank of America,238.272
3,Wells Fargo,225.588
4,China Construction Bank,188.519
...,...,...
100,Ping An Bank,27.826
101,Standard Chartered,27.332
102,United Overseas Bank,25.728
103,QNB Group,24.579


In [17]:
# Load: write the transformed frame to the CSV target and to the database,
# logging the phase boundaries.
log("Load phase Started")
loadtocsv(targetfile,transformed_data)
loadtodatabase(transformed_data)
log("Load phase Ended")

In [18]:
# Record the end of the pipeline run in the event log.
log("ETL Job Ended")