In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import requests
from datetime import *
import math
import time

from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

ModuleNotFoundError: No module named 'pandas'

In [2]:
today = datetime.today() - timedelta(days=1) # since it takes some time to update actual currency data for today

def currency_val(currency, date):
    response = requests.get("https://www.xe.com/currencytables/?from={}&date={}#table-section".format(currency, date.strftime('%Y-%m-%d')))
    return pd.read_html(response.text)[0]

currency_list = currency_val("KRW", today)["Currency"][:166]
currency_list

NameError: name 'datetime' is not defined

## Functions

### Simple Moving Average (SMA) calculation

In [3]:
def moving_average(df, days):
    return df.rolling(days).mean()

### Iterative web scraping for historic currency data

In [4]:
#currency: str(currency of interest)
#day: int(number of days ago) 
#currency_list = list(["currency 1", "currency 2" ...])

#Example: iter_currency("KRW", 200, ["USD", "EUR", "SGD", "JPY"]) 

def iter_currency(currency, day, currency_list): 
    from concurrent.futures import ThreadPoolExecutor
    
    start_time = time.time()
    
    today = datetime.today() - timedelta(days=1)
    currency_list = list(currency_list)
    rate_df = pd.DataFrame()
    
    
    def iter_url(url):
        response = requests.get(url)
        return pd.read_html(response.text)[0]
    
    
    for i in range(0, len(currency_list)):
        print("Processing %d of %d" %(i+1, len(currency_list)))
        rate_list = []
        date_list = []
        response_url = []
        for j in range(day, 0-1, -1):
            date = today - timedelta(days=j)
            date_list.append(date.strftime('%Y-%m-%d'))
            response_url.append("https://www.xe.com/currencytables/?from={}&date={}#table-section".format(currency, date.strftime('%Y-%m-%d')))
        
        with ThreadPoolExecutor(max_workers=200) as pool:
            response_list = list(pool.map(iter_url, response_url))
            
        for k in range(0, len(response_list)):
            df = response_list[k]
            rate_list.append(df.loc[df["Currency"] == list(currency_list)[i]]["%s per unit" %currency].values)
        rate_df[currency_list[i]] = pd.DataFrame(rate_list)
    rate_df.index = date_list
    
    
    global rate
    rate = rate_df
    
    print("----- %.2f s taken -----" % (time.time() - start_time))
    
    return rate # please call 'rate' separately to save the scraped data

## SMA Example (KRW-USD)

### Simple Moving Average over past 200 days

In [5]:
# WARNING: running this cell may cause performance issue while running due to multithreading
# approximately takes 25s  

iter_currency("KRW", 200+200, ["USD"])

NameError: name 'time' is not defined

In [6]:
plt.figure(figsize =(10, 6))
plt.plot(rate.index[200:], rate["USD"][200:], label = "KRW per USD", linewidth = 1, color = 'black')
plt.plot(rate.index[200:], moving_average(rate["USD"], 5)[200:], '--', label = "SMA(5)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 10)[200:], '--', label ="SMA(10)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 20)[200:], '--', label ="SMA(20)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 60)[200:], '--', label ="SMA(60)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 120)[200:], '--', label ="MA(120)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 200)[200:], '--', label ="MA(200)", linewidth = 1)

plt.xticks(np.arange(len(rate.index[200:])-1, 0, -30), rotation = 30, fontsize = 8)
plt.xlabel("Date")
plt.ylabel("Korean Won (KRW)")
plt.title("Exchange rate of Korean Won per US Dollar (%s ~ %s)" % (rate.index[200], rate.index[-1]))
plt.legend(bbox_to_anchor=(1.2, 1.017))
plt.margins(x=0, y=0.05)

NameError: name 'plt' is not defined

### Simple Moving Average over past 3 years

In [7]:
# WARNING: running this cell may cause performance issue while running due to multithreading
# approximately takes 80s 

iter_currency("KRW", 365*3+200, ["USD"])

NameError: name 'time' is not defined

In [8]:
plt.figure(figsize =(10, 6))
plt.plot(rate.index[200:], rate["USD"][200:], label = "KRW per USD", linewidth = 1, color = 'black')
#plt.plot(rate.index[200:], moving_average(rate["USD"], 5)[200:].dropna(),label ="SMA(5)", linewidth = 1)
#plt.plot(rate.index[200:], moving_average(rate["USD"], 10)[200:].dropna(),label ="SMA(10)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 20)[200:].dropna(), label ="SMA(20)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 60)[200:].dropna(), label ="SMA(60)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 120)[200:].dropna(), '--', label ="SMA(120)", linewidth = 1)
plt.plot(rate.index[200:], moving_average(rate["USD"], 200)[200:].dropna(), '--', label ="SMA(200)", linewidth = 1)

plt.xticks(np.arange(len(rate.index[200:])-1, 0, -90), rotation = 30, fontsize = 8)
plt.xlabel("Date")
plt.ylabel("Korean Won (KRW)")
plt.title("Exchange rate of Korean Won per US Dollar (%s ~ %s)" % (rate.index[200], rate.index[-1]))
plt.legend(bbox_to_anchor=(1.01, 1.017))
plt.margins(x=0, y=0.05)

NameError: name 'plt' is not defined

## Exploratory Analysis

In [9]:
# WARNING: running this cell may cause performance issue while running due to multithreading
# Run this cell to scrape historic currency data for KRW over past 200 days
# approximately takes 30 mins  

iter_currency("KRW", 200, currency_list)
KRW_data = rate

NameError: name 'currency_list' is not defined

### Ranking by weighted moving average differential (KRW)

In [10]:
KRW_data = pd.read_csv("KRW 2022.08.02-2023.02.18.csv")
KRW_data.rename(columns = {KRW_data.columns[0] :  "Date"}, inplace = True)

KRW_data.head()

NameError: name 'pd' is not defined

In [11]:
data = KRW_data
weighted_ma = pd.DataFrame([])


ma_list = [5, 20, 60, 120, 200] 
weight_list = [0.2, 0.1333, 0.3333, 0.2, 0.1334]


for i in range(0, len(currency_list)):
    currency_ma = []
    for j in range(0,len(ma_list)):
        currency_ma.append(moving_average(data.iloc[0:,i+1], ma_list[j]).tail(1).values)
    weighted_ma[data.iloc[0:,i+1].name] = pd.DataFrame(currency_ma)
    
    
weighted_ma = weighted_ma.apply(lambda x: (np.asarray(x) * np.asarray(weight_list)).sum())

forex_score = ((weighted_ma - data.drop("Date", axis = 1).iloc[-1]) / weighted_ma * 100).sort_values(ascending = False)
forex_score = pd.DataFrame(forex_score).reset_index()
forex_score.columns = ["Currency abbreviation", "Forex score"]

forex_score

# positive values mean high value, negative values mean low value compared to weighted moving average

NameError: name 'KRW_data' is not defined

In [12]:
currency_code = pd.read_csv("currency code.csv")

currency_code = currency_code.merge(forex_score, how = "inner")

currency_code["PPP"] =  currency_code["GDP per capita"] / currency_code["GDP per capita PPP"]
currency_code["Final score"] = (1/currency_code["PPP"])*0.5+ (np.log(currency_code["Forex score"] + abs(min(currency_code["Forex score"])) + 1))*0.5

currency_code.sort_values(by = ["Final score"], ascending = False)

NameError: name 'pd' is not defined