I have done most of my testing in the REPL for today. I started today with a much stricter timeline to finish in, and was set to finish script 3 on time, but bug hunting has blown out my alloted time. I eventually worked out that the various currency series (spot bid, spot mid, spot ask, forward bid, etc...) begin on different dates on some currencies. I don't know what it was about my Python code that made it so I never had to clean that up, but it was causing an error in the Julia code.

In [1]:
using CSV
using DataFrames
using DataStructures
using Dates

const INPUT_FILESTRING_BASE = "../../Data/Raw Data/Currencies"
const OUTPUT_FILESTRING_BASE = "../../Data/Refined Data/Currencies"

const RATE_SETTLEMENTS = ["spot", "forward"]
const RATE_LEVELS = ["bid", "mid", "ask"]
const INVERTED_RATE_LEVELS = Dict("bid" => "ask", "mid" => "mid", "ask" => "bid")
const RATE_TYPES =  Iterators.product(RATE_SETTLEMENTS, RATE_LEVELS)
const DATE_FORMAT = DateFormat("dd/mm/yyyy")

const CIP_VIOLATIONS = [
    (currency="AED", start_date="2006-06-30", end_date="2006-11-30"),
    (currency="IDR", start_date="2000-12-31", end_date="2007-05-31"),
    (currency="MYR", start_date="1998-08-31", end_date="2005-06-30"),
    (currency="TRY", start_date="2000-10-31", end_date="2001-11-30"),
    (currency="ZAR", start_date="1985-07-31", end_date="1985-08-31")
]
const EURO_CONSTITUENTS = [
    (currency="ATS", start_date="1999-01-01"),
    (currency="BEF", start_date="1999-01-01"),
    (currency="DEM", start_date="1999-01-01"),
    (currency="ESP", start_date="1999-01-01"),
    (currency="FIM", start_date="1999-01-01"),
    (currency="FRF", start_date="1999-01-01"),
    (currency="IEP", start_date="1999-01-01"),
    (currency="ITL", start_date="1999-01-01"),
    (currency="NGL", start_date="1999-01-01"),
    (currency="PTE", start_date="1999-01-01"),
    (currency="GRD", start_date="2001-01-01"),
]

const RateSet = Vector{Vector{Union{Missing,Float64}}}

function termcheck(level::String, term)
    if term == "european"
        return level
    elseif term == "american"
        return INVERTED_RATE_LEVELS[level]
    else
        error("Invalid term type: $term")
    end
end
function termcheck(rate::AbstractVector{T}, term) where T<:Union{Missing,Float64}
    if term == "european"
        return rate
    elseif term == "american"
        return 1 ./ rate
    else
        error("Invalid term type: $term")
    end
end

function build_rate_series(currency, info, rate_data)
    currency_info = info[info[:, :cur_code] .== currency, :]
    dates = first(values(rate_data))[!, :date]

    currency_rate_sets = Dict(
        (a, b) => RateSet() for (a,b) in RATE_TYPES
    )

    for series_source in eachrow(currency_info)
        push_source_to_rate_sets!(currency_rate_sets, series_source, rate_data)
    end

    index_columns = OrderedDict(:cur_code => currency, :date => dates)
    rate_columns = OrderedDict(
        Symbol.("$(a)_$b") => layer_series(currency_rate_sets[(a, b)])
        for (a, b) in RATE_TYPES
    )

    rate_series = DataFrame(merge(index_columns, rate_columns))

    return rate_series
end

function push_source_to_rate_sets!(currency_rate_sets, series_source, rate_data)
    for level in RATE_LEVELS
        spot_rate_code = series_source.symbol_s
        forward_rate_code = series_source.symbol_f
        term = series_source.terms
        checked_level = termcheck(level, term)



        spot_series = rate_data[("spot", checked_level)][!, spot_rate_code]
        forward_series = add_forward_points(
            rate_data[("forward", checked_level)][!, forward_rate_code],
            spot_series,
            series_source.f_denom
        )
        
        push!(currency_rate_sets[("spot", checked_level)], termcheck(spot_series, term))
        push!(
            currency_rate_sets[("forward", checked_level)], termcheck(forward_series, term)
        )
    end
end

function add_forward_points(forward_points, spot_series, f_denom::Integer)
    return forward_points ./ f_denom .+ spot_series
end
add_forward_points(forward_series, ::Any, ::Missing) = forward_series


function layer_series(rate_set)
    # The input data is sorted so that sources in later rows have higher priority
    series_size = length(first(rate_set))
    layered_series = fill(missing, series_size)

    for series in Iterators.reverse(rate_set)
        layered_series = coalesce.(layered_series, series)
    end

    return layered_series
end

function assert_equal_dates!(rate_data)
    # Only dates represented in every set are useful. Asserting equality of dates now
    # prevents the need to merge rate data on dates later.
    date_columns = [rate_data[(a, b)].date for (a, b) in RATE_TYPES]

    latest_start_date = maximum([minimum(dates) for dates in date_columns])
    earliest_end_date = minimum([maximum(dates) for dates in date_columns])

    for (a, b) in RATE_TYPES
        post_start_date(row) = row.date .>= latest_start_date
        pre_end_date(row) = row.date .<= earliest_end_date

        filter!(row -> post_start_date(row) .& pre_end_date(row), rate_data[(a, b)])
    end

    date_collection = [rate_data[(a, b)].date for (a, b) in RATE_TYPES]
    
    dates_conform(date_series) = all(date_series .== first(date_collection))

    if !all(date_series -> dates_conform(date_series), date_collection)
        error("The rate data cannot be conformed to a common date series.")
    end
end

function remove_cip_violations!(currency_table)
    for i in CIP_VIOLATIONS
        cip_violation_mask = (
            (currency_table[:, :cur_code] .== i.currency)
            .& (currency_table[:, :date] .>= Date(i.start_date))
            .& (currency_table[:, :date] .<= Date(i.end_date))
        )
        rate_columns = collect("$(a)_$b" for (a, b) in RATE_TYPES) |> vec

        currency_table[cip_violation_mask, rate_columns] .= missing
    end
end

function end_euro_constituents!(currency_table)
    for i in EURO_CONSTITUENTS
        euro_constituent_mask = (
            (currency_table[:, :cur_code] .== i.currency)
            .& (currency_table[:, :date] .>= Date(i.start_date))
        )

        deleteat!(currency_table, euro_constituent_mask)
    end
end

#function align_series(currency_table)


function main()
    time_start = time()
    
    println("Reading currency info...")
    info = CSV.read("$INPUT_FILESTRING_BASE/currency_info.csv", DataFrame)
    
    println("Reading raw rate data...")
    rate_data = Dict{Tuple{String, String}, DataFrame}()
    for (a, b) in RATE_TYPES
        filestring = "$INPUT_FILESTRING_BASE/$(a)_$b.csv"
        rate_data[(a, b)] = CSV.read(
            filestring, DataFrame, missingstring="NA",
            dateformat=DATE_FORMAT, types=Dict(:date => Date)
        )
    end

    assert_equal_dates!(rate_data)

    currency_series = DataFrame[]
    unique_currencies = unique(info[:, :cur_code])

    println("Processing rate data...")
    for i in unique_currencies
        push!(currency_series, build_rate_series(i, info, rate_data))
    end

    currency_table = vcat(currency_series...)
    currency_table[!, :date] = lastdayofmonth.(currency_table[!, :date])

    remove_cip_violations!(currency_table)
    end_euro_constituents!(currency_table)
    align_series!(currency_table)
    
    CSV.write("$OUTPUT_FILESTRING_BASE/currency_rates.csv", currency_table)

    time_duration = round(time() - time_start, digits=2)
    println("Finished refining currency data in $time_duration seconds")
end

main (generic function with 1 method)

In [2]:
info = CSV.read("$INPUT_FILESTRING_BASE/currency_info.csv", DataFrame)
    
println("Reading raw rate data...")
rate_data = Dict{Tuple{String, String}, DataFrame}()
for (a, b) in RATE_TYPES
    filestring = "$INPUT_FILESTRING_BASE/$(a)_$b.csv"
    rate_data[(a, b)] = CSV.read(
        filestring, DataFrame, missingstring="NA",
        dateformat=DATE_FORMAT, types=Dict(:date => Date)
    )
end

assert_equal_dates!(rate_data)

currency_series = DataFrame[]
unique_currencies = unique(info[:, :cur_code])

println("Processing rate data...")
for i in unique_currencies
    push!(currency_series, build_rate_series(i, info, rate_data))
end

currency_table = vcat(currency_series...)
currency_table[!, :date] = lastdayofmonth.(currency_table[!, :date])

remove_cip_violations!(currency_table)
end_euro_constituents!(currency_table)

Reading raw rate data...


Processing rate data...


In [6]:
currency_table

Row,cur_code,date,spot_bid,forward_bid,spot_mid,forward_mid,spot_ask,forward_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1976-01-31,missing,missing,missing,missing,missing,missing
2,AUD,1976-02-29,missing,missing,missing,missing,missing,missing
3,AUD,1976-03-31,missing,missing,missing,missing,missing,missing
4,AUD,1976-04-30,missing,missing,missing,missing,missing,missing
5,AUD,1976-05-31,missing,missing,missing,missing,missing,missing
6,AUD,1976-06-30,missing,missing,missing,missing,missing,missing
7,AUD,1976-07-31,missing,missing,missing,missing,missing,missing
8,AUD,1976-08-31,missing,missing,missing,missing,missing,missing
9,AUD,1976-09-30,missing,missing,missing,missing,missing,missing
10,AUD,1976-10-31,missing,missing,missing,missing,missing,missing


In [7]:
["$(a)_$b" for (a,b) in RATE_TYPES]

2×3 Matrix{String}:
 "spot_bid"     "spot_mid"     "spot_ask"
 "forward_bid"  "forward_mid"  "forward_ask"

In [3]:
using CSV
using DataFrames
using Statistics
using ShiftedArrays: lead, lag

const INPUT_FILESTRING = "../../../../Code - Old Python Version/Data/Refined Data/Currencies/currencies.csv"
const OUTPUT_FILESTRING = "../../Data/Refined Data/Currencies/currency_factors_old_data_julia_script.csv"

const BASKET_ALLOCATION_ORDER = Dict(
    # Quantile number => Order that that quantile receives a currency added to the sample
    1 => 2,
    2 => 4,
    3 => 6,
    4 => 5,
    5 => 3,
    6 => 1
)

function group_transform!(df, group_cols, input_cols, f::Function, output_cols)
    groupby(df, group_cols) |> x -> transform!(x, input_cols => f => output_cols)
end

function group_combine(df, group_cols, input_cols, f::Function, output_cols; cast=true)
    if cast
        groupby(df, group_cols) |> x -> combine(x, input_cols .=> f .=> output_cols)
    else
        groupby(df, group_cols) |> x -> combine(x, input_cols => f => output_cols)
    end
end

function compute_delta_spot!(df)
    group_on = :curr
    input = :spot_mid
    output = :delta_spot

    delta_spot_computation(spot) = log.(spot) - log.(lag(spot))

    group_transform!(df, group_on, input, delta_spot_computation, output)
end

function compute_forward_discount!(df)
    group_on = :curr
    input = [:spot_mid, :for_mid]
    output = :forward_discount

    function forward_discount_computation(spot, forward)
        missing_filter = 0*spot + 0*forward
        forward_discount = log.(lag(forward)) - log.(lag(spot)) + missing_filter

        return forward_discount
    end

    group_transform!(df, group_on, input, forward_discount_computation, output)
end

function compute_carry_returns!(df)
    group_on = :curr
    input = [
        :delta_spot, :forward_discount, :for_bid, :for_ask, :spot_bid, :spot_ask
    ]
    output = [:carry_return, :net_long_carry_return, :net_short_carry_return]

    gross_carry(forward_discount, delta_spot) = forward_discount - delta_spot
    long_net_carry(forward_bid, spot_ask) = log.(lag(forward_bid)) - log.(spot_ask)
    short_net_carry(forward_ask, spot_bid) = -log.(lag(forward_ask)) + log.(spot_bid)

    carry_return_computations(Δs, d, fb, fa, sb, sa) = zip(
        gross_carry(d, Δs), long_net_carry(fb, sa), short_net_carry(fa, sb)
    )

    group_transform!(df, group_on, input, carry_return_computations, output)
end

function compute_interest_rate_ranking!(df)
    group_on = :date
    input = :forward_discount
    output = :interest_rate_rank

    function interest_rate_rank_computation(forward_discount)
        permutation_index = sortperm(forward_discount)
        rank = Array{Union{Missing, Int}}(undef, length(forward_discount))

        for (i, j) in enumerate(permutation_index)
            ismissing(forward_discount[j]) ? (rank[j] = missing) : rank[j] = i
        end

        return rank
    end

    group_transform!(df, group_on, input, interest_rate_rank_computation, output)
end

function assign_interest_rate_baskets!(df)
    group_on = :date
    input = :interest_rate_rank
    output = :interest_rate_basket

    function interest_rate_basket_definition(rank)
        nonmissing_rank = skipmissing(rank)
        isempty(nonmissing_rank) && return fill(missing, length(rank))
        num_currencies = maximum(nonmissing_rank)

        basket_size(i) = Int(ceil((num_currencies - (BASKET_ALLOCATION_ORDER[i]-1))/6))

        max_rank_per_basket = cumsum([basket_size(i) for i in 1:6])

        basket = assign_basket_num.(rank, Ref(max_rank_per_basket))

        return basket
    end

    group_transform!(df, group_on, input, interest_rate_basket_definition, output)
end

function assign_basket_num(row_rank, max_rank_per_basket)
    ismissing(row_rank) && return missing
    basket_num = findfirst(>=(row_rank), max_rank_per_basket)
    return basket_num
end

function combine_net_carry_returns!(df)
    df.net_carry_return = Vector{Union{Missing, Float64}}(missing, nrow(df))
    for row in eachrow(df)
        !ismissing(row.interest_rate_basket) || continue
        
        row.interest_rate_basket == 1 && (row.net_carry_return = row.net_short_carry_return)
        row.net_carry_return = row.net_long_carry_return
    end
end

function aggregate_baskets(df)
    basketed_df = df[.!ismissing.(df.interest_rate_basket), :]

    group_on = [:date, :interest_rate_basket]
    aggregated_columns = [:delta_spot, :forward_discount, :carry_return, :net_carry_return]

    basket_rates = group_combine(
        basketed_df, group_on, aggregated_columns, mean, aggregated_columns
    )

sort!(basket_rates, [:date, :interest_rate_basket])

    return basket_rates
end

function compute_factors(df)
    group_on = :date
    input = [:delta_spot, :carry_return, :net_carry_return, :interest_rate_basket]
    output = [:rx, :hml_fx, :rx_net, :hml_fx_net, :dollar, :carry]

    function hml(basket_nums, series)
        basketed_series = Dict(
            basket => series for (basket, series) in zip(basket_nums, series)
        )

        return basketed_series[6] - basketed_series[1]
    end

    function factor_computations(delta_spot, carry_return, net_carry_return, basket_num)
        rx = mean(skipmissing(carry_return))
        hml_fx = hml(basket_num, carry_return)
        rx_net = mean(skipmissing(net_carry_return))
        hml_fx_net = hml(basket_num, net_carry_return)
        dollar = mean(skipmissing(delta_spot))
        carry = hml(basket_num, delta_spot)
        
        return [(rx, hml_fx, rx_net, hml_fx_net, dollar, carry)]
    end

    factors = group_combine(df, group_on, input, factor_computations, output, cast=false)

    return factors
end
    

function main()
    rates = CSV.read(INPUT_FILESTRING, DataFrame)
    
    compute_delta_spot!(rates)
    compute_forward_discount!(rates)
    compute_carry_returns!(rates)

    compute_interest_rate_ranking!(rates)
    assign_interest_rate_baskets!(rates)
    combine_net_carry_returns!(rates)

    basket_rates = aggregate_baskets(rates)

    currency_factors = compute_factors(basket_rates)

    CSV.write(OUTPUT_FILESTRING, currency_factors)
end



main (generic function with 1 method)

In [5]:
main()

"../../Data/Refined Data/Currencies/currency_factors_old_data_julia_script.csv"

Is there a difference between the old currency data and the new?

In [50]:
old_python_filestring_base = "../../../../Code - Old Python Version/Data/Raw Data/Currencies"
new_julia_filestring_base = "../../Data/Raw Data/Currencies"

currfiles = ["currency_info", "forward_ask", "forward_bid", "spot_ask", "spot_bid", "forward_mid", "spot_mid"]

python_data = Dict(i => CSV.read(joinpath(old_python_filestring_base, "$i.csv"), DataFrame) for i in currfiles)
julia_data = Dict(i => CSV.read(joinpath(new_julia_filestring_base, "$i.csv"), DataFrame) for i in currfiles)

Dict{String, DataFrame} with 7 entries:
  "spot_ask"      => [1m552×74 DataFrame[0m…
  "forward_mid"   => [1m552×74 DataFrame[0m…
  "spot_mid"      => [1m552×74 DataFrame[0m…
  "spot_bid"      => [1m552×74 DataFrame[0m…
  "currency_info" => [1m73×5 DataFrame[0m…
  "forward_bid"   => [1m552×74 DataFrame[0m…
  "forward_ask"   => [1m552×74 DataFrame[0m…

In [12]:
for i in currfiles
    @show i
    @show isequal(python_data[i], julia_data[i])
end

i = "currency_info"
isequal(python_data[i], julia_data[i]) = true
i = "forward_ask"
isequal(python_data[i], julia_data[i]) = true
i = "forward_bid"
isequal(python_data[i], julia_data[i]) = true
i = "spot_ask"
isequal(python_data[i], julia_data[i]) = true
i = "spot_bid"
isequal(python_data[i], julia_data[i]) = true
i = "forward_mid"
isequal(python_data[i], julia_data[i]) = true
i = "spot_mid"
isequal(python_data[i], julia_data[i]) = true


Exactly the same. Let's try again.

In [1]:
using CSV
using DataFrames
using Statistics
using ShiftedArrays: lead, lag

basket_allocation_order = Dict(
    # Quantile number => Order that that quantile receives a currency added to the sample
    1 => 2,
    2 => 4,
    3 => 6,
    4 => 5,
    5 => 3,
    6 => 1
)

function group_transform!(df, group_cols, input_cols, f::Function, output_cols)
    groupby(df, group_cols) |> x -> transform!(x, input_cols => f => output_cols)
end

function group_combine(df, group_cols, input_cols, f::Function, output_cols; cast=true)
    if cast
        groupby(df, group_cols) |> x -> combine(x, input_cols .=> f .=> output_cols)
    else
        groupby(df, group_cols) |> x -> combine(x, input_cols => f => output_cols)
    end
end

function compute_delta_spot!(df, cur_code_name)
    group_on = cur_code_name
    input = :spot_mid
    output = :delta_spot

    delta_spot_computation(spot) = log.(spot) - log.(lag(spot))

    group_transform!(df, group_on, input, delta_spot_computation, output)
end

function compute_forward_discount!(df, cur_code_name, forward_mid_name)
    group_on = cur_code_name
    input = [:spot_mid, forward_mid_name]
    output = :forward_discount

    function forward_discount_computation(spot, forward)
        missing_filter = 0*spot + 0*forward
        forward_discount = log.(lag(forward)) - log.(lag(spot)) + missing_filter

        return forward_discount
    end

    group_transform!(df, group_on, input, forward_discount_computation, output)
end

function compute_carry_returns!(df, cur_code_name, forward_bid_name, forward_ask_name)
    group_on = cur_code_name
    input = [
        :delta_spot, :forward_discount, forward_bid_name, forward_ask_name, :spot_bid, :spot_ask
    ]
    output = [:carry_return, :net_long_carry_return, :net_short_carry_return]

    gross_carry(forward_discount, delta_spot) = forward_discount - delta_spot
    long_net_carry(forward_bid, spot_ask) = log.(lag(forward_bid)) - log.(spot_ask)
    short_net_carry(forward_ask, spot_bid) = -log.(lag(forward_ask)) + log.(spot_bid)

    carry_return_computations(Δs, d, fb, fa, sb, sa) = zip(
        gross_carry(d, Δs), long_net_carry(fb, sa), short_net_carry(fa, sb)
    )

    group_transform!(df, group_on, input, carry_return_computations, output)
end

function compute_interest_rate_ranking!(df)
    group_on = :date
    input = :forward_discount
    output = :interest_rate_rank

    function interest_rate_rank_computation(forward_discount)
        permutation_index = sortperm(forward_discount)
        rank = Array{Union{Missing, Int}}(undef, length(forward_discount))

        for (i, j) in enumerate(permutation_index)
            ismissing(forward_discount[j]) ? (rank[j] = missing) : rank[j] = i
        end

        return rank
    end

    group_transform!(df, group_on, input, interest_rate_rank_computation, output)
end

function assign_interest_rate_baskets!(df)
    group_on = :date
    input = :interest_rate_rank
    output = :interest_rate_basket

    function interest_rate_basket_definition(rank)
        nonmissing_rank = skipmissing(rank)
        isempty(nonmissing_rank) && return fill(missing, length(rank))
        num_currencies = maximum(nonmissing_rank)

        basket_size(i) = Int(ceil((num_currencies - (basket_allocation_order[i]-1))/6))

        max_rank_per_basket = cumsum([basket_size(i) for i in 1:6])

        basket = assign_basket_num.(rank, Ref(max_rank_per_basket))

        return basket
    end

    group_transform!(df, group_on, input, interest_rate_basket_definition, output)
end

function assign_basket_num(row_rank, max_rank_per_basket)
    ismissing(row_rank) && return missing
    basket_num = findfirst(>=(row_rank), max_rank_per_basket)
    return basket_num
end

function combine_net_carry_returns!(df)
    df.net_carry_return = Vector{Union{Missing, Float64}}(missing, nrow(df))
    for row in eachrow(df)
        !ismissing(row.interest_rate_basket) || continue
        
        row.interest_rate_basket == 1 && (row.net_carry_return = row.net_short_carry_return)
        row.net_carry_return = row.net_long_carry_return
    end
end

function aggregate_baskets(df)
    basketed_df = df[.!ismissing.(df.interest_rate_basket), :]

    group_on = [:date, :interest_rate_basket]
    aggregated_columns = [:delta_spot, :forward_discount, :carry_return, :net_carry_return]

    basket_rates = group_combine(
        basketed_df, group_on, aggregated_columns, mean, aggregated_columns
    )

sort!(basket_rates, [:date, :interest_rate_basket])

    return basket_rates
end

function compute_factors(df)
    group_on = :date
    input = [:delta_spot, :carry_return, :net_carry_return, :interest_rate_basket]
    output = [:rx, :hml_fx, :rx_net, :hml_fx_net, :dollar, :carry]

    function hml(basket_nums, series)
        basketed_series = Dict(
            basket => series for (basket, series) in zip(basket_nums, series)
        )

        return basketed_series[6] - basketed_series[1]
    end

    function factor_computations(delta_spot, carry_return, net_carry_return, basket_num)
        rx = mean(skipmissing(carry_return))
        hml_fx = hml(basket_num, carry_return)
        rx_net = mean(skipmissing(net_carry_return))
        hml_fx_net = hml(basket_num, net_carry_return)
        dollar = mean(skipmissing(delta_spot))
        carry = hml(basket_num, delta_spot)
        
        return [(rx, hml_fx, rx_net, hml_fx_net, dollar, carry)]
    end

    factors = group_combine(df, group_on, input, factor_computations, output, cast=false)

    return factors
end
    

function main(input, cur_code_name, forward_bid_name, forward_mid_name, forward_ask_name)
    rates = CSV.read(input, DataFrame)
    
    compute_delta_spot!(rates, cur_code_name)
    compute_forward_discount!(rates, cur_code_name, forward_mid_name)
    compute_carry_returns!(rates, cur_code_name, forward_bid_name, forward_ask_name)

    compute_interest_rate_ranking!(rates)
    assign_interest_rate_baskets!(rates)
    combine_net_carry_returns!(rates)

    basket_rates = aggregate_baskets(rates)

    currency_factors = compute_factors(basket_rates)

    return currency_factors
end

main (generic function with 1 method)

In [2]:
old_python_combined_filestring_base = "../../../../Code - Old Python Version/Data/Refined Data/Currencies"
new_julia_combined_filestring_base = "../../Data/Refined Data/Currencies"

python_factors = main(joinpath(old_python_combined_filestring_base, "currencies.csv"), :curr, :for_bid, :for_mid, :for_ask)
julia_factors = main(joinpath(new_julia_combined_filestring_base, "currency_rates.csv"), :cur_code, :forward_bid, :forward_mid, :forward_ask)

Row,date,rx,hml_fx,rx_net,hml_fx_net,dollar,carry
Unnamed: 0_level_1,Date,Float64,Float64,Float64,Float64,Float64,Float64
1,1983-11-30,-0.0164371,-0.00301259,-0.0172202,-0.00294848,0.0153914,0.0110917
2,1983-12-31,-0.00855435,0.00924729,-0.0104163,0.00531925,0.00786787,0.000485357
3,1984-01-31,-0.0300285,0.0240907,-0.031178,0.0240164,0.0293586,-0.0134736
4,1984-02-29,0.0465761,0.0141806,0.0458362,0.0144424,-0.0476108,-0.00504086
5,1984-03-31,-0.00102185,-0.00814813,-0.00190061,-0.00849513,0.0011936,0.0208235
6,1984-04-30,-0.0345279,0.0119197,-0.0355351,0.0104154,0.0330083,-0.00350537
7,1984-05-31,-0.00866225,0.000918446,-0.00949107,0.000484597,0.00796162,0.00876436
8,1984-06-30,-0.0222722,-0.00196952,-0.0233708,-0.00328071,0.0213212,0.0108247
9,1984-07-31,-0.0507343,-0.0586337,-0.0518118,-0.0604458,0.0490705,0.0674597
10,1984-08-31,0.00952975,0.0259592,0.00805588,0.0228228,-0.00983357,-0.0126415


In [23]:
python_factors

Row,date,rx,hml_fx,rx_net,hml_fx_net,dollar,carry
Unnamed: 0_level_1,Date,Float64,Float64,Float64,Float64,Float64,Float64
1,1983-11-30,-0.0164371,-0.00301259,-0.0172202,-0.00294848,0.0153914,0.0110917
2,1983-12-31,-0.00855435,0.00924729,-0.0104163,0.00531925,0.00786787,0.000485357
3,1984-01-31,-0.0300285,0.0240907,-0.031178,0.0240164,0.0293586,-0.0134736
4,1984-02-29,0.0465761,0.0141806,0.0458362,0.0144424,-0.0476108,-0.00504086
5,1984-03-31,-0.00102185,-0.00814813,-0.00190061,-0.00849513,0.0011936,0.0208235
6,1984-04-30,-0.0345279,0.0119197,-0.0355351,0.0104154,0.0330083,-0.00350537
7,1984-05-31,-0.00866225,0.000918446,-0.00949107,0.000484597,0.00796162,0.00876436
8,1984-06-30,-0.0222722,-0.00196952,-0.0233708,-0.00328071,0.0213212,0.0108247
9,1984-07-31,-0.0507343,-0.0586337,-0.0518118,-0.0604458,0.0490705,0.0674597
10,1984-08-31,0.00952975,0.0259592,0.00805588,0.0228228,-0.00983357,-0.0126415


In [24]:
julia_factors

Row,date,rx,hml_fx,rx_net,hml_fx_net,dollar,carry
Unnamed: 0_level_1,Date,Float64,Float64,Float64,Float64,Float64,Float64
1,1983-11-30,-0.0164371,-0.00301259,-0.0170909,-0.00294848,0.0153914,0.0110917
2,1983-12-31,-0.00855435,0.00924729,-0.0100971,0.00531925,0.00786787,0.000485357
3,1984-01-31,-0.0300285,0.0240907,-0.0309099,0.0240164,0.0293586,-0.0134736
4,1984-02-29,0.0465761,0.0141806,0.0459695,0.0144424,-0.0476108,-0.00504086
5,1984-03-31,-0.00102185,-0.00814813,-0.00163983,-0.00849513,0.0011936,0.0208235
6,1984-04-30,-0.0345279,0.0119197,-0.035266,0.0104154,0.0330083,-0.00350537
7,1984-05-31,-0.00866225,0.000918446,-0.00921588,0.000484597,0.00796162,0.00876436
8,1984-06-30,-0.0222722,-0.00196952,-0.0230918,-0.00328071,0.0213212,0.0108247
9,1984-07-31,-0.0507343,-0.0586337,-0.051525,-0.0604458,0.0490705,0.0674597
10,1984-08-31,0.00952975,0.0259592,0.00820246,0.0228228,-0.00983357,-0.0126415


In [28]:
python_data_in = CSV.read(joinpath(old_python_combined_filestring_base, "currencies.csv"), DataFrame)
julia_data_in = CSV.read(joinpath(new_julia_combined_filestring_base, "currency_rates.csv"), DataFrame)

Row,cur_code,date,spot_bid,forward_bid,spot_mid,forward_mid,spot_ask,forward_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1976-01-31,missing,missing,missing,missing,missing,missing
2,AUD,1976-02-29,missing,missing,missing,missing,missing,missing
3,AUD,1976-03-31,missing,missing,missing,missing,missing,missing
4,AUD,1976-04-30,missing,missing,missing,missing,missing,missing
5,AUD,1976-05-31,missing,missing,missing,missing,missing,missing
6,AUD,1976-06-30,missing,missing,missing,missing,missing,missing
7,AUD,1976-07-31,missing,missing,missing,missing,missing,missing
8,AUD,1976-08-31,missing,missing,missing,missing,missing,missing
9,AUD,1976-09-30,missing,missing,missing,missing,missing,missing
10,AUD,1976-10-31,missing,missing,missing,missing,missing,missing


Oh of course, the processing of the currency data is different before ever getting to the factor script.

In [37]:
julia_data_in[(julia_data_in.cur_code .== "AUD") .&& (.!ismissing.(julia_data_in.forward_bid)), :]

Row,cur_code,date,spot_bid,forward_bid,spot_mid,forward_mid,spot_ask,forward_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1984-12-31,1.2107,1.2129,1.2114,1.214,1.2121,1.2151
2,AUD,1985-01-31,1.227,1.2308,1.2277,1.2319,1.2285,1.233
3,AUD,1985-02-28,1.405,1.395,1.408,1.399,1.411,1.402
4,AUD,1985-03-31,1.4215,1.4284,1.423,1.4304,1.4245,1.4324
5,AUD,1985-04-30,1.548,1.558,1.55,1.561,1.553,1.564
6,AUD,1985-05-31,1.5049,1.514,1.506,1.5156,1.5071,1.5172
7,AUD,1985-06-30,1.4925,1.5015,1.4936,1.5032,1.4948,1.5049
8,AUD,1985-07-31,1.3799,1.3881,1.3808,1.3895,1.3818,1.3908
9,AUD,1985-08-31,1.4245,1.4347,1.4255,1.4364,1.4265,1.438
10,AUD,1985-09-30,1.4225,1.4324,1.4235,1.4338,1.4245,1.4351


In [35]:
python_data_in[python_data_in.curr .== "AUD", :]

Row,curr,date,spot_bid,spot_mid,spot_ask,for_bid,for_mid,for_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1984-12-31,1.2107,1.2114,1.2121,1.2129,1.214,1.2151
2,AUD,1985-01-31,1.227,1.2277,1.2285,1.2308,1.2319,1.233
3,AUD,1985-02-28,1.405,1.408,1.411,1.395,1.399,1.402
4,AUD,1985-03-31,1.4215,1.423,1.4245,1.4284,1.4304,1.4324
5,AUD,1985-04-30,1.548,1.55,1.553,1.558,1.561,1.564
6,AUD,1985-05-31,1.5049,1.506,1.5071,1.514,1.5156,1.5172
7,AUD,1985-06-30,1.4925,1.4936,1.4948,1.5015,1.5032,1.5049
8,AUD,1985-07-31,1.3799,1.3808,1.3818,1.3881,1.3895,1.3908
9,AUD,1985-08-31,1.4245,1.4255,1.4265,1.4347,1.4364,1.438
10,AUD,1985-09-30,1.4225,1.4235,1.4245,1.4324,1.4338,1.4351


In [32]:
for i in names(python_data_in[:, 3:end])
    @show i
    @show python_data_in[:, i] - julia_data_in[:, i]
end

i = "spot_bid"


DimensionMismatch: DimensionMismatch: dimensions must match: a has dims (Base.OneTo(12256),), b has dims (Base.OneTo(18792),), mismatch at 1

I just modified the refining of currency data and I believe they should be the same now. Trying again.

In [3]:
old_python_combined_filestring_base = "../../../../Code - Old Python Version/Data/Refined Data/Currencies"
new_julia_combined_filestring_base = "../../Data/Refined Data/Currencies"

python_factors = main(joinpath(old_python_combined_filestring_base, "currencies.csv"), :curr, :for_bid, :for_mid, :for_ask)
julia_factors = main(joinpath(new_julia_combined_filestring_base, "currency_rates.csv"), :cur_code, :forward_bid, :forward_mid, :forward_ask)

Row,date,rx,hml_fx,rx_net,hml_fx_net,dollar,carry
Unnamed: 0_level_1,Date,Float64,Float64,Float64,Float64,Float64,Float64
1,1983-11-30,-0.0164371,-0.00301259,-0.0170909,-0.00294848,0.0153914,0.0110917
2,1983-12-31,-0.00855435,0.00924729,-0.0100971,0.00531925,0.00786787,0.000485357
3,1984-01-31,-0.0300285,0.0240907,-0.0309099,0.0240164,0.0293586,-0.0134736
4,1984-02-29,0.0465761,0.0141806,0.0459695,0.0144424,-0.0476108,-0.00504086
5,1984-03-31,-0.00102185,-0.00814813,-0.00163983,-0.00849513,0.0011936,0.0208235
6,1984-04-30,-0.0345279,0.0119197,-0.035266,0.0104154,0.0330083,-0.00350537
7,1984-05-31,-0.00866225,0.000918446,-0.00921588,0.000484597,0.00796162,0.00876436
8,1984-06-30,-0.0222722,-0.00196952,-0.0230918,-0.00328071,0.0213212,0.0108247
9,1984-07-31,-0.0507343,-0.0586337,-0.051525,-0.0604458,0.0490705,0.0674597
10,1984-08-31,0.00952975,0.0259592,0.00820246,0.0228228,-0.00983357,-0.0126415


In [42]:
python_factors

Row,date,rx,hml_fx,rx_net,hml_fx_net,dollar,carry
Unnamed: 0_level_1,Date,Float64,Float64,Float64,Float64,Float64,Float64
1,1983-11-30,-0.0164371,-0.00301259,-0.0172202,-0.00294848,0.0153914,0.0110917
2,1983-12-31,-0.00855435,0.00924729,-0.0104163,0.00531925,0.00786787,0.000485357
3,1984-01-31,-0.0300285,0.0240907,-0.031178,0.0240164,0.0293586,-0.0134736
4,1984-02-29,0.0465761,0.0141806,0.0458362,0.0144424,-0.0476108,-0.00504086
5,1984-03-31,-0.00102185,-0.00814813,-0.00190061,-0.00849513,0.0011936,0.0208235
6,1984-04-30,-0.0345279,0.0119197,-0.0355351,0.0104154,0.0330083,-0.00350537
7,1984-05-31,-0.00866225,0.000918446,-0.00949107,0.000484597,0.00796162,0.00876436
8,1984-06-30,-0.0222722,-0.00196952,-0.0233708,-0.00328071,0.0213212,0.0108247
9,1984-07-31,-0.0507343,-0.0586337,-0.0518118,-0.0604458,0.0490705,0.0674597
10,1984-08-31,0.00952975,0.0259592,0.00805588,0.0228228,-0.00983357,-0.0126415


In [40]:
julia_factors

Row,date,rx,hml_fx,rx_net,hml_fx_net,dollar,carry
Unnamed: 0_level_1,Date,Float64,Float64,Float64,Float64,Float64,Float64
1,1983-11-30,-0.0164371,-0.00301259,-0.0170909,-0.00294848,0.0153914,0.0110917
2,1983-12-31,-0.00855435,0.00924729,-0.0100971,0.00531925,0.00786787,0.000485357
3,1984-01-31,-0.0300285,0.0240907,-0.0309099,0.0240164,0.0293586,-0.0134736
4,1984-02-29,0.0465761,0.0141806,0.0459695,0.0144424,-0.0476108,-0.00504086
5,1984-03-31,-0.00102185,-0.00814813,-0.00163983,-0.00849513,0.0011936,0.0208235
6,1984-04-30,-0.0345279,0.0119197,-0.035266,0.0104154,0.0330083,-0.00350537
7,1984-05-31,-0.00866225,0.000918446,-0.00921588,0.000484597,0.00796162,0.00876436
8,1984-06-30,-0.0222722,-0.00196952,-0.0230918,-0.00328071,0.0213212,0.0108247
9,1984-07-31,-0.0507343,-0.0586337,-0.051525,-0.0604458,0.0490705,0.0674597
10,1984-08-31,0.00952975,0.0259592,0.00820246,0.0228228,-0.00983357,-0.0126415


In [4]:
python_data_in = CSV.read(joinpath(old_python_combined_filestring_base, "currencies.csv"), DataFrame)
julia_data_in = CSV.read(joinpath(new_julia_combined_filestring_base, "currency_rates.csv"), DataFrame)

Row,cur_code,date,spot_bid,forward_bid,spot_mid,forward_mid,spot_ask,forward_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AED,1995-06-30,3.6725,3.6715,3.6728,3.67235,3.673,3.6732
2,AED,1995-07-31,3.6726,3.6716,3.673,3.6726,3.6734,3.6736
3,AED,1995-08-31,3.6723,3.6713,3.6726,3.6722,3.6729,3.6731
4,AED,1995-09-30,3.6724,3.6719,3.6728,3.6728,3.6732,3.6737
5,AED,1995-10-31,3.6729,3.6725,3.6731,3.67295,3.6732,3.6734
6,AED,1995-11-30,3.6729,3.6724,3.673,3.67285,3.6731,3.6733
7,AED,1995-12-31,3.6727,3.6722,3.6729,3.67275,3.673,3.6733
8,AED,1996-01-31,3.6727,3.6723,3.6729,3.6729,3.6731,3.6735
9,AED,1996-02-29,3.6722,3.6717,3.6727,3.67255,3.6732,3.6734
10,AED,1996-03-31,3.6725,3.6721,3.6727,3.6727,3.6729,3.6733


In [44]:
first(python_data_in, 5)

Row,curr,date,spot_bid,spot_mid,spot_ask,for_bid,for_mid,for_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AED,1995-06-30,3.6725,3.6728,3.673,3.6715,3.67235,3.6732
2,AED,1995-07-31,3.6726,3.673,3.6734,3.6716,3.6726,3.6736
3,AED,1995-08-31,3.6723,3.6726,3.6729,3.6713,3.6722,3.6731
4,AED,1995-09-30,3.6724,3.6728,3.6732,3.6719,3.6728,3.6737
5,AED,1995-10-31,3.6729,3.6731,3.6732,3.6725,3.67295,3.6734


In [45]:
first(julia_data_in, 5)

Row,cur_code,date,spot_bid,forward_bid,spot_mid,forward_mid,spot_ask,forward_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AED,1995-06-30,3.6725,3.6715,3.6728,3.67235,3.673,3.6732
2,AED,1995-07-31,3.6726,3.6716,3.673,3.6726,3.6734,3.6736
3,AED,1995-08-31,3.6723,3.6713,3.6726,3.6722,3.6729,3.6731
4,AED,1995-09-30,3.6724,3.6719,3.6728,3.6728,3.6732,3.6737
5,AED,1995-10-31,3.6729,3.6725,3.6731,3.67295,3.6732,3.6734


In [53]:
python_name_to_julia = Dict(
    "spot_bid" => "spot_bid", "spot_mid" => "spot_mid", "spot_ask" => "spot_ask",
    "for_bid" => "forward_bid", "for_mid" => "forward_mid", "for_ask" => "forward_ask",
)

for i in names(python_data_in[:, 3:end])
    @show i
    x = python_data_in[:, i] - julia_data_in[:, python_name_to_julia[i]]
    @show x
end

In [16]:
python_name_to_julia = Dict(
    "spot_bid" => "spot_bid", "spot_mid" => "spot_mid", "spot_ask" => "spot_ask",
    "for_bid" => "forward_bid", "for_mid" => "forward_mid", "for_ask" => "forward_ask",
)

for i in names(python_data_in[:, 3:end])
    x = python_data_in[:, i] - julia_data_in[:, python_name_to_julia[i]]
    println(i)
    println(sum(skipmissing(x)))
end

spot_bid


-0.9415352187103792
spot_mid
0.0
spot_ask
0.9415352187103792
for_bid
-0.9823226450426055
for_mid
0.0
for_ask
0.9823226450426055


In [48]:
function not_missing_not_equal(x, y)
    global mask = x .!= y
    mask[ismissing.(x)] .= false
    return convert(Vector{Bool},mask)
end

python_data_in[not_missing_not_equal(python_data_in.spot_bid, julia_data_in.spot_bid), :]

Row,curr,date,spot_bid,spot_mid,spot_ask,for_bid,for_mid,for_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1993-12-31,1.47275,1.47319,1.47384,1.472,1.474,1.475
2,AUD,1994-01-31,1.40944,1.40984,1.41044,1.4108,1.4119,1.413
3,AUD,1994-02-28,1.40252,1.40292,1.40351,1.4045,1.4057,1.4069
4,AUD,1994-03-31,1.42552,1.42653,1.42755,1.4267,1.4279,1.4292
5,AUD,1994-04-30,1.40115,1.40154,1.40213,1.3994,1.4005,1.4015
6,AUD,1994-05-31,1.3541,1.35446,1.35501,1.354,1.355,1.356
7,AUD,1994-06-30,1.36818,1.36855,1.36911,1.3682,1.3693,1.3704
8,AUD,1994-07-31,1.35281,1.35318,1.35373,1.3521,1.3532,1.3543
9,AUD,1994-08-31,1.34282,1.34318,1.34372,1.343,1.344,1.346
10,AUD,1994-09-30,1.35099,1.35135,1.35172,1.3508,1.352,1.3532


In [49]:
julia_data_in[not_missing_not_equal(python_data_in.spot_bid, julia_data_in.spot_bid), :]

Row,cur_code,date,spot_bid,forward_bid,spot_mid,forward_mid,spot_ask,forward_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1993-12-31,1.47384,1.472,1.47319,1.474,1.47275,1.475
2,AUD,1994-01-31,1.41044,1.4108,1.40984,1.4119,1.40944,1.413
3,AUD,1994-02-28,1.40351,1.4045,1.40292,1.4057,1.40252,1.4069
4,AUD,1994-03-31,1.42755,1.4267,1.42653,1.4279,1.42552,1.4292
5,AUD,1994-04-30,1.40213,1.3994,1.40154,1.4005,1.40115,1.4015
6,AUD,1994-05-31,1.35501,1.354,1.35446,1.355,1.3541,1.356
7,AUD,1994-06-30,1.36911,1.3682,1.36855,1.3693,1.36818,1.3704
8,AUD,1994-07-31,1.35373,1.3521,1.35318,1.3532,1.35281,1.3543
9,AUD,1994-08-31,1.34372,1.343,1.34318,1.344,1.34282,1.346
10,AUD,1994-09-30,1.35172,1.3508,1.35135,1.352,1.35099,1.3532


What is the actual spot bid rate in AUD on 1993-12-31?

In [53]:
using Dates

In [62]:
names(python_data["spot_bid"])

74-element Vector{String}:
 "date"
 "BBAUDSP"
 "AUSTDO\$"
 "MBATSSP"
 "AUSTSC\$"
 "BBBELSP"
 "NBBECSP"
 "BELGLU\$"
 "BBCADSP"
 "CNDOLL\$"
 ⋮
 "TAIWDO\$"
 "TDTHBSP"
 "THABAH\$"
 "TDTRYSP"
 "TURKLI\$"
 "TDAEDSP"
 "UAEDIR\$"
 "BBGBPSP"
 "USDOLLR"

In [58]:
python_data["spot_bid"]

Row,date,BBAUDSP,AUSTDO$,MBATSSP,AUSTSC$,BBBELSP,NBBECSP,BELGLU$,BBCADSP,CNDOLL$,BBHKDSP,HKDOLL$,TDCZKSP,CZECHC$,BBDKKSP,DANISH$,USEURSP,FINMAR$,BBFRFSP,FRENFR$,BBDEMSP,DMARKE$,GREDRA$,TDHUFSP,HUNFOR$,INDRUP$,TDIDRSP,INDORU$,BBIEPSP,IPUNTE$,BBITLSP,ITALIR$,BBJPYSP,JAPAYE$,TDKWDSP,KUWADI$,BBMYRSP,MALADL$,MEXPES$,BBNLGSP,GUILDE$,BBNZDSP,NZDOLL$,BBNOKSP,NORKRO$,TDPHPSP,PHILPE$,TDPLNSP,POLZLO$,PORTES$,TDSARSP,SAUDRI$,BBSGDSP,SINGDO$,BBZARSP,COMRAN$,TDKRWSP,KORSWO$,MBESPSP,SPANPE$,BBSEKSP,SWEKRO$,BBCHFSP,SWISSF$,U$NTDSP,TAIWDO$,TDTHBSP,THABAH$,TDTRYSP,TURKLI$,TDAEDSP,UAEDIR$,BBGBPSP,USDOLLR
Unnamed: 0_level_1,String15,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String15,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String15,String15,String7,String7,String7,String7,String7,String7,String15,String7,String15,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String15,String7,String7,String7,String15,String7,String7,String7,String7,String7,String7,String15,String7,String7,String15,String7,String7,String7,String7,String7,String7,String15,String15,String7,String7,String7,String7,String7,Float64
1,31/01/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0295
2,27/02/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.025
3,31/03/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.9155
4,30/04/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.84
5,31/05/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.758
6,30/06/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.7845
7,30/07/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.785
8,31/08/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.777
9,30/09/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.66
10,29/10/1976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.585


In [71]:
findfirst(==("31/12/1993"), python_data["spot_ask"].date)

216

In [67]:
python_data["spot_ask"][python_data["spot_ask"].date .== "31/12/1993", :]

Row,date,BBAUDSP,AUSTDO$,MBATSSP,AUSTSC$,BBBELSP,NBBECSP,BELGLU$,BBCADSP,CNDOLL$,BBHKDSP,HKDOLL$,TDCZKSP,CZECHC$,BBDKKSP,DANISH$,USEURSP,FINMAR$,BBFRFSP,FRENFR$,BBDEMSP,DMARKE$,GREDRA$,TDHUFSP,HUNFOR$,INDRUP$,TDIDRSP,INDORU$,BBIEPSP,IPUNTE$,BBITLSP,ITALIR$,BBJPYSP,JAPAYE$,TDKWDSP,KUWADI$,BBMYRSP,MALADL$,MEXPES$,BBNLGSP,GUILDE$,BBNZDSP,NZDOLL$,BBNOKSP,NORKRO$,TDPHPSP,PHILPE$,TDPLNSP,POLZLO$,PORTES$,TDSARSP,SAUDRI$,BBSGDSP,SINGDO$,BBZARSP,COMRAN$,TDKRWSP,KORSWO$,MBESPSP,SPANPE$,BBSEKSP,SWEKRO$,BBCHFSP,SWISSF$,U$NTDSP,TAIWDO$,TDTHBSP,THABAH$,TDTRYSP,TURKLI$,TDAEDSP,UAEDIR$,BBGBPSP,USDOLLR
Unnamed: 0_level_1,String15,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String15,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String15,String15,String7,String7,String7,String7,String7,String7,String15,String7,String15,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String7,String15,String7,String7,String7,String15,String7,String7,String7,String7,String7,String7,String15,String7,String7,String15,String7,String7,String7,String7,String7,String7,String15,String15,String7,String7,String7,String7,String7,Float64
1,31/12/1993,1.4727,0.679,12.205,12.195,0,36.25,36.186,1.3243,1.3243,7.729,7.728,30.03999,,6.7688,6.7999,,5.8012,5.9045,5.906,1.74,1.737,249.6,100.85,100.8,31.3725,2112,2112,1.413,1.4099,1717.3,1713,111.82,111.63,0.299,0.299,2.697,2.6955,3.107,1.9419,1.9428,1.7927,0.5597,7.5175,7.5218,27.89999,27.9,2.15,,176.9,3.752,3.752,1.609,1.61,3.399,3.3985,807.2,807.2,143.35,143.05,8.3327,8.3402,1.4852,1.4855,26.72,26.63,25.53,25.56,0.01494,0.01494,3.673,,1.479,1.48


In [46]:
size(mask)

(12256,)

In [47]:
python_data_in[convert(Vector{Bool},mask),:]

Row,curr,date,spot_bid,spot_mid,spot_ask,for_bid,for_mid,for_ask
Unnamed: 0_level_1,String3,Date,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,AUD,1993-12-31,1.47275,1.47319,1.47384,1.472,1.474,1.475
2,AUD,1994-01-31,1.40944,1.40984,1.41044,1.4108,1.4119,1.413
3,AUD,1994-02-28,1.40252,1.40292,1.40351,1.4045,1.4057,1.4069
4,AUD,1994-03-31,1.42552,1.42653,1.42755,1.4267,1.4279,1.4292
5,AUD,1994-04-30,1.40115,1.40154,1.40213,1.3994,1.4005,1.4015
6,AUD,1994-05-31,1.3541,1.35446,1.35501,1.354,1.355,1.356
7,AUD,1994-06-30,1.36818,1.36855,1.36911,1.3682,1.3693,1.3704
8,AUD,1994-07-31,1.35281,1.35318,1.35373,1.3521,1.3532,1.3543
9,AUD,1994-08-31,1.34282,1.34318,1.34372,1.343,1.344,1.346
10,AUD,1994-09-30,1.35099,1.35135,1.35172,1.3508,1.352,1.3532


In [39]:
using StatsBase: countmap

In [42]:
xmask

12256-element Vector{Union{Missing, Bool}}:
 false
 false
 false
 false
 false
 false
 false
 false
 false
 false
     ⋮
 false
 false
 false
 false
 false
 false
 false
 false
 false

In [40]:
countmap(mask)

Dict{Union{Missing, Bool}, Int64} with 2 entries:
  false => 11060
  true  => 1196

After much searching, I found that when I flipped american currency series from bid to ask, I saved the series under the FLIPPED name, so if I meant to calculate the bid series, I checked the ask series and saved it as an ask series, thus reversing the flip.

In [3]:
python_data_in = CSV.read(joinpath(old_python_combined_filestring_base, "currencies.csv"), DataFrame)
julia_data_in = CSV.read(joinpath(new_julia_combined_filestring_base, "currency_rates.csv"), DataFrame);

In [5]:
python_name_to_julia = Dict(
    "spot_bid" => "spot_bid", "spot_mid" => "spot_mid", "spot_ask" => "spot_ask",
    "for_bid" => "forward_bid", "for_mid" => "forward_mid", "for_ask" => "forward_ask",
)

for i in names(python_data_in)[3:end]
    println(i)
    println(isequal(python_data_in[:, i], julia_data_in[:, python_name_to_julia[i]]))
end


spot_bid


true
spot_mid
true
spot_ask
true
for_bid
true
for_mid
true
for_ask
true


In [76]:
println("test")

I got the two sets of factor outputs to match exactly.