In [1]:
using Revise
using FromFile

In [2]:
using DataFrames
using CSV
using Plots
using Distributions
using Arrow

In [2]:
include("shared/CommonFunctions.jl")
include("shared/CommonConstants.jl")
using .CommonFunctions
using .CommonConstants

In [2]:
@from "../utils.jl" using ProjectUtilities

In [3]:
# Display the raw-funds data path as resolved by `qpath` (`qpath` and `PATHS` are
# defined outside this notebook; the recorded output below is a relative directory).
qpath(PATHS.rawfunds)

"../../data/mutual-funds/raw"

Load up the betas data.

In [5]:
# Name of the options-specific results folder, built from the default options.
# NOTE(review): the recorded run failed because `DEFAULT_OPTIONS` was not in scope —
# presumably it should come from one of the shared modules loaded above; confirm.
options_folder = option_foldername(; DEFAULT_OPTIONS...)

UndefVarError: UndefVarError: DEFAULT_OPTIONS not defined

In [6]:
# Read the factor-beta table of every model in COMPLETE_MODELS into a dictionary
# keyed by the (currency_risk, benchmark) tuple.
model_outputs = Dict()
for (currency_risk, benchmark) in COMPLETE_MODELS
    betas_file = "factor-betas/$(benchmark)_$currency_risk.arrow"
    betas_table = Arrow.Table(
        joinpath("..", DIRS.fund, "post-processing", options_folder, betas_file)
    )
    model_outputs[(currency_risk, benchmark)] = DataFrame(betas_table)
end

UndefVarError: UndefVarError: COMPLETE_MODELS not defined

Extract the info required to calculate p-values and calculate all p-values.

In [7]:
"""
    compute_pvalues(regression_data)

Return one p-value per row of `regression_data`.

The table must provide `coef`, `se` and `df` columns. A `t` column (`coef ./ se`) is
written into `regression_data` in place; the p-value of each row is the upper-tail
probability of `t^2` under an F(1, df) distribution.
"""
function compute_pvalues(regression_data)
    # Side effect kept on purpose: the t-statistics are stored on the input table.
    regression_data.t = regression_data.coef ./ regression_data.se

    return [ccdf(FDist(1, obs.df), obs.t^2) for obs in eachrow(regression_data)]
end

compute_pvalues (generic function with 1 method)

This helper computes the fraction of `true` entries in a Boolean array; it is used to obtain the proportion of significant p-values by date.

In [8]:
"""
    truth_fraction(bool_array)

Return the share of entries of `bool_array` that equal `true`
(`NaN` for an empty collection, since this is a plain `0 / 0` division).
"""
function truth_fraction(bool_array)
    n_true = count(==(true), bool_array)
    return n_true / length(bool_array)
end

truth_fraction (generic function with 1 method)

In [9]:
"""
    proportion_sig(regression_data, model, sig_level, factor)

Compute, for every date, the fraction of funds whose currency-risk factor loadings
are significant.

# Arguments
- `regression_data`: table with `factor`, `fundid`, `date` and `pvalue` columns.
- `model`: indexable whose first element is the key looked up in
  `CURRENCYRISK_MODELS`.
- `sig_level`: a p-value counts as significant when `pvalue <= sig_level`.
- `factor`: which currency-risk factors to test, given as a `Symbol` or `String`:
  - `:slope`  — only the first factor of the currency-risk model,
  - `:level`  — only the second factor,
  - `:either` — all factors, at least one must be significant,
  - `:both`   — all factors, every one must be significant.

# Returns
A table with one row per `date` and a `proportion_sig` column.

# Throws
`ErrorException` when `factor` is not one of the four names above.
"""
function proportion_sig(regression_data, model, sig_level, factor)
    # Accept "both" as well as :both — cells in this notebook call with either form.
    factor_name = Symbol(factor)
    if factor_name ∉ (:slope, :level, :either, :both)
        error("factor must be one of :slope, :level, :either, :both")
    end

    currency_factors = CURRENCYRISK_MODELS[model[1]]
    if factor_name == :slope
        factorset = [currency_factors[1]]
    elseif factor_name == :level
        factorset = [currency_factors[2]]
    else
        factorset = currency_factors
    end

    # :either needs one significant factor; every other option requires all of the
    # selected factors to be significant (trivially so for single-factor sets).
    checkeach = factor_name == :either ? any : all

    currency_risk_only = regression_data[regression_data.factor .∈ Ref(factorset), :]
    obs_groups = groupby(currency_risk_only, [:fundid, :date])

    # One Boolean per (fund, date): is the currency exposure significant?
    sig_table = combine(
        obs_groups, :pvalue => (pp -> checkeach(pp .<= sig_level)) => :currency_sig
    )

    date_groups = groupby(sig_table, :date)
    sig_proportions = combine(
        date_groups, :currency_sig => truth_fraction => :proportion_sig
    )

    return sig_proportions
end

proportion_sig (generic function with 1 method)

This function computes the p-values for a model's regression data and then the proportion of significant p-values by date, all in one call.

In [10]:
"""
    build_proportion_sig_data(regression_data, model, sig_level, factor)

Attach p-values to `regression_data` (stored in place as a `pvalue` column) and
return the per-date proportions produced by `proportion_sig`, sorted by `date`.
"""
function build_proportion_sig_data(regression_data, model, sig_level, factor)
    regression_data.pvalue = compute_pvalues(regression_data)

    proportions = proportion_sig(regression_data, model, sig_level, factor)
    return sort!(proportions, :date)
end

build_proportion_sig_data (generic function with 1 method)

This function plots a time series of proportion values.

In [11]:
"""
    draw_proportion_plot(proportion_table, model_name)

Plot the `proportion_sig` column of `proportion_table` against its `date` column,
titled `model_name`, and display the figure.
"""
function draw_proportion_plot(proportion_table, model_name)
    fig = plot(
        proportion_table.date,
        proportion_table.proportion_sig;
        title=model_name,
        legend=false,
    )
    # Axis labels are applied to the figure just created.
    plot!(fig; xlabel="Date", xguidefontsize=10)
    plot!(
        fig;
        ylabel="Proportion of Funds with \nSignificant Currency Exposure",
        yguidefontsize=10,
    )
    return display(fig)
end

draw_proportion_plot (generic function with 1 method)

This function creates a plot from a model name and data source only.

In [12]:
"""
    plot_currency_significance(data_source, model, factor=:both; datafilter=nothing)

Plot the per-date proportion of funds with significant currency exposure (5% level)
for one model.

`data_source[model]` supplies the regression table; `model[1]` names the
currency-risk model and `model[2]` the benchmark, both of which appear in the plot
title together with `factor`. When `datafilter` is given, the table is first passed
through `infofilter` and the number of remaining funds is printed.
"""
function plot_currency_significance(data_source, model, factor=:both; datafilter=nothing)
    benchmark_model_name = uppercase(replace(string(model[2]), "_" => " "))
    currency_risk_model_name = uppercase(string(model[1]))
    plot_type = uppercase(string(factor))
    plot_title = "$benchmark_model_name + $currency_risk_model_name ($plot_type)"

    data = data_source[model]
    if datafilter !== nothing
        data = infofilter(datafilter, data, notebook=true)
        println("Filtered data to $(length(unique(data.fundid))) funds")
    end

    return draw_proportion_plot(
        build_proportion_sig_data(data, model, 0.05, factor), plot_title
    )
end

plot_currency_significance (generic function with 2 methods)

In [13]:
"""
    allplots(data_source, model; datafilter=nothing)

Draw the significance plot of `model` once for each factor option, in the order
`:slope`, `:level`, `:either`, `:both`.
"""
function allplots(data_source, model; datafilter=nothing)
    foreach((:slope, :level, :either, :both)) do factor_kind
        plot_currency_significance(data_source, model, factor_kind; datafilter=datafilter)
    end
end

allplots (generic function with 1 method)

In [14]:
# All four plots for the (:verdelhan, :world_ff5) model, filtered on
# broad_category == "Equity", domicile == "USA" and passive missing or false.
allplots(
    model_outputs,
    (:verdelhan, :world_ff5);
    datafilter=(
        [:broad_category, :domicile, :passive] =>
            (category, domicile, passive) -> (
                nonmissing(category == "Equity") &&
                nonmissing(domicile == "USA") &&
                (ismissing(passive) || !passive)
            )
    ),
)

KeyError: KeyError: key (:verdelhan, :world_ff5) not found

In [15]:
# The :both plot for the (:verdelhan, :world_ff5) model, filtered on
# broad_category == "Equity" and domicile == "USA".
plot_currency_significance(
    model_outputs,
    (:verdelhan, :world_ff5),
    :both;
    datafilter=(
        [:broad_category, :domicile] =>
            (category, domicile) -> (
                nonmissing(category == "Equity") && nonmissing(domicile == "USA")
            )
    ),
)

KeyError: KeyError: key (:verdelhan, :world_ff5) not found

In [16]:
# The factor option must be a Symbol: `proportion_sig` validates it against
# :slope / :level / :either / :both, so the string "both" would be rejected.
plot_currency_significance(model_outputs, (:verdelhan, :world_capm), :both)

KeyError: KeyError: key (:verdelhan, :world_capm) not found

In [17]:
# Draw every factor variant for every model. Factor options are passed as Symbols
# because `proportion_sig` validates against :slope / :level / :either / :both and
# would reject the equivalent strings.
for model in COMPLETE_MODELS
    for factor in (:level, :slope, :either, :both)
        plot_currency_significance(model_outputs, model, factor)
    end
end

UndefVarError: UndefVarError: COMPLETE_MODELS not defined