In [5]:
using DataFrames
using CSV
using Plots
using Distributions
using JLD2

include("CommonFunctions.jl")
include("CommonConstants.jl")
using .CommonFunctions
using .CommonConstants

Load up the betas data and have a look at the formatting.

In [6]:
# Name of the results sub-folder for local-currency returns with strict equity filtering.
options_folder = option_foldername(; currency_type="local", strict_eq=true)

"local-rets_eq-strict"

In [9]:
# Path of the saved betas file inside the results folder for the chosen options.
betas_filename = joinpath("../data/results", options_folder, "betas.jld2")
# NOTE(review): this destructuring assumes `load` yields exactly two items, in the order
# (betas table, data description). Confirm against how the file was written; if `load`
# returns a dictionary, iteration order is not guaranteed and items may be key => value pairs.
betas, data_description = load(betas_filename)

Row,fundid,date,world_capm_lrv_betas,world_capm_lrv_net_betas,world_ff3_lrv_betas,world_ff3_verdelhan_betas,world_ff3_lrv_net_betas,world_ffcarhart_lrv_betas,world_ffcarhart_verdelhan_betas,world_ffcarhart_lrv_net_betas,world_ff5_verdelhan_betas,world_ff6_verdelhan_betas,world_ff6_lrv_net_betas,world_ff6_lrv_betas,world_capm_verdelhan_betas,world_ff5_lrv_net_betas,world_ff5_lrv_betas
Unnamed: 0_level_1,String15,Date,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?,NamedTup…?
1,FS00008KO2,2011-09-30,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
2,FS00008KO2,2011-10-31,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
3,FS00008KO2,2011-11-30,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
4,FS00008KO2,2011-12-31,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
5,FS00008KO2,2012-01-31,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
6,FS00008KO2,2012-02-29,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
7,FS00008KO2,2012-03-31,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
8,FS00008KO2,2012-04-30,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
9,FS00008KO2,2012-05-31,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing
10,FS00008KO2,2012-06-30,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing,missing


In [None]:
# Show the first five rows of the betas table.
first(betas, 5) |> display

Extract the info required to calculate p-values and calculate all p-values.

In [None]:
# Work on a copy so `betas` itself is untouched, then drop every row containing a missing value.
betasshort = dropmissing!(copy(betas))

In [None]:
"""
    compute_pvalues(regression_tuple)

Return the p-value of every coefficient stored in `regression_tuple`.

`regression_tuple` must provide the fields `coef`, `se` and `df`. Each t-statistic
`coef / se` is squared and evaluated against the upper tail of an `F(1, df)`
distribution, which is equivalent to a two-sided t-test with `df` degrees of freedom.

NOTE(review): `df` is presumably the residual degrees of freedom of the regression —
confirm against the code that produced the tuples.
"""
function compute_pvalues(regression_tuple)
    coef = regression_tuple.coef
    se = regression_tuple.se
    df = regression_tuple.df

    t = coef ./ se
    # Square element-wise: `t^2` has no method when `t` is a vector of t-statistics.
    # `Ref` keeps the distribution from being broadcast over.
    p = ccdf.(Ref(FDist(1, df)), t .^ 2)

    return p
end

In [None]:
"""
    build_ptable(model, source_df; n_factors=2)

Build a table of p-values for the trailing regressors of `model`, one row per row of
`source_df`.

# Arguments
- `model`: model name. The regression results are read from the column
  `"\$(model)_betas"`; if no such column exists, `model` itself is used as the column name.
- `source_df`: table with `fundid` and `date` columns plus the model column. Every entry
  of the model column must provide `regressors`, `coef`, `se` and `df`, so rows with
  missing results must be dropped beforehand.

# Keywords
- `n_factors`: number of trailing regressors whose p-values are kept. The default of 2
  matches `proportion_sig`, which reads columns 3 and 4 of the result.
  NOTE(review): the earlier code sliced three regressors but filled two columns; confirm
  how many currency factors each model has.

# Returns
A `DataFrame` with columns `fundid`, `date`, and one `Float64` column per kept regressor,
in regressor order.

# Throws
- `ArgumentError` if `n_factors < 1` or `source_df` has no rows.
"""
function build_ptable(model, source_df; n_factors::Integer=2)
    n_factors >= 1 || throw(ArgumentError("n_factors must be at least 1, got $n_factors"))

    model_col = Symbol("$(model)_betas")
    if !hasproperty(source_df, model_col)
        # `model` may already be a complete column name.
        model_col = Symbol(model)
    end

    regressions = source_df[!, model_col]
    isempty(regressions) &&
        throw(ArgumentError("source_df has no rows; cannot infer the factor names"))
    n_data = length(regressions)

    factor_names = Symbol.(first(regressions).regressors[(end - n_factors + 1):end])

    # A vector of pairs (rather than a Dict) keeps the columns in regressor order.
    factor_columns = DataFrame(
        [name => Vector{Float64}(undef, n_data) for name in factor_names]
    )
    ptable = hcat(source_df[:, [:fundid, :date]], factor_columns)

    for (i, regression) in enumerate(regressions)
        pvalues = compute_pvalues(regression)[(end - n_factors + 1):end]
        # Assign by column name so each value lands in its own regressor's column.
        for (name, pvalue) in zip(factor_names, pvalues)
            ptable[i, name] = pvalue
        end
    end

    return ptable
end

These functions compute, for each date, the proportion of rows whose two p-values are both at or below the chosen significance level.

In [3]:
"""
    truth_fraction(bool_array)

Return the share of elements of `bool_array` that compare equal to `true`.

An empty input gives `NaN`, because the count is divided by a length of zero.
"""
function truth_fraction(bool_array)
    n_true = count(==(true), bool_array)
    return n_true / length(bool_array)
end

truth_fraction (generic function with 1 method)

In [None]:
"""
    proportion_sig(ptable, sig_level)

Return, for each `date` in `ptable`, the fraction of rows whose values in columns 3 and 4
are both at or below `sig_level`.

Side effect: a Boolean column `both_sig` is added to (or overwritten in) `ptable` itself.

The result has one row per date, with the fraction stored in `sig_proportion`.
"""
function proportion_sig(ptable, sig_level)
    first_sig = ptable[:, 3] .<= sig_level
    second_sig = ptable[:, 4] .<= sig_level
    ptable.both_sig = first_sig .&& second_sig

    return combine(
        groupby(ptable, :date),
        :both_sig => truth_fraction => :sig_proportion,
    )
end

This function can be called on a model name to compute the p-values, compute the proportions of significant p-values by date, and then plot the results.

In [None]:
# Build the p-value table for the first entry of COMPLETE_MODELS.
# NOTE(review): `data` is not assigned in any visible cell; presumably `betasshort`
# (the betas table with missing rows dropped) is intended — confirm.
x = build_ptable(COMPLETE_MODELS[1], data)