In [1]:
using DataFrames
using CSV
using Plots
using Distributions
using Arrow

In [2]:
include("CommonFunctions.jl")
include("CommonConstants.jl")
using .CommonFunctions
using .CommonConstants

Load up the betas data.

In [3]:
options_folder = option_foldername(currency_type="local", strict_eq=true)

"local-rets_eq-strict"

In [4]:
model_outputs = Dict()
for (currency_risk, benchmark) in COMPLETE_MODELS
    model_filename = joinpath("../data/results", options_folder, "betas/$(benchmark)_$currency_risk.arrow")
    model_outputs[(currency_risk, benchmark)] = DataFrame(Arrow.Table(model_filename))
end

Extract the info required to calculate p-values and calculate all p-values.

In [10]:
function compute_pvalues(regression_data)
    regression_data.t = regression_data.coef ./ regression_data.se
    calc_p(row) = ccdf(FDist(1,row.df), row.t^2)
    p = calc_p.(eachrow(regression_data))
    
    return p
end

compute_pvalues (generic function with 1 method)

This function computes the proportions of significant p-values by date.

In [11]:
truth_fraction(bool_array) = count(==(true), bool_array)/length(bool_array)

truth_fraction (generic function with 1 method)

In [21]:
function proportion_sig(regression_data, model, sig_level)
    currency_risk_only = regression_data[regression_data.factor .∈ Ref(CURRENCYRISK_MODELS[model[1]]), :]
    obs_groups = groupby(currency_risk_only, [:fundid, :date])
    
    sig_table = combine(obs_groups, :pvalue => (pp->all(pp .<= sig_level)) => :currency_sig)
    
    date_groups = groupby(regression_data, :date)
    sig_proportions = combine(date_groups, :currency_sig => truth_fraction => :proportion_sig)
    
    return sig_proportions
end

proportion_sig (generic function with 1 method)

This function can be called on a model name to compute the p-values, compute proportions of significant p-values by date, altogether

In [19]:
function build_proportion_sig_data(regression_data, model, sig_level)
    regression_data.pvalue = compute_pvalues(regression_data)
    
    return proportion_sig(regression_data, model, sig_level)
end 

build_proportion_sig_data (generic function with 1 method)

In [14]:
testmodel = first(COMPLETE_MODELS)

(:lrv_net, :world_capm)

In [15]:
x = model_outputs[testmodel]

Row,fundid,date,factor,coef,se,df
Unnamed: 0_level_1,String,Date,Symbol,Float64,Float64,Float64
1,FS00008KOI,2016-09-30,const,0.453189,0.305519,57.0
2,FS00008KOI,2016-09-30,MKT,74.9111,10.3368,57.0
3,FS00008KOI,2016-09-30,hml_fx_net,-0.293464,0.142144,57.0
4,FS00008KOI,2016-09-30,rx_net,-0.0204917,0.533077,57.0
5,FS00008KOI,2016-10-31,const,0.42747,0.29879,57.0
6,FS00008KOI,2016-10-31,MKT,78.1904,10.3427,57.0
7,FS00008KOI,2016-10-31,hml_fx_net,-0.256071,0.141446,57.0
8,FS00008KOI,2016-10-31,rx_net,-0.373017,0.573423,57.0
9,FS00008KOI,2016-11-30,const,0.423035,0.303903,56.0
10,FS00008KOI,2016-11-30,MKT,78.2076,10.4346,56.0


In [22]:
testx = build_proportion_sig_data(model_outputs[testmodel], testmodel, 0.05)

LoadError: UndefVarError: data_groups not defined