# Cross-Validation

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QAHRoddur/JWAS/blob/main/Examples/cross_validation.ipynb)

This notebook is auto-generated from the JWAS.jl wiki page.


In [None]:
# Install JWAS into the active environment, precompile the environment's
# packages, and load JWAS into the session.
using Pkg
Pkg.add("JWAS")
Pkg.precompile()
using JWAS


# Cross-Validation

> **Note:** This example demonstrates a k-fold cross-validation workflow. The code below reads phenotype, pedigree, and genotype files from `../data/`; point these paths at your own data, and make sure it contains enough individuals for meaningful fold sizes.


In [None]:
using JWAS,JWAS.Datasets,CSV,DataFrames,DelimitedFiles,Random,Statistics,Distributions

# K-fold cross-validation of a JWAS model: for each fold, fit the model on the
# remaining records and store the correlation between the EBV and the observed
# y1 of the held-out records.

# Input files; all three are read as comma-separated text below.
phenofile  = "../data/phenotypes.txt"
pedfile    = "../data/pedigree.txt"
genofile   = "../data/genotypes.txt"

# "NA" entries in the phenotype file are read as `missing`.
phenotypes   = CSV.read(phenofile,DataFrame,delim = ',',header=true,missingstring=["NA"]);
nind         = size(phenotypes,1)

nfold         = 5
# With fewer records than folds the fold size would be zero, leaving empty test
# folds and an empty training set for the last fold; fail early instead.
nind >= nfold || throw(ArgumentError("need at least $nfold phenotype records for $nfold-fold cross-validation, got $nind"))

# Random assignment of records to folds. The split differs between runs unless
# the RNG is seeded (e.g. with `Random.seed!`) before this line.
shuffle_index = shuffle(1:nind)
foldsize      = div(nind, nfold)
accuracy      = zeros(nfold)

# The model equation is the same for every fold, so it is defined once.
model_equation  ="y1 = intercept + x1 + x2 + x2*x3 + ID + dam + genotypes"

for i in 1:nfold
    foldstart = (i-1)*foldsize+1
    # The last fold also takes the records left over when nind is not a
    # multiple of nfold.
    foldend   = i == nfold ? nind : foldstart + foldsize - 1
    test   = shuffle_index[foldstart:foldend]
    train  = shuffle_index[Not(foldstart:foldend)]

    # Pedigree and genotypes are re-read for every fold.
    # NOTE(review): presumably JWAS modifies these objects while building or
    # running the model -- confirm before hoisting them out of the loop.
    pedigree  = get_pedigree(pedfile,separator=",",header=true);
    # NOTE(review): `genotypes` is assigned as a global, presumably so that the
    # `genotypes` term of the model equation can be resolved by name -- confirm
    # against the JWAS documentation.
    global genotypes  = get_genotypes(genofile,separator=',',method="BayesC")

    model = build_model(model_equation);
    set_covariate(model,"x1");
    set_random(model,"x2");
    set_random(model,"ID dam",pedigree);
    # Fit on the training records only.
    out    = runMCMC(model,phenotypes[train, :]);

    # Pair each held-out record with its estimated breeding value.
    results    = innerjoin(phenotypes[test,:], out["EBV_y1"], on = :ID)
    # A record with a missing y1 (or EBV) cannot enter the correlation, and
    # converting `missing` to Float64 throws, so drop such rows first.
    results    = dropmissing(results, [:EBV, :y1])
    accuracy[i]  = cor(Float64.(results[!,:EBV]),Float64.(results[!,:y1]))
    println("Accuracy: ",accuracy[i])
end
accuracy
