# Stats

## Loading Packages

In [1]:
using Distributions 

include("printmat.jl")   #just a function for prettier matrix printing

printlnPs (generic function with 1 method)

In [2]:
using Plots

#Choose the plotting backend used by the plot cells below.
#The selector is not called `backend` because Plots exports a function with that
#name; a same-named global would shadow it for the rest of the session.
plotbackend = "gr"              #"gr" (default), "pyplot"

if plotbackend == "pyplot"
    pyplot(size=(600,400))
    default(show=false)               #for pyplot: avoids pop-ups
elseif plotbackend == "gr"
    gr(size=(600,400))
    default(show=true)
else
    #a mistyped name should not silently fall back to gr
    error("unknown plotting backend \"$plotbackend\", use \"gr\" or \"pyplot\"")
end

# Load Data from Text File

In [3]:
#Excerpt from MyData.csv: the header row and the first four data rows
#(# signs added here):
#date,Mkt-RF,RF,SmallGrowth
#197901,4.18,0.77,10.96
#197902,-3.41,0.73,-2.09
#197903,5.75,0.81,11.71
#197904,0.05,0.8,3.27

#with header=true, readdlm returns a (data, headers) pair
xx = readdlm("Data/MyData.csv", ','; header=true)
x  = first(xx)                              #first element: the numeric data
println("Column headers: ", last(xx))       #second element: the header row
println("first four lines of x:")
printmat(x[1:4, :])

Column headers: AbstractString["date" "Mkt-RF" "RF" "SmallGrowth"]
first four lines of x:
197901.000     4.180     0.770    10.960
197902.000    -3.410     0.730    -2.090
197903.000     5.750     0.810    11.710
197904.000     0.050     0.800     3.270



### Creating Variables

In [4]:
#Split the columns of the data matrix x into named series.
ym  = x[:,1]                    #yearmonth, like 200712
Rme = x[:,2]                    #picking out second column ("Mkt-RF")
Rf  = x[:,3]                    #third column ("RF")
R   = x[:,4] .+                 #commands continue on the next line; .+ is used because
      0.0                       #vector + scalar needs the elementwise operator in newer Julia
Re  = R - Rf                    #excess return of the fourth column over Rf

388-element Array{Float64,1}:
  10.19
  -2.82
  10.9 
   2.47
  -2.71
   6.59
   1.0 
   8.55
  -0.87
 -11.43
   8.26
  11.19
  14.48
   ⋮   
  -9.65
  -7.78
   5.53
  -7.38
  12.0 
   5.68
   0.46
  11.27
  -0.39
   1.42
   1.91
   1.03

# Statistics

### Means and Standard Deviations

In [5]:
#Column-wise moments of the two series. The second argument (1) makes mean/std work
#along each column, so μ and σ are row vectors. Type \mu[Tab] and \sigma[Tab] to get μ and σ.
μ = mean(hcat(Rme, Re), 1)
σ = std(hcat(Rme, Re), 1)

#for more stat functions, see the package StatsBase.jl
printlnPs("means: ", μ)
printlnPs("std: ", σ)

#covariance and correlation matrices, each printed under its own heading
for (heading, stat) in (("cov([Rme Re]): ", cov), ("cor([Rme Re]): ", cor))
    println("\n", heading)
    printmat(stat(hcat(Rme, Re)))
end

means:      0.602     0.303
std:      4.604     8.572

cov([Rme Re]): 
    21.197    28.426
    28.426    73.475


cor([Rme Re]): 
     1.000     0.720
     0.720     1.000



### OLS

In [6]:
#OLS of Re on a constant and Rme, written in the usual y = x*b + u notation.
y   = copy(Re)                      #copy, so y is independent of Re
ett = fill(1.0, size(Rme,1))        #a vector of ones, with as many rows as Rme
x   = hcat(ett, Rme)                #Tx2 regressor matrix; x is rebound here (it held the loaded data before)
b   = inv(x'*x)*(x'*y)              #OLS via the normal equations
b2  = x\y                           #also OLS, quicker and numerically more stable
u   = y - x*b                       #OLS residuals
R2a = 1 - var(u)/var(y)             #share of var(y) not left in the residuals

println("OLS coefficients, regressing Re on constant and Rme, different calculations")
printmat(hcat(b, b2))               #the two estimates side by side
printlnPs("R2: ", R2a)
printlnPs("no. of observations: ", size(Re,1))

OLS coefficients, regressing Re on constant and Rme, different calculations
    -0.504    -0.504
     1.341     1.341

R2:      0.519
no. of observations:        388


### Drawing Random Numbers and Some Basic Stats

In [7]:
x = randn(100,3)                  #100x3 matrix of random draws from N(0,1)

mu    = mean(x, 1)                #mean of each column (row vector)
sigma = std(x, 1)                 #standard deviation of each column (row vector)

println("\n", "mean and std of random draws from N(0,1): ")
printmat(vcat(mu, sigma))         #first row: means, second row: standard deviations


mean and std of random draws from N(0,1): 
     0.036     0.045    -0.141
     0.969     1.014     0.841



### Quantiles ("critical values") of Distributions

In [8]:
#Critical values: quantile(distribution, probability) from the Distributions package.
Chisq05 = quantile(Chisq(5), 0.95)         #95th percentile of Chisquare(5)
N05     = quantile(Normal(0,1), 0.05)      #5th percentile of N(0,1)

println("\n", "5th percentile of N(0,1) and 95th of Chisquare(5)")
printmat(hcat(N05, Chisq05))               #printed as one row


5th percentile of N(0,1) and 95th of Chisquare(5)
    -1.645    11.070



# Statistical Plots

In [9]:
#Convert yearmonth (like 197901) to a fractional year: year + (month-1)/12.
#floor and mod are applied elementwise with dot syntax because ym is a vector;
#calling them directly on an array relies on implicit vectorization, which newer
#Julia versions no longer provide. The same holds for vector - scalar, hence .-
YearFrac = floor.(ym/100) + (mod.(ym,100) .- 1)/12

plot3a = plot(YearFrac,Rme,color=:blue,legend=false)     #Rme against time
plot!(xlims=(1978,2012),ylims=(-25,25))                  #fixed axis ranges
title!("Time series plot: monthly US equity market excess return")
ylabel!("%")

In [10]:
#Scatter of Re against Rme, with a 45 degree line for reference.
#Each attribute is set in its own plot! call, via the keyword form of title!/xlabel!/ylabel!.
plot3b = scatter(Rme, Re; color=:blue, legend=false)
plot!([-40, 60], [-40, 60]; color=:black)                #the 45 degree line
plot!(; xlims=(-40,40), ylims=(-40,60))
plot!(; title="Scatter plot: two monthly return series (and 45 degree line)")
plot!(; xlabel="Market excess return, %")
plot!(; ylabel="Excess returns on small growth stocks, %")

In [11]:
#Histogram of Rme with 25 bins; labels are set one plot! call at a time.
histogram(Rme; bins=25, legend=false)
plot!(; title="Histogram: monthly US equity market excess return")
plot!(; xlabel="Market excess return, %")
plot!(; ylabel="Number of months")

In [12]:
#marker showing that the whole notebook ran through (blank line, message, newline)
print("\n", "end of program", "\n")


end of program
