forked from StevenDowney86/Public_Research_and_Backtests
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Kernal Density Estimators and Fat Tail Distribution Estimation.R
131 lines (106 loc) · 4.65 KB
/
Kernal Density Estimators and Fat Tail Distribution Estimation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
library(quantmod)
getSymbols(c("AXP", "CAT", "SBUX"), from="2007-01-03", to="2016-12-30", src="yahoo")
chartSeries(AXP, theme="white")
chartSeries(CAT, theme="white")
chartSeries(SBUX, theme="white")
# Compute net returns
CAT.netret = diff(CAT$CAT.Adjusted)/lag(CAT$CAT.Adjusted)
AXP.netret = diff(AXP$AXP.Adjusted)/lag(AXP$AXP.Adjusted)
SBUX.netret = diff(SBUX$SBUX.Adjusted)/lag(SBUX$SBUX.Adjusted)
# Discriptive statistic
summary(CAT.netret)
sd(CAT.netret, na.rm=TRUE)
mad(CAT.netret, na.rm=TRUE) # a robust estimate for standard deviation
summary(AXP.netret)
sd(AXP.netret, na.rm=TRUE)
mad(AXP.netret, na.rm=TRUE) # a robust estimat for standard deviation
summary(SBUX.netret)
sd(SBUX.netret, na.rm=TRUE)
mad(SBUX.netret, na.rm=TRUE) # a robust estimat for standard deviatio
# Compute log returns
CAT.logret = diff(log(CAT$CAT.Adjusted))
AXP.logret = diff(log(AXP$AXP.Adjusted))
SBUX.logret = diff(log(SBUX$SBUX.Adjusted))
logrets = list(CAT.logret, AXP.logret, SBUX.logret)
#compute qqplot and KDE and normal distribution
labels = list("CAT", "AXP", "SBUX")
par(mfrow=c(3,2))
for (i in 1:length(logrets)) {
retn = coredata(logrets[[i]])
qqnorm(retn, datax = TRUE,
xlab = "normal quantile", # this is for the theoretical quantiles
ylab = paste("log returns of ", labels[[i]]), # this is for the sample quantiles
main = paste("normal probability plot for log return of ", labels[[i]]))
qqline(retn, datax=TRUE, col = 2)
d <- density(retn, adjust = 1, na.rm = TRUE)
m=mean(retn, na.rm=TRUE)
stdv = mad(retn, na.rm=TRUE)
plot(d, type = "n", xlim=c(m-3*stdv,m+3*stdv),
main= paste("KDE for log returns of", labels[[i]], "and a normal density\nwith the same mean and variance"))
polygon(d, col = "wheat")
z = seq(from=m-5*stdv,to=m+5*stdv,by=stdv/100)
lines(z,dnorm(z,mean=m, sd=stdv), lty=2,lwd=3,col="red")
}
#Comparing Equity retruns with t distribution
par(mfrow=c(3,4))
logrets = list(CAT.logret, AXP.logret, SBUX.logret)
labels = list("CAT", "AXP", "SBUX")
tdfs = c(3,2,2)
for (i in 1:length(logrets)) {
retn = coredata(logrets[[i]])
n = length(retn)
grid = (1:n)/(n+1)
for (tdf in tdfs[[i]]:(tdfs[[i]]+2)) {
qqplot(retn, qt(grid,df=tdf),
main= paste("t-probability plot of", labels[[i]], "\ndf =", tdf),
xlab=paste(labels[[i]], "log-returns"),
ylab="t-quantiles")
qntl = quantile(retn, probs = c(0.25,0.75), na.rm=TRUE, names=FALSE)
abline( lm( qt(c(.25,.75),df = tdf) ~ qntl ), col = 2)
}
d <- density(retn, adjust = 1, na.rm = TRUE)
m=mean(retn, na.rm=TRUE)
stdv = mad(retn, na.rm=TRUE)
plot(d, type = "n", xlim=c(m-5*stdv,m+5*stdv),
main= paste("KDE for log returns of", labels[[i]], "and\n a scaled t-density with DF=", tdfs[[i]]+1))
polygon(d, col = "wheat")
x = seq(from=m-10*stdv,to=m+10*stdv,by=stdv/100)
lines(x,dt((x-m)/stdv,df=tdfs[[i]]+1)/stdv, lty=2,lwd=3,col="red")
}
#Which distribution has the fattest tails?
library(nimble)
n = 1000
set.seed(100)
data_rdexp = rdexp(n, location = 0, scale = 1)
data_rnorm = rnorm(n, mean = 0, sd = 1)
data_rlnorm = rlnorm(n, meanlog = 0, sdlog = 1)
data_rcauchy = rcauchy(n, location = 0, scale = 1)
qqplot(x = data_rnorm, y = data_rdexp, datax = TRUE,
xlab = "normal quantile", # this is for the theoretical quantiles
ylab = paste("Double Exponential quantiles"), # this is for the sample quantiles
main = paste("normal probability plot for Double Exponential"))
qqline(data_rnorm, datax=TRUE, col = 2)
qqplot(x = data_rnorm, y = data_rlnorm, datax = TRUE,
xlab = "normal quantile", # this is for the theoretical quantiles
ylab = paste("Log Normal quantiles"), # this is for the sample quantiles
main = paste("normal probability plot for Log Normal"))
qqline(data_rnorm, datax=TRUE, col = 2)
qqplot(x = data_rlnorm, y = data_rcauchy, datax = TRUE,
xlab = "log normal quantile", # this is for the theoretical quantiles
ylab = paste("Cauchy quantiles"), # this is for the sample quantiles
main = paste("Log normal probability plot for Cauchy"))
qqline(data_rlnorm, datax=TRUE, col = 2)
##Calculating Sum of Squared Errors
getSymbols(c("DAAA","DFF","DGS10","DGS30"), src = "FRED")
data <- na.omit(merge(DAAA, DFF, DGS10, DGS30))
c(dim(data))
DAAA_dif = diff(as.vector(data[,"DAAA"]))
DFF_dif = diff(as.vector(data[,"DFF"]))
DGS10_dif = diff(as.vector(data[,"DGS10"]))
DGS30_dif = diff(as.vector(data[,"DGS30"]))
summary(lm(DAAA_dif ~ DGS10_dif + DGS30_dif + DFF_dif))
anova_data = anova(lm(DAAA_dif ~ DGS10_dif + DGS30_dif + DFF_dif))
regression_SS = sum(anova_data[,"Sum Sq"][1:3])
residual_error_SS = sum(anova_data[,"Sum Sq"][4])
totalSS = regression_SS + residual_error_SS
totalSS