In [None]:
using DataFrames, PyPlot

In [None]:
# load data: read crime.csv (path is relative to the working directory) into `crime`;
# the trailing semicolon keeps the notebook from echoing the whole table
crime = readtable("crime.csv");

In [None]:
# clean data: keep only the counties that have a complete set of rows
# NOTE(review): presumably one row per county per year, so 26 rows == 26 reported years — confirm
years_required = 26

# count rows per county in a single pass instead of rescanning the table for every county
rows_per_county = Dict{String,Int}()
for county in crime[:County]
    rows_per_county[county] = get(rows_per_county, county, 0) + 1
end

counties = Set((crime[:County]))
reporting_counties = String[]
nonreporting_counties = String[]

# split the counties by whether their row count matches the required number
for county in counties
    if rows_per_county[county] == years_required
        push!(reporting_counties, county)
    else
        push!(nonreporting_counties, county)
    end
end

# row mask: true where the row's county is not in the non-reporting list
# (a Set makes each membership test a hash lookup rather than a scan of the list)
nonreporting_set = Set(nonreporting_counties)
reported_crime_all_years = Bool[!(county in nonreporting_set) for county in crime[:County]]

crime = crime[reported_crime_all_years,:];

In [None]:
# restrict the table to the rows whose County is "Tompkins"
tompkins = crime[crime[:County] .== "Tompkins", :];
# number of Tompkins rows; later cells use it to size their design matrices
n = nrow(tompkins)

# predict crime in Tompkins county

In [None]:
# baseline: fit Index_Count as a linear function of Year plus a constant
y = tompkins[:Index_Count].data
X = hcat(tompkins[:Year].data, ones(n))   # columns: year, intercept
w = X \ y                                 # least-squares weights, same order as the columns

tompkins[:pred_linear] = X * w

In [None]:
# Plot the observed counts against the linear fit.
clf()
# each plot call is its own statement; a trailing comma here would fuse this call
# and the next one into a single discarded tuple
plot(tompkins[:Year], tompkins[:Index_Count], "ro", label="Index_Count")
plot(tompkins[:Year], tompkins[:pred_linear], "b-", label="linear model")
legend()
xlabel("Year")
ylabel("Crime")

# Autoregressive models

In [None]:
# AR(1): predict each row's Index_Count from the Index_Count of the row after it
# (the ``lagged outcome'') plus a constant
# NOTE(review): the following row is "last year" only if rows are ordered newest-first — confirm
y = tompkins[:Index_Count].data[1:end-1]
X = hcat(tompkins[:Index_Count].data[2:end], ones(n - 1))
w = X \ y

# the final row has no following row to serve as its lag, so its slot is padded with
# the linear model's last prediction to keep the column at length n
tompkins[:pred_ar1] = vcat(X * w, tompkins[:pred_linear][end]);

In [None]:
# Plot the observed counts with the linear and AR1 fits.
clf()
# each plot call is its own statement; a trailing comma here would fuse this call
# and the next one into a single discarded tuple
plot(tompkins[:Year], tompkins[:Index_Count], "ro", label="Index_Count")
plot(tompkins[:Year], tompkins[:pred_linear], "b-", label="linear model")
# the last pred_ar1 entry is padding copied from the linear model, so it is left off the curve
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1][1:end-1], "y-", label="AR1 model")
legend()
xlabel("Year")
ylabel("Crime")

In [None]:
# combine both ideas: regress Index_Count on the row's Year, the Index_Count of the
# row after it (the ``lagged outcome''), and a constant
# NOTE(review): the following row is "last year" only if rows are ordered newest-first — confirm
y = tompkins[:Index_Count].data[1:end-1]
X = hcat(tompkins[:Year].data[1:end-1], tompkins[:Index_Count].data[2:end], ones(n - 1))
w = X \ y

# the final row has no lag available; pad with the linear model's last prediction
# so the new column has length n
tompkins[:pred_ar1_lin] = vcat(X * w, tompkins[:pred_linear][end]);

In [None]:
# Plot the observed counts with the linear, AR1 and AR + linear fits.
clf()
# each plot call is its own statement; a trailing comma here would fuse this call
# and the next one into a single discarded tuple
plot(tompkins[:Year], tompkins[:Index_Count], "ro", label="Index_Count")
plot(tompkins[:Year], tompkins[:pred_linear], "b-", label="linear model")
# the last entry of each AR column is padding copied from the linear model, so it is left off
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1][1:end-1], "y-", label="AR1 model")
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1_lin][1:end-1], "g-", label="AR + linear model")
legend()
xlabel("Year")
ylabel("Crime")

# Smoothed models

In [None]:
# smoothed model: one bump feature per row, X[i,j] = exp(-(Year[i] - Year[j])^2)
α = 1  # NOTE(review): assigned but not used by anything in this cell
n = length(tompkins[:Year])
X = Float64[exp(-(tompkins[:Year][i] - tompkins[:Year][j])^2) for i in 1:n, j in 1:n]
y = tompkins[:Index_Count].data
w = X \ y   # square n-by-n system: one weight per bump

tompkins[:pred_smooth] = X * w;

In [None]:
# Plot the observed counts with the linear, AR and smoothed fits.
clf()
# each plot call is its own statement; a trailing comma here would fuse this call
# and the next one into a single discarded tuple
plot(tompkins[:Year], tompkins[:Index_Count], "ro", label="Index_Count")
plot(tompkins[:Year], tompkins[:pred_linear], "b-", label="linear model")
# the last entry of each AR column is padding copied from the linear model, so it is left off
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1][1:end-1], "y-", label="AR1 model")
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1_lin][1:end-1], "g-", label="AR + linear model")
plot(tompkins[:Year], tompkins[:pred_smooth], "c-", label="smoothed model")
legend()
xlabel("Year")
ylabel("Crime")

In [None]:
# smoothed model with fewer features: a bump centred on every second row's Year,
# plus a constant column. Overwrites tompkins[:pred_smooth].
α = 1  # NOTE(review): assigned but not used by anything in this cell
n = length(tompkins[:Year])
# integer division: Int(n/2) throws InexactError when n is odd, while div(n, 2)
# rounds down and still keeps the largest knot index 2*nknots within 1:n
nknots = div(n, 2)
X = zeros(n,nknots)
for j=1:nknots
    for i=1:n
        # knot j sits at the Year of row 2j
        X[i,j] = exp(-(tompkins[:Year][i]-tompkins[:Year][2*j])^2)
    end
end
X = [X ones(n)]   # append the intercept column
y = tompkins[:Index_Count].data
w = X\y

tompkins[:pred_smooth] = X*w;

In [None]:
# Plot the observed counts with the linear, AR and (knot-based) smoothed fits.
clf()
# each plot call is its own statement; a trailing comma here would fuse this call
# and the next one into a single discarded tuple
plot(tompkins[:Year], tompkins[:Index_Count], "ro", label="Index_Count")
plot(tompkins[:Year], tompkins[:pred_linear], "b-", label="linear model")
# the last entry of each AR column is padding copied from the linear model, so it is left off
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1][1:end-1], "y-", label="AR1 model")
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1_lin][1:end-1], "g-", label="AR + linear model")
plot(tompkins[:Year], tompkins[:pred_smooth], "c-", label="smoothed model")
legend()
xlabel("Year")
ylabel("Crime")

# Regularized + smoothed models

In [None]:
# regularized smoothing: fit bump features to what the linear model leaves unexplained,
# with a unit ridge penalty on the weights, then add the linear prediction back
α = 1  # NOTE(review): assigned but not used by anything in this cell
n = length(tompkins[:Year])
X = zeros(n,n)
for j=1:n
    for i=1:n
        X[i,j] = exp(-(tompkins[:Year][i]-tompkins[:Year][j])^2)
    end
end
# residual of the linear model
y = tompkins[:Index_Count].data - tompkins[:pred_linear]
# Solve (X'X + I) w = X'y. `\` and `*` share precedence and associate to the left, so the
# unparenthesised form A\X'*y evaluates as (A\X')*y: an n-by-n solve followed by a product.
# Parenthesising the right-hand side solves against a single vector instead.
# pred_linear is read straight from the table column, so y may not be a plain Array;
# convert it so the solver receives a dense Float64 vector.
w = (X'*X + eye(n)) \ (X' * convert(Vector{Float64}, y))

tompkins[:pred_smooth_reg] = X*w + tompkins[:pred_linear];

In [None]:
# Plot the observed counts with every fitted model so far.
clf()
# each plot call is its own statement; a trailing comma here would fuse this call
# and the next one into a single discarded tuple
plot(tompkins[:Year], tompkins[:Index_Count], "ro", label="Index_Count")
plot(tompkins[:Year], tompkins[:pred_linear], "b-", label="linear model")
# the last entry of each AR column is padding copied from the linear model, so it is left off
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1][1:end-1], "y-", label="AR1 model")
plot(tompkins[:Year][1:end-1], tompkins[:pred_ar1_lin][1:end-1], "g-", label="AR + linear model")
plot(tompkins[:Year], tompkins[:pred_smooth], "c-", label="smoothed model")
plot(tompkins[:Year], tompkins[:pred_smooth_reg], "m-", label="reg smoothed model")
legend()
xlabel("Year")
ylabel("Crime")