Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/upstream/master'
Browse files (browse the repository at this point in the history)
tidy up a bit
Conflicts:
	config.py
	utils/ImplicitFeedbackFunctions.py
	utils/Model.py
	utils/SVDModel.py
  • Loading branch information
Ykid committed Aug 2, 2013
2 parents e4b43fd + efc7030 commit f60d21b
Show file tree
Hide file tree
Showing 35 changed files with 915 additions and 538 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Expand Up @@ -38,6 +38,11 @@ Data/
*.Rout
*.Rhistory

#################
## Idea
#################
*.idea

#################
## Eclipse
#################
Expand Down Expand Up @@ -169,7 +174,6 @@ publish/
*.pubxml

# NuGet Packages Directory
## TODO: If you have NuGet Package Restore enabled, uncomment the next line
#packages/

# Windows Azure Build Output
Expand Down
2 changes: 1 addition & 1 deletion Documentation/Documentation.md
@@ -1,7 +1,7 @@
Hybrid Movie Recommendation System Documentation
==============================================

This program is a hybrid recommendation system. This documentation is written for those who wish to modify the program in some manner or join the effect.
This program is a hybrid recommendation system. This documentation is written for those who wish to modify the program in some manner or join the effort.

Program Structure
----------------------------------------------
Expand Down
48 changes: 7 additions & 41 deletions Hybrid/basicEnsembles.R → Hybrid/ensembles.R
Expand Up @@ -8,6 +8,7 @@ RMSEPath = args[6]
model.type= args[7]
input1 = args[8]

library(Metrics)

dataTrain = read.csv(trainPath, sep="\t")
dataCV = read.csv(CVPath, sep="\t")
Expand All @@ -17,10 +18,7 @@ if(model.type=="OLS"){
## Ordinary Least Squares
library(ipred)
fit = lm(y~0 + .,data=dataTrain)
errFit = errorest(y~0+.,data=dataTrain,model=lm)
summary(fit)
print(errFit)
error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
Expand All @@ -30,10 +28,7 @@ if(model.type=="OLSI"){
library(ipred)
formula = paste("y~0 + (.)^",input1,sep="")
fit = lm(y~0 + (.)^2,data=dataTrain)
errFit = errorest(y~0+(.)^2,data=dataTrain,model=lm)
summary(fit)
print(errFit)
error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
Expand All @@ -44,16 +39,9 @@ if(model.type=="RR"){
library(ridge)
input1 = as.numeric(input1)
fit = linearRidge(y~0+.,data=dataTrain,nPCs=input1)
ridgeModel = function(formula, data) {
mod <- linearRidge(formula, data=data,nPCs=input1)
function(newdata) predict(mod, newdata)
}
errFit = errorest(y~0+.,data=dataTrain,model=ridgeModel)
print(fit)
print("Ridge lambdas")
print(fit$lambda)
print(errFit)
error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
Expand All @@ -64,10 +52,7 @@ if(model.type=="Lasso"){
y = data.matrix(dataTrain$y)
drops = c("y")
x = data.matrix(dataTrain[,!(names(dataTrain) %in% drops)])
errFit = cv.glmnet(x,y)
print(errFit)
error = sqrt(mean(errFit$cvm))
fit = glmnet(x,y)
fit = cv.glmnet(x,y)
dataCVMat = data.matrix(dataCV[,!(names(dataCV) %in% drops)])
CVPredictions = predict(fit,dataCVMat)
TestPredictions= predict(fit,as.matrix(dataTest))
Expand All @@ -77,10 +62,7 @@ if(model.type=="BRT"){
library(ipred)
## Bagged Regression Trees
fit = bagging(y~0+.,data=dataTrain)
errFit = errorest(y~0+.,data=dataTrain,model=bagging)
print(fit)
print(errFit)
error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
Expand All @@ -93,34 +75,19 @@ if(model.type=="BMAR"){
drops = c("y")
x = dataTrain[,!(names(dataTrain) %in% drops)]
fit = bicreg(x, y)
errBicReg = function(formula,data){
y = data$y
drops = c("y")
x = data[,!(names(data) %in% drops)]
bicreg(x,y)
}
summary(fit)
errFit = errorest(y~0+.,data=dataTrain,model=errBicReg)
print(errFit)
error = errFit$error
cvp = predict(fit,dataCV)
tp = predict(fit,dataTest)
CVPredictions = unlist(cvp[1])
testPreidctions = unlist(tp[1] )
TestPredictions = unlist(tp[1] )
}


if(model.type=="RFR"){
library(randomForest)
library(ipred)
## Random Forest
fit = randomForest(y ~0+., data=dataTrain,importance=TRUE, sampsize=1000, ntree=100)
randFor = function(formula,data){
randomForest(y ~0+., data=data,importance=TRUE, sampsize=1000, ntree=100)
}
errFit = errorest(y~0+.,data=dataTrain,model=randFor)
print(errFit)
error = errFit$error
fit = randomForest(y ~0+., data=dataTrain,importance=TRUE, ntree=100)
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
Expand All @@ -130,7 +97,7 @@ if(model.type=="CIRF"){
## Not Working
library(party)
library(languageR)
fit <- cforest(y ~ 0 + ., data = dataTrain)
fit <- cforest(y ~ 0 + ., data = dataTrain)
}

if(model.type=="GBRT"){
Expand All @@ -143,13 +110,12 @@ if(model.type=="GBRT"){
print(cvm)
mstop(cvm)
fit <- blackboost(y ~ 0+., data = dataTrain,control = boost_control(mstop = mstop(cvm)))
error = min(cvm)
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}



error=rmse(dataCV$y,CVPredictions)
print(error)

write(CVPredictions, file = predCV, ncolumns=1)
write(TestPredictions, file = predTest, ncolumns=1)
Expand Down
6 changes: 3 additions & 3 deletions Hybrid/hybrid.py
Expand Up @@ -26,7 +26,7 @@ def setupHybridTrial(hybridOriginalPath,strTrial,modelBootPath,CVPredictionPaths
bootCV = modelBootPath + \
'CV' + '_t' + strTrial
buildTrainingMatrixFromPredictions(bootCV,hybridOriginal,
CVPredictionPaths,grabCSVColumnFunc)
CVPredictionPaths,grabCSVColumnFunc,2)
buildPredictorMatrixFromPredictions(testPredictionPaths,
grabCSVColumnFunc,hybridPredict)
bootsplitFunc(hybridOriginal,hybridOriginal + '_tmp',
Expand All @@ -42,13 +42,13 @@ def setupHybridTrial(hybridOriginalPath,strTrial,modelBootPath,CVPredictionPaths
hybridOriginalPath +
'test_t' + strTrial,True)

def buildTrainingMatrixFromPredictions(fullSet,outputPath,predictorPaths,grabCSVColumnFunc):
def buildTrainingMatrixFromPredictions(fullSet,outputPath,predictorPaths,grabCSVColumnFunc,masterColumn):
#-------------------------------------------------
# Takes in the prediction of various models on CV data
# Through CVPredictionPaths array
# Generates a txt file that is a matrix for training Hybrid
#-------------------------------------------------
predictionArrays = [grabCSVColumnFunc(fullSet,2)]
predictionArrays = [grabCSVColumnFunc(fullSet,masterColumn)]
for predictPath in predictorPaths:
predictionArrays.append(grabCSVColumnFunc(predictPath,2))
toWrite = []
Expand Down
26 changes: 17 additions & 9 deletions Hybrid/synthesize.py
@@ -1,14 +1,17 @@
import hybrid
from SynthModel import SynthModel
def setupSynthesize(utils,CVPredictionPaths,testPredictionPaths,configModel,trials,modelList,mproc,processes):
def setupSynthesize(utils,CVPredictionPaths,testPredictionPaths,split,random,configModel,trials,modelList,mproc,processes):
processes = []
for trial in range(0,trials):
strTrial = str(trial)
p = mproc.Process(target=synthSetupTrial,
args=(utils.SYNTH_ORIGINAL_PATH,strTrial,
utils.HYBRID_BOOT_PATH,
utils.SYNTH_BOOT_PATH,
CVPredictionPaths[trial],
testPredictionPaths[trial],
split,random,
utils.bootsplit,
utils.grabCSVColumn,
hybrid.buildTrainingMatrixFromPredictions,
hybrid.buildPredictorMatrixFromPredictions,
Expand All @@ -22,17 +25,22 @@ def setupSynthesize(utils,CVPredictionPaths,testPredictionPaths,configModel,tria
p.join()


def synthSetupTrial(synthOriginalPath,strTrial,hybridBootPath,CVPredictionPaths,testPredictionPaths,grabCSVColumnFunc,buildTrainingMatrixFromPredictionsFunc,buildPredictorMatrixFromPredictionsFunc,addHeaderFunc):
def synthSetupTrial(synthOriginalPath,strTrial,hybridBootPath,synthBootPath,CVPredictionPaths,testPredictionPaths,split,random,bootsplitFunc,grabCSVColumnFunc,buildTrainingMatrixFromPredictionsFunc,buildPredictorMatrixFromPredictionsFunc,addHeaderFunc):
synthOriginal = synthOriginalPath \
+ 'train_t' + strTrial
synthPredict = synthOriginalPath \
+ 'test_t' + strTrial

buildTrainingMatrixFromPredictionsFunc(hybridBootPath +
'CV_t' + strTrial + '_tmp', synthOriginal + '_tmp',
CVPredictionPaths,grabCSVColumnFunc)
bootCV = hybridBootPath + 'CV_t' + strTrial + '_tmp'
buildTrainingMatrixFromPredictionsFunc(bootCV,synthOriginal,
CVPredictionPaths,grabCSVColumnFunc,0)
buildPredictorMatrixFromPredictionsFunc(testPredictionPaths,
grabCSVColumnFunc,synthPredict + '_tmp')
addHeaderFunc(synthOriginal + '_tmp', synthOriginal,False)
addHeaderFunc(synthPredict + '_tmp', synthPredict,True)

bootsplitFunc(synthOriginal,synthOriginal + '_tmp',
synthBootPath + 'train_t' + strTrial + '_tmp',
synthBootPath + 'CV_t' + strTrial + '_tmp',
split,random)
addHeaderFunc(synthBootPath + 'train_t' + strTrial + '_tmp',
synthBootPath + 'train_t' + strTrial ,False)
addHeaderFunc(synthBootPath + 'CV_t' + strTrial + '_tmp',
synthBootPath + 'CV_t' + strTrial ,False)
addHeaderFunc(synthPredict + '_tmp', synthPredict,True)
12 changes: 12 additions & 0 deletions HybridMovieRecommendationSystem.iml
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

23 changes: 23 additions & 0 deletions LICENSE
@@ -0,0 +1,23 @@
The MIT License (MIT)

Copyright (c) 2013 Christopher Rackauckas

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.



Binary file removed Models/SVDFeature/svd_feature
Binary file not shown.
Binary file removed Models/SVDFeature/svd_feature_infer
Binary file not shown.
Binary file removed Models/SVDFeature/tools/combine_ugroup
Binary file not shown.
Binary file removed Models/SVDFeature/tools/kddcup_combine_ugroup
Binary file not shown.
Binary file removed Models/SVDFeature/tools/line_reorder
Binary file not shown.
Binary file removed Models/SVDFeature/tools/line_shuffle
Binary file not shown.
Binary file removed Models/SVDFeature/tools/make_feature_buffer
Binary file not shown.
Binary file removed Models/SVDFeature/tools/make_ugroup_buffer
Binary file not shown.
Binary file removed Models/SVDFeature/tools/svdpp_randorder
Binary file not shown.
Binary file removed Models/libFM/convert
Binary file not shown.
Binary file removed Models/libFM/libFM
Binary file not shown.
Binary file removed Models/libFM/transpose
Binary file not shown.
88 changes: 0 additions & 88 deletions Models/libFM/triple_format_to_libfm.pl

This file was deleted.

4 changes: 2 additions & 2 deletions PostProcess/post.py
Expand Up @@ -16,7 +16,7 @@ def postProcess(os,utils, DE_EFFECT,trials,userMovieRating,RMSEPaths):

winner = pickWinner(trials,RMSEPaths)
print("Best trial: " + str(winner[0]))
print("Best Synth Boot/K-Fold RMSE: " + str(winner[1]))
print("Best Synth CV-RMSE: " + str(winner[1]))
trialOutput = utils.TRIAL_OUTPUT_PATH + 't' + str(winner[0])
os.system('cp ' + trialOutput + ' ' + utils.OUTPUT_PATH)

Expand Down Expand Up @@ -57,4 +57,4 @@ def pickWinner(trials,RMSEPaths):
bestTrial = i
bestRMSE = RMSE

return(bestTrial,bestRMSE)
return bestTrial,bestRMSE

0 comments on commit f60d21b

Please sign in to comment.