Permalink
Browse files

Merge remote-tracking branch 'remotes/upstream/master'

tidy up a bit
Conflicts:
	config.py
	utils/ImplicitFeedbackFunctions.py
	utils/Model.py
	utils/SVDModel.py
  • Loading branch information...
2 parents e4b43fd + efc7030 commit f60d21b7dd0c31eca03f5fd6dd0d11f4395b95f5 @Ykid Ykid committed Aug 2, 2013
View
@@ -38,6 +38,11 @@ Data/
*.Rout
*.Rhistory
+#################
+## Idea
+#################
+*.idea
+
#################
## Eclipse
#################
@@ -169,7 +174,6 @@ publish/
*.pubxml
# NuGet Packages Directory
-## TODO: If you have NuGet Package Restore enabled, uncomment the next line
#packages/
# Windows Azure Build Output
@@ -1,7 +1,7 @@
Hybrid Movie Recommendation System Documentation
==============================================
-This program is a hybrid recommendation system. This documentation is written for those who wish to modify the program in some manner or join the effect.
+This program is a hybrid recommendation system. This documentation is written for those who wish to modify the program in some manner or join the effort.
Program Structure
----------------------------------------------
@@ -8,6 +8,7 @@ RMSEPath = args[6]
model.type= args[7]
input1 = args[8]
+library(Metrics)
dataTrain = read.csv(trainPath, sep="\t")
dataCV = read.csv(CVPath, sep="\t")
@@ -17,10 +18,7 @@ if(model.type=="OLS"){
## Ordinary Least Squares
library(ipred)
fit = lm(y~0 + .,data=dataTrain)
- errFit = errorest(y~0+.,data=dataTrain,model=lm)
summary(fit)
- print(errFit)
- error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
@@ -30,10 +28,7 @@ if(model.type=="OLSI"){
library(ipred)
formula = paste("y~0 + (.)^",input1,sep="")
fit = lm(y~0 + (.)^2,data=dataTrain)
- errFit = errorest(y~0+(.)^2,data=dataTrain,model=lm)
summary(fit)
- print(errFit)
- error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
@@ -44,16 +39,9 @@ if(model.type=="RR"){
library(ridge)
input1 = as.numeric(input1)
fit = linearRidge(y~0+.,data=dataTrain,nPCs=input1)
- ridgeModel = function(formula, data) {
- mod <- linearRidge(formula, data=data,nPCs=input1)
- function(newdata) predict(mod, newdata)
- }
- errFit = errorest(y~0+.,data=dataTrain,model=ridgeModel)
print(fit)
print("Ridge lambdas")
print(fit$lambda)
- print(errFit)
- error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
@@ -64,10 +52,7 @@ if(model.type=="Lasso"){
y = data.matrix(dataTrain$y)
drops = c("y")
x = data.matrix(dataTrain[,!(names(dataTrain) %in% drops)])
- errFit = cv.glmnet(x,y)
- print(errFit)
- error = sqrt(mean(errFit$cvm))
- fit = glmnet(x,y)
+ fit = cv.glmnet(x,y)
dataCVMat = data.matrix(dataCV[,!(names(dataCV) %in% drops)])
CVPredictions = predict(fit,dataCVMat)
TestPredictions= predict(fit,as.matrix(dataTest))
@@ -77,10 +62,7 @@ if(model.type=="BRT"){
library(ipred)
## Bagged Regression Trees
fit = bagging(y~0+.,data=dataTrain)
- errFit = errorest(y~0+.,data=dataTrain,model=bagging)
print(fit)
- print(errFit)
- error = errFit$error
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
@@ -93,34 +75,19 @@ if(model.type=="BMAR"){
drops = c("y")
x = dataTrain[,!(names(dataTrain) %in% drops)]
fit = bicreg(x, y)
- errBicReg = function(formula,data){
- y = data$y
- drops = c("y")
- x = data[,!(names(data) %in% drops)]
- bicreg(x,y)
- }
summary(fit)
- errFit = errorest(y~0+.,data=dataTrain,model=errBicReg)
- print(errFit)
- error = errFit$error
cvp = predict(fit,dataCV)
tp = predict(fit,dataTest)
CVPredictions = unlist(cvp[1])
- testPreidctions = unlist(tp[1] )
+ TestPredictions = unlist(tp[1] )
}
if(model.type=="RFR"){
library(randomForest)
library(ipred)
## Random Forest
- fit = randomForest(y ~0+., data=dataTrain,importance=TRUE, sampsize=1000, ntree=100)
- randFor = function(formula,data){
- randomForest(y ~0+., data=data,importance=TRUE, sampsize=1000, ntree=100)
- }
- errFit = errorest(y~0+.,data=dataTrain,model=randFor)
- print(errFit)
- error = errFit$error
+ fit = randomForest(y ~0+., data=dataTrain,importance=TRUE, ntree=100)
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
@@ -130,7 +97,7 @@ if(model.type=="CIRF"){
## Not Working
library(party)
library(languageR)
- fit <- cforest(y ~ 0 + ., data = dataTrain)
+ fit <- cforest(y ~ 0 + ., data = dataTrain)
}
if(model.type=="GBRT"){
@@ -143,13 +110,12 @@ if(model.type=="GBRT"){
print(cvm)
mstop(cvm)
fit <- blackboost(y ~ 0+., data = dataTrain,control = boost_control(mstop = mstop(cvm)))
- error = min(cvm)
CVPredictions = predict(fit,dataCV)
TestPredictions= predict(fit,dataTest)
}
-
-
+error=rmse(dataCV$y,CVPredictions)
+print(error)
write(CVPredictions, file = predCV, ncolumns=1)
write(TestPredictions, file = predTest, ncolumns=1)
View
@@ -26,7 +26,7 @@ def setupHybridTrial(hybridOriginalPath,strTrial,modelBootPath,CVPredictionPaths
bootCV = modelBootPath + \
'CV' + '_t' + strTrial
buildTrainingMatrixFromPredictions(bootCV,hybridOriginal,
- CVPredictionPaths,grabCSVColumnFunc)
+ CVPredictionPaths,grabCSVColumnFunc,2)
buildPredictorMatrixFromPredictions(testPredictionPaths,
grabCSVColumnFunc,hybridPredict)
bootsplitFunc(hybridOriginal,hybridOriginal + '_tmp',
@@ -42,13 +42,13 @@ def setupHybridTrial(hybridOriginalPath,strTrial,modelBootPath,CVPredictionPaths
hybridOriginalPath +
'test_t' + strTrial,True)
-def buildTrainingMatrixFromPredictions(fullSet,outputPath,predictorPaths,grabCSVColumnFunc):
+def buildTrainingMatrixFromPredictions(fullSet,outputPath,predictorPaths,grabCSVColumnFunc,masterColumn):
#-------------------------------------------------
# Takes in the predictions of various models on CV data
# through the predictorPaths array
# Generates a txt file that is a matrix for training Hybrid
#-------------------------------------------------
- predictionArrays = [grabCSVColumnFunc(fullSet,2)]
+ predictionArrays = [grabCSVColumnFunc(fullSet,masterColumn)]
for predictPath in predictorPaths:
predictionArrays.append(grabCSVColumnFunc(predictPath,2))
toWrite = []
View
@@ -1,14 +1,17 @@
import hybrid
from SynthModel import SynthModel
-def setupSynthesize(utils,CVPredictionPaths,testPredictionPaths,configModel,trials,modelList,mproc,processes):
+def setupSynthesize(utils,CVPredictionPaths,testPredictionPaths,split,random,configModel,trials,modelList,mproc,processes):
processes = []
for trial in range(0,trials):
strTrial = str(trial)
p = mproc.Process(target=synthSetupTrial,
args=(utils.SYNTH_ORIGINAL_PATH,strTrial,
utils.HYBRID_BOOT_PATH,
+ utils.SYNTH_BOOT_PATH,
CVPredictionPaths[trial],
testPredictionPaths[trial],
+ split,random,
+ utils.bootsplit,
utils.grabCSVColumn,
hybrid.buildTrainingMatrixFromPredictions,
hybrid.buildPredictorMatrixFromPredictions,
@@ -22,17 +25,22 @@ def setupSynthesize(utils,CVPredictionPaths,testPredictionPaths,configModel,tria
p.join()
-def synthSetupTrial(synthOriginalPath,strTrial,hybridBootPath,CVPredictionPaths,testPredictionPaths,grabCSVColumnFunc,buildTrainingMatrixFromPredictionsFunc,buildPredictorMatrixFromPredictionsFunc,addHeaderFunc):
+def synthSetupTrial(synthOriginalPath,strTrial,hybridBootPath,synthBootPath,CVPredictionPaths,testPredictionPaths,split,random,bootsplitFunc,grabCSVColumnFunc,buildTrainingMatrixFromPredictionsFunc,buildPredictorMatrixFromPredictionsFunc,addHeaderFunc):
synthOriginal = synthOriginalPath \
+ 'train_t' + strTrial
synthPredict = synthOriginalPath \
+ 'test_t' + strTrial
-
- buildTrainingMatrixFromPredictionsFunc(hybridBootPath +
- 'CV_t' + strTrial + '_tmp', synthOriginal + '_tmp',
- CVPredictionPaths,grabCSVColumnFunc)
+ bootCV = hybridBootPath + 'CV_t' + strTrial + '_tmp'
+ buildTrainingMatrixFromPredictionsFunc(bootCV,synthOriginal,
+ CVPredictionPaths,grabCSVColumnFunc,0)
buildPredictorMatrixFromPredictionsFunc(testPredictionPaths,
grabCSVColumnFunc,synthPredict + '_tmp')
- addHeaderFunc(synthOriginal + '_tmp', synthOriginal,False)
- addHeaderFunc(synthPredict + '_tmp', synthPredict,True)
-
+ bootsplitFunc(synthOriginal,synthOriginal + '_tmp',
+ synthBootPath + 'train_t' + strTrial + '_tmp',
+ synthBootPath + 'CV_t' + strTrial + '_tmp',
+ split,random)
+ addHeaderFunc(synthBootPath + 'train_t' + strTrial + '_tmp',
+ synthBootPath + 'train_t' + strTrial ,False)
+ addHeaderFunc(synthBootPath + 'CV_t' + strTrial + '_tmp',
+ synthBootPath + 'CV_t' + strTrial ,False)
+ addHeaderFunc(synthPredict + '_tmp', synthPredict,True)
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+ <component name="NewModuleRootManager" inherit-compiler-output="true">
+ <exclude-output />
+ <content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
+ </content>
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ </component>
+</module>
+
View
23 LICENSE
@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Christopher Rackauckas
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
View
Binary file not shown.
View
Binary file not shown.
View
Binary file not shown.
@@ -1,88 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Converts data in a triple format "id1 id2 id3 target" (like often used in recommender systems for rating prediction) into the libfm format.
-#
-# Author: Steffen Rendle, http://www.libfm.org/
-# modified: 2012-12-27
-#
-# History
-# 2012-12-27: header is not printed
-#
-# Copyright 2010-2012 Steffen Rendle, see license.txt for more information
-
-use Getopt::Long;
-use strict;
-
-srand();
-
-
-my $file_in;
-my $has_header = 0;
-my $target_column = undef;
-my $_delete_column = "";
-my $offset = 0; # where to start counting for indices. For libsvm one should start with 1; libfm can deal with 0.
-my $separator = " ";
-
-# example
-# ./triple_format_to_libfm.pl --in train.txt,test.txt --header 0 --target_column 2 --delete_column 3,4,5,6,7 --offset 0
-
-
-GetOptions(
- 'in=s' => \$file_in,
- 'header=i' => \$has_header,
- 'target_column=i' => \$target_column,
- 'delete_column=s' => \$_delete_column,
- 'offset=i' => \$offset,
- 'separator=s' => \$separator,
-);
-
-(defined $target_column) || die "no target column specified";
-
-my @files = split(/[,;]/, $file_in);
-my %delete_column;
-foreach my $c (split(/[,;]/, $_delete_column)) {
- $delete_column{int($c)} = 1;
-}
-
-my %id;
-my $id_cntr = $offset;
-
-
-foreach my $file_name (@files) {
- my $file_out = $file_name . ".libfm";
- print "transforming file $file_name to $file_out...";
- my $num_triples = 0;
-
- open my $IN, '<' , $file_name;
- open my $OUT, '>' , $file_out;
- if ($has_header) {
- $_ = <$IN>;
-# print {$OUT} $_;
- }
- while (<$IN>) {
- chomp;
- if ($_ ne "") {
- my @data = split /$separator/;
- ($#data >= $target_column) || die "not enough values in line $num_triples, expected at least $target_column values\nfound $_\n";
- my $out_str = $data[$target_column];
- my $out_col_id = 0; ## says which column in the input a field corresponds to after "deleting" the "delete_column", i.e. it is a counter over the #$data-field in @data assuming that some of the columns have been deleted; one can see this as the "group" id
- for (my $i = 0; $i <= $#data; $i++) {
- if (($i != $target_column) && (! exists $delete_column{$i})) {
- my $col_id = $out_col_id . " " . $data[$i]; ## this id holds the unique id of $data[$i] (also w.r.t. its group)
- if (! exists $id{$col_id}) {
- $id{$col_id} = $id_cntr;
- $id_cntr++;
- }
- my $libfm_id = $id{$col_id};
- $out_str .= " " . $libfm_id . ":1";
- $out_col_id++;
- }
- }
- print {$OUT} $out_str, "\n";
- }
- }
- close $OUT;
- close $IN;
- print "\n";
-}
-
View
@@ -16,7 +16,7 @@ def postProcess(os,utils, DE_EFFECT,trials,userMovieRating,RMSEPaths):
winner = pickWinner(trials,RMSEPaths)
print("Best trial: " + str(winner[0]))
- print("Best Synth Boot/K-Fold RMSE: " + str(winner[1]))
+ print("Best Synth CV-RMSE: " + str(winner[1]))
trialOutput = utils.TRIAL_OUTPUT_PATH + 't' + str(winner[0])
os.system('cp ' + trialOutput + ' ' + utils.OUTPUT_PATH)
@@ -57,4 +57,4 @@ def pickWinner(trials,RMSEPaths):
bestTrial = i
bestRMSE = RMSE
- return(bestTrial,bestRMSE)
+ return bestTrial,bestRMSE
Oops, something went wrong.

0 comments on commit f60d21b

Please sign in to comment.