diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/pom.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/pom.xml
deleted file mode 100644
index 6d1e4310d..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/pom.xml
+++ /dev/null
@@ -1,101 +0,0 @@
-
-
-
-
-
- 4.0.0
-
-
- org.apache.mahout
- mahout
- 0.7
- ../pom.xml
-
-
- mahout-buildtools
- Mahout Build Tools
-
- jar
-
-
-
- setup-eclipse-workspace
-
- ${basedir}/../workspace
-
-
- process-test-sources
-
-
- org.apache.maven.plugins
- maven-eclipse-plugin
- 2.8
- false
-
-
- setup.eclipse.workspace
- process-test-sources
-
- configure-workspace
-
-
-
-
- ${eclipse.workspace.dir}
- file:Eclipse-Lucene-Codestyle.xml
-
-
-
- org.apache.maven.plugins
- maven-antrun-plugin
- false
-
-
- org.apache.ant
- ant-nodeps
- 1.7.1
-
-
- org.apache.ant
- ant-trax
- 1.7.1
-
-
-
-
- setup.workspace
- validate
-
-
-
-
-
-
-
-
- run
-
-
-
-
-
-
-
-
-
-
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/findbugs-exclude.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/findbugs-exclude.xml
deleted file mode 100644
index 9d17e0991..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/findbugs-exclude.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-checkstyle-suppressions.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-checkstyle-suppressions.xml
deleted file mode 100644
index 41f8fe372..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-checkstyle-suppressions.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-checkstyle.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-checkstyle.xml
deleted file mode 100644
index 9b807972c..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-checkstyle.xml
+++ /dev/null
@@ -1,282 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-eclipse-checkstyle b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-eclipse-checkstyle
deleted file mode 100644
index 9c76a6444..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-eclipse-checkstyle
+++ /dev/null
@@ -1,27 +0,0 @@
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-eclipse-pmd b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-eclipse-pmd
deleted file mode 100644
index f1bd7be03..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-eclipse-pmd
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
- true
-
-
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-pmd-ruleset.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-pmd-ruleset.xml
deleted file mode 100644
index d5a412d2f..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/src/main/resources/mahout-pmd-ruleset.xml
+++ /dev/null
@@ -1,189 +0,0 @@
-
-
-
- PMD Plugin preferences rule set
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/target/maven-archiver/pom.properties b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/target/maven-archiver/pom.properties
deleted file mode 100644
index 45966a97a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/buildtools/target/maven-archiver/pom.properties
+++ /dev/null
@@ -1,5 +0,0 @@
-#Generated by Maven
-#Tue Jun 12 14:04:37 IST 2012
-version=0.7
-groupId=org.apache.mahout
-artifactId=mahout-buildtools
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/pom.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/pom.xml
deleted file mode 100644
index 7baf17453..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/pom.xml
+++ /dev/null
@@ -1,244 +0,0 @@
-
-
-
-
-
- 4.0.0
-
-
- org.apache.mahout
- mahout
- 0.7
- ../pom.xml
-
-
-
- mahout-core
- Mahout Core
- Scalable machine learning libraries
-
- jar
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
- UTF-8
- 1.6
- 1.6
- true
-
-
-
- org.apache.maven.plugins
- maven-antrun-plugin
-
-
- compile
-
-
-
-
-
-
-
- run
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- test-jar
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-assembly-plugin
-
-
- job
- package
-
- single
-
-
-
- src/main/assembly/job.xml
-
-
-
-
-
-
-
- maven-javadoc-plugin
-
-
-
- maven-source-plugin
-
-
-
- org.apache.maven.plugins
- maven-remote-resources-plugin
-
- ../src/main/appended-resources
-
- org.apache:apache-jar-resource-bundle:1.4
-
-
- supplemental-models.xml
-
-
-
-
-
-
-
-
-
-
-
- ${project.groupId}
- mahout-math
-
-
-
- ${project.groupId}
- mahout-math
- test-jar
- test
-
-
-
-
- org.codehaus.jackson
- jackson-core-asl
-
-
- org.codehaus.jackson
- jackson-mapper-asl
-
-
-
- org.slf4j
- slf4j-api
-
-
-
- org.slf4j
- slf4j-jcl
- test
-
-
-
- commons-lang
- commons-lang
-
-
-
- com.thoughtworks.xstream
- xstream
-
-
-
- org.apache.lucene
- lucene-core
-
-
-
- org.apache.lucene
- lucene-analyzers
-
-
-
- org.apache.mahout.commons
- commons-cli
-
-
-
- org.apache.commons
- commons-math
-
-
-
- junit
- junit
- test
-
-
-
- org.easymock
- easymock
- test
-
-
-
-
-
-
- hadoop-0.20
-
-
- !hadoop.version
-
-
-
-
- org.apache.hadoop
- hadoop-core
-
-
-
-
- hadoop-0.23
-
-
- hadoop.version
-
-
-
-
- org.apache.hadoop
- hadoop-common
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-common
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
-
-
-
-
-
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/assembly/job.xml b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/assembly/job.xml
deleted file mode 100644
index ca50fed81..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/assembly/job.xml
+++ /dev/null
@@ -1,37 +0,0 @@
-
- job
-
- jar
-
- false
-
-
- true
- runtime
- /
-
- org.apache.hadoop:hadoop-core
-
-
-
-
-
- ${basedir}/target/classes
- /
-
- *.jar
-
-
-
- ${basedir}/target/classes
- /
-
- driver.classes.default.props
-
-
-
-
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/Version.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/Version.java
deleted file mode 100644
index 5f3c879dd..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/Version.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout;
-
-import com.google.common.base.Charsets;
-import com.google.common.io.Resources;
-
-import java.io.IOException;
-
-public final class Version {
-
- private Version() {
- }
-
- public static String version() {
- return Version.class.getPackage().getImplementationVersion();
- }
-
- public static String versionFromResource() throws IOException {
- return Resources.toString(Resources.getResource("version"), Charsets.UTF_8);
- }
-
- public static void main(String[] args) throws IOException {
- System.out.println(version() + ' ' + versionFromResource());
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java
deleted file mode 100644
index ff88270ec..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.Queue;
-
-/**
- * base class for queues holding the top or min k elements of all elements they have been offered
- */
-abstract class FixedSizePriorityQueue {
-
- private final int k;
- private final Comparator super T> queueingComparator;
- private final Comparator super T> sortingComparator;
- private final Queue queue;
-
- FixedSizePriorityQueue(int k, Comparator super T> comparator) {
- Preconditions.checkArgument(k > 0);
- this.k = k;
- Preconditions.checkNotNull(comparator);
- this.queueingComparator = queueingComparator(comparator);
- this.sortingComparator = sortingComparator(comparator);
- this.queue = new PriorityQueue(k + 1, queueingComparator);
- }
-
- abstract Comparator super T> queueingComparator(Comparator super T> stdComparator);
- abstract Comparator super T> sortingComparator(Comparator super T> stdComparator);
-
- public void offer(T item) {
- if (queue.size() < k) {
- queue.add(item);
- } else if (queueingComparator.compare(item, queue.peek()) > 0) {
- queue.add(item);
- queue.poll();
- }
- }
-
- public boolean isEmpty() {
- return queue.isEmpty();
- }
-
- public int size() {
- return queue.size();
- }
-
- public List retrieve() {
- List topItems = Lists.newArrayList(queue);
- Collections.sort(topItems, sortingComparator);
- return topItems;
- }
-
- protected T peek() {
- return queue.peek();
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java
deleted file mode 100644
index f39d02dc5..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-import java.util.Collections;
-import java.util.Comparator;
-
-/**
- * this class will preserve the k minimum elements of all elements it has been offered
- */
-public class MinK extends FixedSizePriorityQueue {
-
- public MinK(int k, Comparator super T> comparator) {
- super(k, comparator);
- }
-
- @Override
- protected Comparator super T> queueingComparator(Comparator super T> stdComparator) {
- return Collections.reverseOrder(stdComparator);
- }
-
- @Override
- protected Comparator super T> sortingComparator(Comparator super T> stdComparator) {
- return stdComparator;
- }
-
- public T greatestSmall() {
- return peek();
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
deleted file mode 100644
index f10ab5e04..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-public final class NoSuchItemException extends TasteException {
-
- public NoSuchItemException() { }
-
- public NoSuchItemException(long itemID) {
- this(String.valueOf(itemID));
- }
-
- public NoSuchItemException(String message) {
- super(message);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
deleted file mode 100644
index 8118bc817..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-public final class NoSuchUserException extends TasteException {
-
- public NoSuchUserException() { }
-
- public NoSuchUserException(long userID) {
- this(String.valueOf(userID));
- }
-
- public NoSuchUserException(String message) {
- super(message);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
deleted file mode 100644
index adc627cfd..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-import java.util.Collection;
-
-/**
- *
- * Implementations of this interface have state that can be periodically refreshed. For example, an
- * implementation instance might contain some pre-computed information that should be periodically refreshed.
- * The {@link #refresh(Collection)} method triggers such a refresh.
- *
- *
- *
- * All Taste components implement this. In particular,
- * {@link org.apache.mahout.cf.taste.recommender.Recommender}s do. Callers may want to call
- * {@link #refresh(Collection)} periodically to re-compute information throughout the system and bring it up
- * to date, though this operation may be expensive.
- *
- */
-public interface Refreshable {
-
- /**
- *
- * Triggers "refresh" -- whatever that means -- of the implementation. The general contract is that any
- * should always leave itself in a consistent, operational state, and that the refresh
- * atomically updates internal state from old to new.
- *
- *
- * @param alreadyRefreshed
- * s that are known to have already been
- * refreshed as a result of an initial call to a method on some
- * object. This ensure that objects in a refresh dependency graph aren't refreshed twice
- * needlessly.
- */
- void refresh(Collection alreadyRefreshed);
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
deleted file mode 100644
index 1792eff28..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-/**
- *
- * An exception thrown when an error occurs inside the Taste engine.
- *
- */
-public class TasteException extends Exception {
-
- public TasteException() { }
-
- public TasteException(String message) {
- super(message);
- }
-
- public TasteException(Throwable cause) {
- super(cause);
- }
-
- public TasteException(String message, Throwable cause) {
- super(message, cause);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java
deleted file mode 100644
index 44d70d641..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-import java.util.Collections;
-import java.util.Comparator;
-
-/**
- * this class will preserve the k maximum elements of all elements it has been offered
- */
-public class TopK extends FixedSizePriorityQueue {
-
- public TopK(int k, Comparator super T> comparator) {
- super(k, comparator);
- }
-
- @Override
- protected Comparator super T> queueingComparator(Comparator super T> stdComparator) {
- return stdComparator;
- }
-
- @Override
- protected Comparator super T> sortingComparator(Comparator super T> stdComparator) {
- return Collections.reverseOrder(stdComparator);
- }
-
- public T smallestGreat() {
- return peek();
- }
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
deleted file mode 100644
index 4e396176a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.common;
-
-/**
- *
- * A simple enum which gives symbolic names to the ideas of "weighted" and "unweighted", to make various API
- * calls which take a weighting parameter more readable.
- *
- */
-public enum Weighting {
-
- WEIGHTED,
- UNWEIGHTED
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
deleted file mode 100644
index 62b38f76a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.eval;
-
-import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-
-/**
- *
- * Implementations of this inner interface are simple helper classes which create a {@link DataModel} to be
- * used while evaluating a {@link org.apache.mahout.cf.taste.recommender.Recommender}.
- *
- * @see RecommenderBuilder
- * @see RecommenderEvaluator
- */
-public interface DataModelBuilder {
-
- /**
- *
- * Builds a {@link DataModel} implementation to be used in an evaluation, given training data.
- *
- *
- * @param trainingData
- * data to be used in the {@link DataModel}
- * @return {@link DataModel} based upon the given data
- */
- DataModel buildDataModel(FastByIDMap trainingData);
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
deleted file mode 100644
index 9c442fff8..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.eval;
-
-/**
- *
- * Implementations encapsulate information retrieval-related statistics about a
- * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
- *
- */
- double getNormalizedDiscountedCumulativeGain();
-
- /**
- * @return the fraction of all users for whom recommendations could be produced
- */
- double getReach();
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
deleted file mode 100644
index 1805092d6..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.eval;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.recommender.Recommender;
-
-/**
- *
- * Implementations of this inner interface are simple helper classes which create a {@link Recommender} to be
- * evaluated based on the given {@link DataModel}.
- *
- * Builds a {@link Recommender} implementation to be evaluated, using the given {@link DataModel}.
- *
- *
- * @param dataModel
- * {@link DataModel} to build the {@link Recommender} on
- * @return {@link Recommender} based upon the given {@link DataModel}
- * @throws TasteException
- * if an error occurs while accessing the {@link DataModel}
- */
- Recommender buildRecommender(DataModel dataModel) throws TasteException;
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
deleted file mode 100644
index bda37656c..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.eval;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.model.DataModel;
-
-/**
- *
- * Implementations of this interface evaluate the quality of a
- * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
- *
- * Evaluates the quality of a {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
- * The range of values that may be returned depends on the implementation, but lower values must
- * mean better recommendations, with 0 being the lowest / best possible evaluation, meaning a perfect match.
- * This method does not accept a {@link org.apache.mahout.cf.taste.recommender.Recommender} directly, but
- * rather a {@link RecommenderBuilder} which can build the
- * {@link org.apache.mahout.cf.taste.recommender.Recommender} to test on top of a given {@link DataModel}.
- *
- *
- *
- * Implementations will take a certain percentage of the preferences supplied by the given {@link DataModel}
- * as "training data". This is typically most of the data, like 90%. This data is used to produce
- * recommendations, and the rest of the data is compared against estimated preference values to see how much
- * the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s predicted preferences match the user's
- * real preferences. Specifically, for each user, this percentage of the user's ratings are used to produce
- * recommendatinos, and for each user, the remaining preferences are compared against the user's real
- * preferences.
- *
- *
- *
- * For large datasets, it may be desirable to only evaluate based on a small percentage of the data.
- * {@code evaluationPercentage} controls how many of the {@link DataModel}'s users are used in
- * evaluation.
- *
- *
- *
- * To be clear, {@code trainingPercentage} and {@code evaluationPercentage} are not related. They
- * do not need to add up to 1.0, for example.
- *
- *
- * @param recommenderBuilder
- * object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
- * @param dataModelBuilder
- * {@link DataModelBuilder} to use, or if null, a default {@link DataModel}
- * implementation will be used
- * @param dataModel
- * dataset to test on
- * @param trainingPercentage
- * percentage of each user's preferences to use to produce recommendations; the rest are compared
- * to estimated preference values to evaluate
- * {@link org.apache.mahout.cf.taste.recommender.Recommender} performance
- * @param evaluationPercentage
- * percentage of users to use in evaluation
- * @return a "score" representing how well the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s
- * estimated preferences match real values; lower scores mean a better match and 0 is a
- * perfect match
- * @throws TasteException
- * if an error occurs while accessing the {@link DataModel}
- */
- double evaluate(RecommenderBuilder recommenderBuilder,
- DataModelBuilder dataModelBuilder,
- DataModel dataModel,
- double trainingPercentage,
- double evaluationPercentage) throws TasteException;
-
- /**
- * @deprecated see {@link DataModel#getMaxPreference()}
- */
- @Deprecated
- float getMaxPreference();
-
- @Deprecated
- void setMaxPreference(float maxPreference);
-
- /**
- * @deprecated see {@link DataModel#getMinPreference()}
- */
- @Deprecated
- float getMinPreference();
-
- @Deprecated
- void setMinPreference(float minPreference);
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
deleted file mode 100644
index a7345aabf..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.eval;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.recommender.IDRescorer;
-
-/**
- *
- * Implementations collect information retrieval-related statistics on a
- * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s performance, including precision, recall and
- * f-measure.
- *
- *
- *
- * See Information retrieval.
- */
-public interface RecommenderIRStatsEvaluator {
-
- /**
- * @param recommenderBuilder
- * object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
- * @param dataModelBuilder
- * {@link DataModelBuilder} to use, or if null, a default {@link DataModel} implementation will be
- * used
- * @param dataModel
- * dataset to test on
- * @param rescorer
- * if any, to use when computing recommendations
- * @param at
- * as in, "precision at 5". The number of recommendations to consider when evaluating precision,
- * etc.
- * @param relevanceThreshold
- * items whose preference value is at least this value are considered "relevant" for the purposes
- * of computations
- * @return {@link IRStatistics} with resulting precision, recall, etc.
- * @throws TasteException
- * if an error occurs while accessing the {@link DataModel}
- */
- IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
- DataModelBuilder dataModelBuilder,
- DataModel dataModel,
- IDRescorer rescorer,
- int at,
- double relevanceThreshold,
- double evaluationPercentage) throws TasteException;
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
deleted file mode 100644
index b27d1adb1..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.eval;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-
-/**
- * Implementations of this interface determine the items that are considered relevant,
- * and splits data into a training and test subset, for purposes of precision/recall
- * tests as implemented by implementations of {@link RecommenderIRStatsEvaluator}.
- */
-public interface RelevantItemsDataSplitter {
-
- /**
- * During testing, relevant items are removed from a particular users' preferences,
- * and a model is build using this user's other preferences and all other users.
- *
- * @param at Maximum number of items to be removed
- * @param relevanceThreshold Minimum strength of preference for an item to be considered
- * relevant
- * @return IDs of relevant items
- */
- FastIDSet getRelevantItemsIDs(long userID,
- int at,
- double relevanceThreshold,
- DataModel dataModel) throws TasteException;
-
- /**
- * Adds a single user and all their preferences to the training model.
- *
- * @param userID ID of user whose preferences we are trying to predict
- * @param relevantItemIDs IDs of items considered relevant to that user
- * @param trainingUsers the database of training preferences to which we will
- * append the ones for otherUserID.
- * @param otherUserID for whom we are adding preferences to the trianing model
- */
- void processOtherUser(long userID,
- FastIDSet relevantItemIDs,
- FastByIDMap trainingUsers,
- long otherUserID,
- DataModel dataModel) throws TasteException;
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java
deleted file mode 100644
index 4d58851e4..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Varint;
-
-/** A {@link org.apache.hadoop.io.Writable} encapsulating an item ID and a count . */
-public final class EntityCountWritable extends VarLongWritable {
-
- private int count;
-
- public EntityCountWritable() {
- // do nothing
- }
-
- public EntityCountWritable(long itemID, int count) {
- super(itemID);
- this.count = count;
- }
-
- public EntityCountWritable(EntityCountWritable other) {
- this(other.get(), other.getCount());
- }
-
- public long getID() {
- return get();
- }
-
- public int getCount() {
- return count;
- }
-
- public void set(long id, int count) {
- set(id);
- this.count = count;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- super.write(out);
- Varint.writeUnsignedVarInt(count, out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- super.readFields(in);
- count = Varint.readUnsignedVarInt(in);
- }
-
- @Override
- public int hashCode() {
- return super.hashCode() ^ count;
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof EntityCountWritable)) {
- return false;
- }
- EntityCountWritable other = (EntityCountWritable) o;
- return get() == other.get() && count == other.getCount();
- }
-
- @Override
- public String toString() {
- return get() + "\t" + count;
- }
-
- @Override
- public EntityCountWritable clone() {
- return new EntityCountWritable(get(), count);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
deleted file mode 100644
index 0106474c5..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import com.google.common.primitives.Longs;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.mahout.math.Varint;
-
-/** A {@link WritableComparable} encapsulating two items. */
-public final class EntityEntityWritable
- implements WritableComparable, Cloneable {
-
- private long aID;
- private long bID;
-
- public EntityEntityWritable() {
- // do nothing
- }
-
- public EntityEntityWritable(long aID, long bID) {
- this.aID = aID;
- this.bID = bID;
- }
-
- public long getAID() {
- return aID;
- }
-
- public long getBID() {
- return bID;
- }
-
- public void set(long aID, long bID) {
- this.aID = aID;
- this.bID = bID;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- Varint.writeSignedVarLong(aID, out);
- Varint.writeSignedVarLong(bID, out);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- aID = Varint.readSignedVarLong(in);
- bID = Varint.readSignedVarLong(in);
- }
-
- @Override
- public int compareTo(EntityEntityWritable that) {
- int aCompare = compare(aID, that.getAID());
- return aCompare == 0 ? compare(bID, that.getBID()) : aCompare;
- }
-
- private static int compare(long a, long b) {
- return a < b ? -1 : a > b ? 1 : 0;
- }
-
- @Override
- public int hashCode() {
- return Longs.hashCode(aID) + 31 * Longs.hashCode(bID);
- }
-
- @Override
- public boolean equals(Object o) {
- if (o instanceof EntityEntityWritable) {
- EntityEntityWritable that = (EntityEntityWritable) o;
- return aID == that.getAID() && bID == that.getBID();
- }
- return false;
- }
-
- @Override
- public String toString() {
- return aID + "\t" + bID;
- }
-
- @Override
- public EntityEntityWritable clone() {
- return new EntityEntityWritable(aID, bID);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
deleted file mode 100644
index 5bcc80578..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.VarLongWritable;
-
-/** A {@link org.apache.hadoop.io.Writable} encapsulating an item ID and a preference value. */
-public final class EntityPrefWritable extends VarLongWritable {
-
- private float prefValue;
-
- public EntityPrefWritable() {
- // do nothing
- }
-
- public EntityPrefWritable(long itemID, float prefValue) {
- super(itemID);
- this.prefValue = prefValue;
- }
-
- public EntityPrefWritable(EntityPrefWritable other) {
- this(other.get(), other.getPrefValue());
- }
-
- public long getID() {
- return get();
- }
-
- public float getPrefValue() {
- return prefValue;
- }
-
- public void set(long id, float prefValue) {
- set(id);
- this.prefValue = prefValue;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- super.write(out);
- out.writeFloat(prefValue);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- super.readFields(in);
- prefValue = in.readFloat();
- }
-
- @Override
- public int hashCode() {
- return super.hashCode() ^ RandomUtils.hashFloat(prefValue);
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof EntityPrefWritable)) {
- return false;
- }
- EntityPrefWritable other = (EntityPrefWritable) o;
- return get() == other.get() && prefValue == other.getPrefValue();
- }
-
- @Override
- public String toString() {
- return get() + "\t" + prefValue;
- }
-
- @Override
- public EntityPrefWritable clone() {
- return new EntityPrefWritable(get(), prefValue);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritableArrayWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritableArrayWritable.java
deleted file mode 100644
index 3e9161b6b..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritableArrayWritable.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.io.ArrayWritable;
-
-/**
- * An {@link ArrayWritable} holding {@link EntityPrefWritable}s
- */
-public class EntityPrefWritableArrayWritable extends ArrayWritable {
-
- public EntityPrefWritableArrayWritable() {
- super(EntityPrefWritable.class);
- }
-
- public EntityPrefWritableArrayWritable(EntityPrefWritable[] prefs) {
- super(EntityPrefWritable.class, prefs);
- }
-
- public EntityPrefWritable[] getPrefs() {
- return (EntityPrefWritable[]) toArray();
- }
-
- @Override
- public String toString() {
- return Arrays.toString(toStrings());
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
deleted file mode 100644
index a833d5931..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import com.google.common.collect.Lists;
-import org.apache.hadoop.io.Writable;
-import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.math.Varint;
-
-/**
- * A {@link Writable} which encapsulates a list of {@link RecommendedItem}s. This is the mapper (and reducer)
- * output, and represents items recommended to a user. The first item is the one whose estimated preference is
- * highest.
- */
-public final class RecommendedItemsWritable implements Writable {
-
- private List recommended;
-
- public RecommendedItemsWritable() {
- // do nothing
- }
-
- public RecommendedItemsWritable(List recommended) {
- this.recommended = recommended;
- }
-
- public List getRecommendedItems() {
- return recommended;
- }
-
- public void set(List recommended) {
- this.recommended = recommended;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(recommended.size());
- for (RecommendedItem item : recommended) {
- Varint.writeSignedVarLong(item.getItemID(), out);
- out.writeFloat(item.getValue());
- }
-
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- int size = in.readInt();
- recommended = Lists.newArrayListWithCapacity(size);
- for (int i = 0; i < size; i++) {
- long itemID = Varint.readSignedVarLong(in);
- float value = in.readFloat();
- RecommendedItem recommendedItem = new GenericRecommendedItem(itemID, value);
- recommended.add(recommendedItem);
- }
- }
-
- @Override
- public String toString() {
- StringBuilder result = new StringBuilder(200);
- result.append('[');
- boolean first = true;
- for (RecommendedItem item : recommended) {
- if (first) {
- first = false;
- } else {
- result.append(',');
- }
- result.append(String.valueOf(item.getItemID()));
- result.append(':');
- result.append(String.valueOf(item.getValue()));
- }
- result.append(']');
- return result.toString();
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
deleted file mode 100644
index e0ae35411..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import com.google.common.primitives.Longs;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.map.OpenIntLongHashMap;
-
-import java.util.regex.Pattern;
-
-/**
- * Some helper methods for the hadoop-related stuff in org.apache.mahout.cf.taste
- */
-public final class TasteHadoopUtils {
-
- /** Standard delimiter of textual preference data */
- private static final Pattern PREFERENCE_TOKEN_DELIMITER = Pattern.compile("[\t,]");
-
- private TasteHadoopUtils() {}
-
- /**
- * Splits a preference data line into string tokens
- */
- public static String[] splitPrefTokens(CharSequence line) {
- return PREFERENCE_TOKEN_DELIMITER.split(line);
- }
-
- /**
- * Maps a long to an int
- */
- public static int idToIndex(long id) {
- return 0x7FFFFFFF & Longs.hashCode(id);
- }
-
- /**
- * Reads a binary mapping file
- */
- public static OpenIntLongHashMap readItemIDIndexMap(String itemIDIndexPathStr, Configuration conf) {
- OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
- Path itemIDIndexPath = new Path(itemIDIndexPathStr);
- for (Pair record
- : new SequenceFileDirIterable(itemIDIndexPath,
- PathType.LIST,
- PathFilters.partFilter(),
- null,
- true,
- conf)) {
- indexItemIDMap.put(record.getFirst().get(), record.getSecond().get());
- }
- return indexItemIDMap;
- }
-
-
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
deleted file mode 100644
index fdb552ecd..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
-import org.apache.mahout.math.VarLongWritable;
-
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-public abstract class ToEntityPrefsMapper extends
- Mapper {
-
- public static final String TRANSPOSE_USER_ITEM = ToEntityPrefsMapper.class + "transposeUserItem";
- public static final String RATING_SHIFT = ToEntityPrefsMapper.class + "shiftRatings";
-
- private static final Pattern DELIMITER = Pattern.compile("[\t,]");
-
- private boolean booleanData;
- private boolean transpose;
- private final boolean itemKey;
- private float ratingShift;
-
- ToEntityPrefsMapper(boolean itemKey) {
- this.itemKey = itemKey;
- }
-
- @Override
- protected void setup(Context context) {
- Configuration jobConf = context.getConfiguration();
- booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
- transpose = jobConf.getBoolean(TRANSPOSE_USER_ITEM, false);
- ratingShift = Float.parseFloat(jobConf.get(RATING_SHIFT, "0.0"));
- }
-
- @Override
- public void map(LongWritable key,
- Text value,
- Context context) throws IOException, InterruptedException {
- String[] tokens = DELIMITER.split(value.toString());
- long userID = Long.parseLong(tokens[0]);
- long itemID = Long.parseLong(tokens[1]);
- if (itemKey ^ transpose) {
- // If using items as keys, and not transposing items and users, then users are items!
- // Or if not using items as keys (users are, as usual), but transposing items and users,
- // then users are items! Confused?
- long temp = userID;
- userID = itemID;
- itemID = temp;
- }
- if (booleanData) {
- context.write(new VarLongWritable(userID), new VarLongWritable(itemID));
- } else {
- float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) + ratingShift : 1.0f;
- context.write(new VarLongWritable(userID), new EntityPrefWritable(itemID, prefValue));
- }
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
deleted file mode 100644
index f147cf3ec..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-/**
- *
Input
- *
- *
- * Intended for use with {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat};
- * accepts line number / line pairs as
- * {@link org.apache.hadoop.io.LongWritable}/{@link org.apache.hadoop.io.Text} pairs.
- *
- *
- *
- * Each line is assumed to be of the form {@code userID,itemID,preference}, or {@code userID,itemID}.
- *
- *
- *
Output
- *
- *
- * Outputs the user ID as a {@link org.apache.mahout.math.VarLongWritable} mapped to the item ID and preference as a
- * {@link EntityPrefWritable}.
- *
- */
-public final class ToItemPrefsMapper extends ToEntityPrefsMapper {
-
- public ToItemPrefsMapper() {
- super(false);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java
deleted file mode 100644
index 78567ec41..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop;
-
-/**
- * The 'reverse' of {@link ToItemPrefsMapper}; outputs item IDs mapped to user-pref data.
- */
-public final class ToUserPrefsMapper extends ToEntityPrefsMapper {
-
- public ToUserPrefsMapper() {
- super(true);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java
deleted file mode 100644
index fc3336e44..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.als;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.map.OpenIntObjectHashMap;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-final class ALSUtils {
-
- private ALSUtils() {}
-
- static Vector readFirstRow(Path dir, Configuration conf) throws IOException {
- Iterator iterator = new SequenceFileDirValueIterator(dir,
- PathType.LIST,
- PathFilters.partFilter(),
- null,
- true,
- conf);
- return iterator.hasNext() ? iterator.next().get() : null;
- }
-
- static OpenIntObjectHashMap readMatrixByRows(Path dir, Configuration conf) {
- OpenIntObjectHashMap matrix = new OpenIntObjectHashMap();
-
- for (Pair pair :
- new SequenceFileDirIterable(dir, PathType.LIST, PathFilters.partFilter(), conf)) {
- int rowIndex = pair.getFirst().get();
- Vector row = pair.getSecond().get().clone();
- matrix.put(rowIndex, row);
- }
- return matrix;
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
deleted file mode 100644
index f84a29f96..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.als;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.RandomUtils;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-/**
- *
Split a recommendation dataset into a training and a test set
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
--input (path): Directory containing one or more text files with the dataset
- *
--output (path): path where output should go
- *
--trainingPercentage (double): percentage of the data to use as training set (optional, default 0.9)
- *
--probePercentage (double): percentage of the data to use as probe set (optional, default 0.1)
- *
- */
-public class DatasetSplitter extends AbstractJob {
-
- private static final String TRAINING_PERCENTAGE = DatasetSplitter.class.getName() + ".trainingPercentage";
- private static final String PROBE_PERCENTAGE = DatasetSplitter.class.getName() + ".probePercentage";
- private static final String PART_TO_USE = DatasetSplitter.class.getName() + ".partToUse";
-
- private static final Text INTO_TRAINING_SET = new Text("T");
- private static final Text INTO_PROBE_SET = new Text("P");
-
- private static final double DEFAULT_TRAINING_PERCENTAGE = 0.9;
- private static final double DEFAULT_PROBE_PERCENTAGE = 0.1;
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new DatasetSplitter(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOutputOption();
- addOption("trainingPercentage", "t", "percentage of the data to use as training set (default: "
- + DEFAULT_TRAINING_PERCENTAGE + ')', String.valueOf(DEFAULT_TRAINING_PERCENTAGE));
- addOption("probePercentage", "p", "percentage of the data to use as probe set (default: "
- + DEFAULT_PROBE_PERCENTAGE + ')', String.valueOf(DEFAULT_PROBE_PERCENTAGE));
-
- Map> parsedArgs = parseArguments(args);
- double trainingPercentage = Double.parseDouble(getOption("trainingPercentage"));
- double probePercentage = Double.parseDouble(getOption("probePercentage"));
- String tempDir = getOption("tempDir");
-
- Path markedPrefs = new Path(tempDir, "markedPreferences");
- Path trainingSetPath = new Path(getOutputPath(), "trainingSet");
- Path probeSetPath = new Path(getOutputPath(), "probeSet");
-
- Job markPreferences = prepareJob(getInputPath(), markedPrefs, TextInputFormat.class, MarkPreferencesMapper.class,
- Text.class, Text.class, SequenceFileOutputFormat.class);
- markPreferences.getConfiguration().set(TRAINING_PERCENTAGE, String.valueOf(trainingPercentage));
- markPreferences.getConfiguration().set(PROBE_PERCENTAGE, String.valueOf(probePercentage));
- boolean succeeded = markPreferences.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- Job createTrainingSet = prepareJob(markedPrefs, trainingSetPath, SequenceFileInputFormat.class,
- WritePrefsMapper.class, NullWritable.class, Text.class, TextOutputFormat.class);
- createTrainingSet.getConfiguration().set(PART_TO_USE, INTO_TRAINING_SET.toString());
- succeeded = createTrainingSet.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- Job createProbeSet = prepareJob(markedPrefs, probeSetPath, SequenceFileInputFormat.class,
- WritePrefsMapper.class, NullWritable.class, Text.class, TextOutputFormat.class);
- createProbeSet.getConfiguration().set(PART_TO_USE, INTO_PROBE_SET.toString());
- succeeded = createProbeSet.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- return 0;
- }
-
- static class MarkPreferencesMapper extends Mapper {
-
- private Random random;
- private double trainingBound;
- private double probeBound;
-
- @Override
- protected void setup(Context ctx) throws IOException, InterruptedException {
- random = RandomUtils.getRandom();
- trainingBound = Double.parseDouble(ctx.getConfiguration().get(TRAINING_PERCENTAGE));
- probeBound = trainingBound + Double.parseDouble(ctx.getConfiguration().get(PROBE_PERCENTAGE));
- }
-
- @Override
- protected void map(LongWritable key, Text text, Context ctx) throws IOException, InterruptedException {
- double randomValue = random.nextDouble();
- if (randomValue <= trainingBound) {
- ctx.write(INTO_TRAINING_SET, text);
- } else if (randomValue <= probeBound) {
- ctx.write(INTO_PROBE_SET, text);
- }
- }
- }
-
- static class WritePrefsMapper extends Mapper {
-
- private String partToUse;
-
- @Override
- protected void setup(Context ctx) throws IOException, InterruptedException {
- partToUse = ctx.getConfiguration().get(PART_TO_USE);
- }
-
- @Override
- protected void map(Text key, Text text, Context ctx) throws IOException, InterruptedException {
- if (partToUse.equals(key.toString())) {
- ctx.write(NullWritable.get(), text);
- }
- }
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
deleted file mode 100644
index 0c6e3f764..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.als;
-
-import com.google.common.io.Closeables;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.map.OpenIntObjectHashMap;
-
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.List;
-import java.util.Map;
-
-/**
- *
Measures the root-mean-squared error of a ratring matrix factorization against a test set.
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
--output (path): path where output should go
- *
--pairs (path): path containing the test ratings, each line must be userID,itemID,rating
- *
--userFeatures (path): path to the user feature matrix
- *
--itemFeatures (path): path to the item feature matrix
- *
- */
-public class FactorizationEvaluator extends AbstractJob {
-
- private static final String USER_FEATURES_PATH = RecommenderJob.class.getName() + ".userFeatures";
- private static final String ITEM_FEATURES_PATH = RecommenderJob.class.getName() + ".itemFeatures";
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new FactorizationEvaluator(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOption("userFeatures", null, "path to the user feature matrix", true);
- addOption("itemFeatures", null, "path to the item feature matrix", true);
- addOutputOption();
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- Path errors = getTempPath("errors");
-
- Job predictRatings = prepareJob(getInputPath(), errors, TextInputFormat.class, PredictRatingsMapper.class,
- DoubleWritable.class, NullWritable.class, SequenceFileOutputFormat.class);
-
- predictRatings.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
- predictRatings.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
- boolean succeeded = predictRatings.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- BufferedWriter writer = null;
- try {
- FileSystem fs = FileSystem.get(getOutputPath().toUri(), getConf());
- FSDataOutputStream outputStream = fs.create(getOutputPath("rmse.txt"));
- double rmse = computeRmse(errors);
- writer = new BufferedWriter(new OutputStreamWriter(outputStream));
- writer.write(String.valueOf(rmse));
- } finally {
- Closeables.closeQuietly(writer);
- }
-
- return 0;
- }
-
- protected double computeRmse(Path errors) {
- RunningAverage average = new FullRunningAverage();
- for (Pair entry :
- new SequenceFileDirIterable(errors, PathType.LIST, PathFilters.logsCRCFilter(),
- getConf())) {
- DoubleWritable error = entry.getFirst();
- average.addDatum(error.get() * error.get());
- }
-
- return Math.sqrt(average.getAverage());
- }
-
- public static class PredictRatingsMapper extends Mapper {
-
- private OpenIntObjectHashMap U;
- private OpenIntObjectHashMap M;
-
- @Override
- protected void setup(Context ctx) throws IOException, InterruptedException {
- Path pathToU = new Path(ctx.getConfiguration().get(USER_FEATURES_PATH));
- Path pathToM = new Path(ctx.getConfiguration().get(ITEM_FEATURES_PATH));
-
- U = ALSUtils.readMatrixByRows(pathToU, ctx.getConfiguration());
- M = ALSUtils.readMatrixByRows(pathToM, ctx.getConfiguration());
- }
-
- @Override
- protected void map(LongWritable key, Text value, Context ctx) throws IOException, InterruptedException {
-
- String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
- int userID = Integer.parseInt(tokens[0]);
- int itemID = Integer.parseInt(tokens[1]);
- double rating = Double.parseDouble(tokens[2]);
-
- if (U.containsKey(userID) && M.containsKey(itemID)) {
- double estimate = U.get(userID).dot(M.get(itemID));
- double err = rating - estimate;
- ctx.write(new DoubleWritable(err), NullWritable.get());
- }
- }
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
deleted file mode 100644
index 7dc3b7988..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
+++ /dev/null
@@ -1,329 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.als;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.common.mapreduce.MergeVectorsCombiner;
-import org.apache.mahout.common.mapreduce.MergeVectorsReducer;
-import org.apache.mahout.common.mapreduce.TransposeMapper;
-import org.apache.mahout.common.mapreduce.VectorSumReducer;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.als.AlternatingLeastSquaresSolver;
-import org.apache.mahout.math.als.ImplicitFeedbackAlternatingLeastSquaresSolver;
-import org.apache.mahout.math.map.OpenIntObjectHashMap;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-/**
- *
MapReduce implementation of the two factorization algorithms described in
- *
- *
"Large-scale Parallel Collaborative Filtering for the Netflix Prize" available at
- * http://www.hpl.hp.com/personal/Robert_Schreiber/papers/2008%20AAIM%20Netflix/netflix_aaim08(submitted).pdf.
- *
- * "
Collaborative Filtering for Implicit Feedback Datasets" available at
- * http://research.yahoo.com/pub/2433
- *
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
--input (path): Directory containing one or more text files with the dataset
- *
--output (path): path where output should go
- *
--lambda (double): regularization parameter to avoid overfitting
- *
--userFeatures (path): path to the user feature matrix
- *
--itemFeatures (path): path to the item feature matrix
- *
- */
-public class ParallelALSFactorizationJob extends AbstractJob {
-
- private static final Logger log = LoggerFactory.getLogger(ParallelALSFactorizationJob.class);
-
- static final String NUM_FEATURES = ParallelALSFactorizationJob.class.getName() + ".numFeatures";
- static final String LAMBDA = ParallelALSFactorizationJob.class.getName() + ".lambda";
- static final String ALPHA = ParallelALSFactorizationJob.class.getName() + ".alpha";
- static final String FEATURE_MATRIX = ParallelALSFactorizationJob.class.getName() + ".featureMatrix";
-
- private boolean implicitFeedback;
- private int numIterations;
- private int numFeatures;
- private double lambda;
- private double alpha;
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new ParallelALSFactorizationJob(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOutputOption();
- addOption("lambda", null, "regularization parameter", true);
- addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
- addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
- addOption("numFeatures", null, "dimension of the feature space", true);
- addOption("numIterations", null, "number of iterations", true);
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- numFeatures = Integer.parseInt(getOption("numFeatures"));
- numIterations = Integer.parseInt(getOption("numIterations"));
- lambda = Double.parseDouble(getOption("lambda"));
- alpha = Double.parseDouble(getOption("alpha"));
- implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback"));
-
- /*
- * compute the factorization A = U M'
- *
- * where A (users x items) is the matrix of known ratings
- * U (users x features) is the representation of users in the feature space
- * M (items x features) is the representation of items in the feature space
- */
-
- /* create A' */
- Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(),
- TextInputFormat.class, ItemRatingVectorsMapper.class, IntWritable.class,
- VectorWritable.class, VectorSumReducer.class, IntWritable.class,
- VectorWritable.class, SequenceFileOutputFormat.class);
- itemRatings.setCombinerClass(VectorSumReducer.class);
- boolean succeeded = itemRatings.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- /* create A */
- Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(),
- TransposeMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
- VectorWritable.class);
- userRatings.setCombinerClass(MergeVectorsCombiner.class);
- succeeded = userRatings.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- //TODO this could be fiddled into one of the upper jobs
- Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
- AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
- IntWritable.class, VectorWritable.class);
- averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
- succeeded = averageItemRatings.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
- Vector averageRatings = ALSUtils.readFirstRow(getTempPath("averageRatings"), getConf());
-
- /* create an initial M */
- initializeM(averageRatings);
-
- for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {
- /* broadcast M, read A row-wise, recompute U row-wise */
- log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
- runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1));
- /* broadcast U, read A' row-wise, recompute M row-wise */
- log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
- runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration));
- }
-
- return 0;
- }
-
- private void initializeM(Vector averageRatings) throws IOException {
- Random random = RandomUtils.getRandom();
-
- FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
- SequenceFile.Writer writer = null;
- try {
- writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"), IntWritable.class,
- VectorWritable.class);
-
- Iterator averages = averageRatings.iterateNonZero();
- while (averages.hasNext()) {
- Vector.Element e = averages.next();
- Vector row = new DenseVector(numFeatures);
- row.setQuick(0, e.get());
- for (int m = 1; m < numFeatures; m++) {
- row.setQuick(m, random.nextDouble());
- }
- writer.append(new IntWritable(e.index()), new VectorWritable(row));
- }
- } finally {
- Closeables.closeQuietly(writer);
- }
- }
-
- static class ItemRatingVectorsMapper extends Mapper {
- @Override
- protected void map(LongWritable offset, Text line, Context ctx) throws IOException, InterruptedException {
- String[] tokens = TasteHadoopUtils.splitPrefTokens(line.toString());
- int userID = Integer.parseInt(tokens[0]);
- int itemID = Integer.parseInt(tokens[1]);
- float rating = Float.parseFloat(tokens[2]);
-
- Vector ratings = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
- ratings.set(userID, rating);
-
- ctx.write(new IntWritable(itemID), new VectorWritable(ratings, true));
- }
- }
-
- private void runSolver(Path ratings, Path output, Path pathToUorI)
- throws ClassNotFoundException, IOException, InterruptedException {
-
- Class extends Mapper> solverMapper = implicitFeedback ?
- SolveImplicitFeedbackMapper.class : SolveExplicitFeedbackMapper.class;
-
- Job solverForUorI = prepareJob(ratings, output, SequenceFileInputFormat.class, solverMapper, IntWritable.class,
- VectorWritable.class, SequenceFileOutputFormat.class);
- Configuration solverConf = solverForUorI.getConfiguration();
- solverConf.set(LAMBDA, String.valueOf(lambda));
- solverConf.set(ALPHA, String.valueOf(alpha));
- solverConf.setInt(NUM_FEATURES, numFeatures);
- solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
- boolean succeeded = solverForUorI.waitForCompletion(true);
- if (!succeeded)
- throw new IllegalStateException("Job failed!");
- }
-
- static class SolveExplicitFeedbackMapper extends Mapper {
-
- private double lambda;
- private int numFeatures;
-
- private OpenIntObjectHashMap UorM;
-
- private AlternatingLeastSquaresSolver solver;
-
- @Override
- protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
- lambda = Double.parseDouble(ctx.getConfiguration().get(LAMBDA));
- numFeatures = ctx.getConfiguration().getInt(NUM_FEATURES, -1);
- solver = new AlternatingLeastSquaresSolver();
-
- Path UOrIPath = new Path(ctx.getConfiguration().get(FEATURE_MATRIX));
-
- UorM = ALSUtils.readMatrixByRows(UOrIPath, ctx.getConfiguration());
- Preconditions.checkArgument(numFeatures > 0, "numFeatures was not set correctly!");
- }
-
- @Override
- protected void map(IntWritable userOrItemID, VectorWritable ratingsWritable, Context ctx)
- throws IOException, InterruptedException {
- Vector ratings = new SequentialAccessSparseVector(ratingsWritable.get());
- List featureVectors = Lists.newArrayList();
- Iterator interactions = ratings.iterateNonZero();
- while (interactions.hasNext()) {
- int index = interactions.next().index();
- featureVectors.add(UorM.get(index));
- }
-
- Vector uiOrmj = solver.solve(featureVectors, ratings, lambda, numFeatures);
-
- ctx.write(userOrItemID, new VectorWritable(uiOrmj));
- }
- }
-
- static class SolveImplicitFeedbackMapper extends Mapper {
-
- private ImplicitFeedbackAlternatingLeastSquaresSolver solver;
-
- @Override
- protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
- double lambda = Double.parseDouble(ctx.getConfiguration().get(LAMBDA));
- double alpha = Double.parseDouble(ctx.getConfiguration().get(ALPHA));
- int numFeatures = ctx.getConfiguration().getInt(NUM_FEATURES, -1);
-
- Path YPath = new Path(ctx.getConfiguration().get(FEATURE_MATRIX));
- OpenIntObjectHashMap Y = ALSUtils.readMatrixByRows(YPath, ctx.getConfiguration());
-
- solver = new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, Y);
-
- Preconditions.checkArgument(numFeatures > 0, "numFeatures was not set correctly!");
- }
-
- @Override
- protected void map(IntWritable userOrItemID, VectorWritable ratingsWritable, Context ctx)
- throws IOException, InterruptedException {
- Vector ratings = new SequentialAccessSparseVector(ratingsWritable.get());
-
- Vector uiOrmj = solver.solve(ratings);
-
- ctx.write(userOrItemID, new VectorWritable(uiOrmj));
- }
- }
-
- static class AverageRatingMapper extends Mapper {
- @Override
- protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {
- RunningAverage avg = new FullRunningAverage();
- Iterator elements = v.get().iterateNonZero();
- while (elements.hasNext()) {
- avg.addDatum(elements.next().get());
- }
- Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
- vector.setQuick(r.get(), avg.getAverage());
- ctx.write(new IntWritable(0), new VectorWritable(vector));
- }
- }
-
- private Path pathToM(int iteration) {
- return iteration == numIterations - 1 ? getOutputPath("M") : getTempPath("M-" + iteration);
- }
-
- private Path pathToU(int iteration) {
- return iteration == numIterations - 1 ? getOutputPath("U") : getTempPath("U-" + iteration);
- }
-
- private Path pathToItemRatings() {
- return getTempPath("itemRatings");
- }
-
- private Path pathToUserRatings() {
- return getOutputPath("userRatings");
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
deleted file mode 100644
index 9ca0b1663..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.als;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Floats;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.common.TopK;
-import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.function.IntObjectProcedure;
-import org.apache.mahout.math.map.OpenIntObjectHashMap;
-import org.apache.mahout.math.set.OpenIntHashSet;
-
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-/**
- *
Computes the top-N recommendations per user from a decomposition of the rating matrix
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
--input (path): Directory containing the vectorized user ratings
- *
--output (path): path where output should go
- *
--numRecommendations (int): maximum number of recommendations per user
- *
--maxRating (double): maximum rating of an item
- *
--numFeatures (int): number of features to use for decomposition
- *
- */
-public class RecommenderJob extends AbstractJob {
-
- private static final String NUM_RECOMMENDATIONS = RecommenderJob.class.getName() + ".numRecommendations";
- private static final String USER_FEATURES_PATH = RecommenderJob.class.getName() + ".userFeatures";
- private static final String ITEM_FEATURES_PATH = RecommenderJob.class.getName() + ".itemFeatures";
- private static final String MAX_RATING = RecommenderJob.class.getName() + ".maxRating";
-
- static final int DEFAULT_NUM_RECOMMENDATIONS = 10;
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new RecommenderJob(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOption("userFeatures", null, "path to the user feature matrix", true);
- addOption("itemFeatures", null, "path to the item feature matrix", true);
- addOption("numRecommendations", null, "number of recommendations per user",
- String.valueOf(DEFAULT_NUM_RECOMMENDATIONS));
- addOption("maxRating", null, "maximum rating available", true);
- addOutputOption();
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- Job prediction = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, PredictionMapper.class,
- IntWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
- prediction.getConfiguration().setInt(NUM_RECOMMENDATIONS,
- Integer.parseInt(getOption("numRecommendations")));
- prediction.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
- prediction.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
- prediction.getConfiguration().set(MAX_RATING, getOption("maxRating"));
- boolean succeeded = prediction.waitForCompletion(true);
- if (!succeeded)
- return -1;
-
-
- return 0;
- }
-
- private static final Comparator BY_PREFERENCE_VALUE =
- new Comparator() {
- @Override
- public int compare(RecommendedItem one, RecommendedItem two) {
- return Floats.compare(one.getValue(), two.getValue());
- }
- };
-
- static class PredictionMapper
- extends Mapper {
-
- private OpenIntObjectHashMap U;
- private OpenIntObjectHashMap M;
-
- private int recommendationsPerUser;
- private float maxRating;
-
- @Override
- protected void setup(Context ctx) throws IOException, InterruptedException {
- recommendationsPerUser = ctx.getConfiguration().getInt(NUM_RECOMMENDATIONS,
- DEFAULT_NUM_RECOMMENDATIONS);
-
- Path pathToU = new Path(ctx.getConfiguration().get(USER_FEATURES_PATH));
- Path pathToM = new Path(ctx.getConfiguration().get(ITEM_FEATURES_PATH));
-
- U = ALSUtils.readMatrixByRows(pathToU, ctx.getConfiguration());
- M = ALSUtils.readMatrixByRows(pathToM, ctx.getConfiguration());
-
- maxRating = Float.parseFloat(ctx.getConfiguration().get(MAX_RATING));
- }
-
- @Override
- protected void map(IntWritable userIDWritable, VectorWritable ratingsWritable, Context ctx)
- throws IOException, InterruptedException {
-
- Vector ratings = ratingsWritable.get();
- final int userID = userIDWritable.get();
- final OpenIntHashSet alreadyRatedItems = new OpenIntHashSet(ratings.getNumNondefaultElements());
- final TopK topKItems = new TopK(recommendationsPerUser, BY_PREFERENCE_VALUE);
-
- Iterator ratingsIterator = ratings.iterateNonZero();
- while (ratingsIterator.hasNext()) {
- alreadyRatedItems.add(ratingsIterator.next().index());
- }
-
- M.forEachPair(new IntObjectProcedure() {
- @Override
- public boolean apply(int itemID, Vector itemFeatures) {
- if (!alreadyRatedItems.contains(itemID)) {
- double predictedRating = U.get(userID).dot(itemFeatures);
- topKItems.offer(new GenericRecommendedItem(itemID, (float) predictedRating));
- }
- return true;
- }
- });
-
- List recommendedItems = Lists.newArrayListWithExpectedSize(recommendationsPerUser);
- for (RecommendedItem topItem : topKItems.retrieve()) {
- recommendedItems.add(new GenericRecommendedItem(topItem.getItemID(), Math.min(topItem.getValue(), maxRating)));
- }
-
- if (!topKItems.isEmpty()) {
- ctx.write(userIDWritable, new RecommendedItemsWritable(recommendedItems));
- }
- }
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
deleted file mode 100644
index 0d8212c99..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import com.google.common.primitives.Floats;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.common.TopK;
-import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.common.iterator.FileLineIterable;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.function.DoubleFunction;
-import org.apache.mahout.math.map.OpenIntLongHashMap;
-
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Iterator;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- *
computes prediction values for each user
- *
- *
- * u = a user
- * i = an item not yet rated by u
- * N = all items similar to i (where similarity is usually computed by pairwisely comparing the item-vectors
- * of the user-item matrix)
- *
- * Prediction(u,i) = sum(all n from N: similarity(i,n) * rating(u,n)) / sum(all n from N: abs(similarity(i,n)))
- *
- */
-public final class AggregateAndRecommendReducer extends
- Reducer {
-
- private static final Logger log = LoggerFactory.getLogger(AggregateAndRecommendReducer.class);
-
- static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
- static final String NUM_RECOMMENDATIONS = "numRecommendations";
- static final int DEFAULT_NUM_RECOMMENDATIONS = 10;
- static final String ITEMS_FILE = "itemsFile";
-
- private boolean booleanData;
- private int recommendationsPerUser;
- private FastIDSet itemsToRecommendFor;
- private OpenIntLongHashMap indexItemIDMap;
-
- private static final float BOOLEAN_PREF_VALUE = 1.0f;
- private static final Comparator BY_PREFERENCE_VALUE =
- new Comparator() {
- @Override
- public int compare(RecommendedItem one, RecommendedItem two) {
- return Floats.compare(one.getValue(), two.getValue());
- }
- };
-
- @Override
- protected void setup(Context context) throws IOException {
- Configuration conf = context.getConfiguration();
- recommendationsPerUser = conf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
- booleanData = conf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
- indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);
-
- String itemFilePathString = conf.get(ITEMS_FILE);
- if (itemFilePathString != null) {
- itemsToRecommendFor = new FastIDSet();
- for (String line : new FileLineIterable(HadoopUtil.openStream(new Path(itemFilePathString), conf))) {
- try {
- itemsToRecommendFor.add(Long.parseLong(line));
- } catch (NumberFormatException nfe) {
- log.warn("itemsFile line ignored: {}", line);
- }
- }
- }
- }
-
- private static final DoubleFunction ABSOLUTE_VALUES = new DoubleFunction() {
- @Override
- public double apply(double value) {
- return value < 0 ? value * -1 : value;
- }
- };
-
- @Override
- protected void reduce(VarLongWritable userID,
- Iterable values,
- Context context) throws IOException, InterruptedException {
- if (booleanData) {
- reduceBooleanData(userID, values, context);
- } else {
- reduceNonBooleanData(userID, values, context);
- }
- }
-
- private void reduceBooleanData(VarLongWritable userID,
- Iterable values,
- Context context) throws IOException, InterruptedException {
- /* having boolean data, each estimated preference can only be 1,
- * however we can't use this to rank the recommended items,
- * so we use the sum of similarities for that. */
- Vector predictionVector = null;
- for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
- predictionVector = predictionVector == null
- ? prefAndSimilarityColumn.getSimilarityColumn()
- : predictionVector.plus(prefAndSimilarityColumn.getSimilarityColumn());
- }
- writeRecommendedItems(userID, predictionVector, context);
- }
-
- private void reduceNonBooleanData(VarLongWritable userID,
- Iterable values,
- Context context) throws IOException, InterruptedException {
- /* each entry here is the sum in the numerator of the prediction formula */
- Vector numerators = null;
- /* each entry here is the sum in the denominator of the prediction formula */
- Vector denominators = null;
- /* each entry here is the number of similar items used in the prediction formula */
- Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
-
- for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
- Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
- float prefValue = prefAndSimilarityColumn.getPrefValue();
- /* count the number of items used for each prediction */
- Iterator usedItemsIterator = simColumn.iterateNonZero();
- while (usedItemsIterator.hasNext()) {
- int itemIDIndex = usedItemsIterator.next().index();
- numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
- }
-
- numerators = numerators == null
- ? prefValue == BOOLEAN_PREF_VALUE ? simColumn.clone() : simColumn.times(prefValue)
- : numerators.plus(prefValue == BOOLEAN_PREF_VALUE ? simColumn : simColumn.times(prefValue));
-
- simColumn.assign(ABSOLUTE_VALUES);
- denominators = denominators == null ? simColumn : denominators.plus(simColumn);
- }
-
- if (numerators == null) {
- return;
- }
-
- Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
- Iterator iterator = numerators.iterateNonZero();
- while (iterator.hasNext()) {
- Vector.Element element = iterator.next();
- int itemIDIndex = element.index();
- /* preference estimations must be based on at least 2 datapoints */
- if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
- /* compute normalized prediction */
- double prediction = element.get() / denominators.getQuick(itemIDIndex);
- recommendationVector.setQuick(itemIDIndex, prediction);
- }
- }
- writeRecommendedItems(userID, recommendationVector, context);
- }
-
- /**
- * find the top entries in recommendationVector, map them to the real itemIDs and write back the result
- */
- private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context)
- throws IOException, InterruptedException {
-
- TopK topKItems = new TopK(recommendationsPerUser, BY_PREFERENCE_VALUE);
-
- Iterator recommendationVectorIterator = recommendationVector.iterateNonZero();
- while (recommendationVectorIterator.hasNext()) {
- Vector.Element element = recommendationVectorIterator.next();
- int index = element.index();
- long itemID;
- if (indexItemIDMap != null && !indexItemIDMap.isEmpty()) {
- itemID = indexItemIDMap.get(index);
- } else { //we don't have any mappings, so just use the original
- itemID = index;
- }
- if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) {
- float value = (float) element.get();
- if (!Float.isNaN(value)) {
- topKItems.offer(new GenericRecommendedItem(itemID, value));
- }
- }
- }
-
- if (!topKItems.isEmpty()) {
- context.write(userID, new RecommendedItemsWritable(topKItems.retrieve()));
- }
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
deleted file mode 100644
index a31bf3015..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import com.google.common.collect.Lists;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Vector;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * we use a neat little trick to explicitly filter items for some users: we inject a NaN summand into the preference
- * estimation for those items, which makes {@link org.apache.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer}
- * automatically exclude them
- */
-public class ItemFilterAsVectorAndPrefsReducer
- extends Reducer {
- @Override
- protected void reduce(VarLongWritable itemID, Iterable values, Context ctx)
- throws IOException, InterruptedException {
-
- int itemIDIndex = TasteHadoopUtils.idToIndex(itemID.get());
- Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
- /* artificial NaN summand to exclude this item from the recommendations for all users specified in userIDs */
- vector.set(itemIDIndex, Double.NaN);
-
- List userIDs = Lists.newArrayList();
- List prefValues = Lists.newArrayList();
- for (VarLongWritable userID : values) {
- userIDs.add(userID.get());
- prefValues.add(1.0f);
- }
-
- ctx.write(new VarIntWritable(itemIDIndex), new VectorAndPrefsWritable(vector, userIDs, prefValues));
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java
deleted file mode 100644
index 6350f0c4c..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarLongWritable;
-
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-/**
- * map out all user/item pairs to filter, keyed by the itemID
- */
-public class ItemFilterMapper extends Mapper {
-
- private static final Pattern SEPARATOR = Pattern.compile("[\t,]");
-
- @Override
- protected void map(LongWritable key, Text line, Context ctx) throws IOException, InterruptedException {
- String[] tokens = SEPARATOR.split(line.toString());
- long userID = Long.parseLong(tokens[0]);
- long itemID = Long.parseLong(tokens[1]);
- ctx.write(new VarLongWritable(itemID), new VarLongWritable(userID));
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
deleted file mode 100644
index 5435f80dd..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.cf.taste.hadoop.ToEntityPrefsMapper;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-
-public final class ItemIDIndexMapper extends
- Mapper {
-
- private boolean transpose;
-
- @Override
- protected void setup(Context context) {
- Configuration jobConf = context.getConfiguration();
- transpose = jobConf.getBoolean(ToEntityPrefsMapper.TRANSPOSE_USER_ITEM, false);
- }
-
- @Override
- protected void map(LongWritable key,
- Text value,
- Context context) throws IOException, InterruptedException {
- String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
- long itemID = Long.parseLong(tokens[transpose ? 0 : 1]);
- int index = TasteHadoopUtils.idToIndex(itemID);
- context.write(new VarIntWritable(index), new VarLongWritable(itemID));
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
deleted file mode 100644
index 793c2ab0a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-
-public final class ItemIDIndexReducer extends
- Reducer {
-
- @Override
- protected void reduce(VarIntWritable index,
- Iterable possibleItemIDs,
- Context context) throws IOException, InterruptedException {
- long minimumItemID = Long.MAX_VALUE;
- for (VarLongWritable varLongWritable : possibleItemIDs) {
- long itemID = varLongWritable.get();
- if (itemID < minimumItemID) {
- minimumItemID = itemID;
- }
- }
- if (minimumItemID != Long.MAX_VALUE) {
- context.write(index, new VarLongWritable(minimumItemID));
- }
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java
deleted file mode 100644
index dbc29ca10..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Vector;
-
-/**
- * maps similar items and their preference values per user
- */
-public final class PartialMultiplyMapper extends
- Mapper {
-
- @Override
- protected void map(VarIntWritable key,
- VectorAndPrefsWritable vectorAndPrefsWritable,
- Context context) throws IOException, InterruptedException {
-
- Vector similarityMatrixColumn = vectorAndPrefsWritable.getVector();
- List userIDs = vectorAndPrefsWritable.getUserIDs();
- List prefValues = vectorAndPrefsWritable.getValues();
-
- VarLongWritable userIDWritable = new VarLongWritable();
- PrefAndSimilarityColumnWritable prefAndSimilarityColumn = new PrefAndSimilarityColumnWritable();
-
- for (int i = 0; i < userIDs.size(); i++) {
- long userID = userIDs.get(i);
- float prefValue = prefValues.get(i);
- if (!Float.isNaN(prefValue)) {
- prefAndSimilarityColumn.set(prefValue, similarityMatrixColumn);
- userIDWritable.set(userID);
- context.write(userIDWritable, prefAndSimilarityColumn);
- }
- }
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PrefAndSimilarityColumnWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PrefAndSimilarityColumnWritable.java
deleted file mode 100644
index 704c74a82..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PrefAndSimilarityColumnWritable.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-
-public final class PrefAndSimilarityColumnWritable implements Writable {
-
- private float prefValue;
- private Vector similarityColumn;
-
- public PrefAndSimilarityColumnWritable() {
- }
-
- public PrefAndSimilarityColumnWritable(float prefValue, Vector similarityColumn) {
- set(prefValue, similarityColumn);
- }
-
- public void set(float prefValue, Vector similarityColumn) {
- this.prefValue = prefValue;
- this.similarityColumn = similarityColumn;
- }
-
- public float getPrefValue() {
- return prefValue;
- }
-
- public Vector getSimilarityColumn() {
- return similarityColumn;
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- prefValue = in.readFloat();
- VectorWritable vw = new VectorWritable();
- vw.readFields(in);
- similarityColumn = vw.get();
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeFloat(prefValue);
- VectorWritable vw = new VectorWritable(similarityColumn);
- vw.setWritesLaxPrecision(true);
- vw.write(out);
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj instanceof PrefAndSimilarityColumnWritable) {
- PrefAndSimilarityColumnWritable other = (PrefAndSimilarityColumnWritable) obj;
- return prefValue == other.prefValue && similarityColumn.equals(other.similarityColumn);
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- return RandomUtils.hashFloat(prefValue) + 31 * similarityColumn.hashCode();
- }
-
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
deleted file mode 100644
index e67a4cb64..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob;
-import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;
-
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- *
Runs a completely distributed recommender job as a series of mapreduces.
- *
- *
Preferences in the input file should look like {@code userID, itemID[, preferencevalue]}
- *
- *
- * Preference value is optional to accommodate applications that have no notion of a preference value (that is, the user
- * simply expresses a preference for an item, but no degree of preference).
- *
- *
- *
- * The preference value is assumed to be parseable as a {@code double}. The user IDs and item IDs are
- * parsed as {@code long}s.
- *
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
--input(path): Directory containing one or more text files with the preference data
- *
--output(path): output path where recommender output should go
- *
--similarityClassname (classname): Name of vector similarity class to instantiate or a predefined similarity
- * from {@link org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasure}
- *
--usersFile (path): only compute recommendations for user IDs contained in this file (optional)
- *
--itemsFile (path): only include item IDs from this file in the recommendations (optional)
- *
--filterFile (path): file containing comma-separated userID,itemID pairs. Used to exclude the item from the
- * recommendations for that user (optional)
- *
--numRecommendations (integer): Number of recommendations to compute per user (10)
- *
--booleanData (boolean): Treat input data as having no pref values (false)
- *
--maxPrefsPerUser (integer): Maximum number of preferences considered per user in final recommendation phase (10)
- *
--maxSimilaritiesPerItem (integer): Maximum number of similarities considered per item (100)
- *
--minPrefsPerUser (integer): ignore users with less preferences than this in the similarity computation (1)
- *
--maxPrefsPerUserInItemSimilarity (integer): max number of preferences to consider per user in the item similarity computation phase,
- * users with more preferences will be sampled down (1000)
- *
--threshold (double): discard item pairs with a similarity value below this
- *
- *
- *
General command line options are documented in {@link AbstractJob}.
- *
- *
Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
- * arguments.
- */
-public final class RecommenderJob extends AbstractJob {
-
- public static final String BOOLEAN_DATA = "booleanData";
-
- private static final int DEFAULT_MAX_SIMILARITIES_PER_ITEM = 100;
- private static final int DEFAULT_MAX_PREFS_PER_USER = 1000;
- private static final int DEFAULT_MIN_PREFS_PER_USER = 1;
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOutputOption();
- addOption("numRecommendations", "n", "Number of recommendations per user",
- String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
- addOption("usersFile", null, "File of users to recommend for", null);
- addOption("itemsFile", null, "File of items to recommend for", null);
- addOption("filterFile", "f", "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
- + "the recommendations for that user (optional)", null);
- addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
- addOption("maxPrefsPerUser", "mxp",
- "Maximum number of preferences considered per user in final recommendation phase",
- String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
- addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this in the similarity computation "
- + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
- addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
- String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
- addOption("maxPrefsPerUserInItemSimilarity", "mppuiis", "max number of preferences to consider per user in the "
- + "item similarity computation phase, users with more preferences will be sampled down (default: " +
- DEFAULT_MAX_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MAX_PREFS_PER_USER));
- addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, "
- + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')', true);
- addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- Path outputPath = getOutputPath();
- int numRecommendations = Integer.parseInt(getOption("numRecommendations"));
- String usersFile = getOption("usersFile");
- String itemsFile = getOption("itemsFile");
- String filterFile = getOption("filterFile");
- boolean booleanData = Boolean.valueOf(getOption("booleanData"));
- int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
- int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
- int maxPrefsPerUserInItemSimilarity = Integer.parseInt(getOption("maxPrefsPerUserInItemSimilarity"));
- int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
- String similarityClassname = getOption("similarityClassname");
- double threshold = hasOption("threshold") ?
- Double.parseDouble(getOption("threshold")) : RowSimilarityJob.NO_THRESHOLD;
-
-
- Path prepPath = getTempPath("preparePreferenceMatrix");
- Path similarityMatrixPath = getTempPath("similarityMatrix");
- Path prePartialMultiplyPath1 = getTempPath("prePartialMultiply1");
- Path prePartialMultiplyPath2 = getTempPath("prePartialMultiply2");
- Path explicitFilterPath = getTempPath("explicitFilterPath");
- Path partialMultiplyPath = getTempPath("partialMultiply");
-
- AtomicInteger currentPhase = new AtomicInteger();
-
- int numberOfUsers = -1;
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[]{
- "--input", getInputPath().toString(),
- "--output", prepPath.toString(),
- "--maxPrefsPerUser", String.valueOf(maxPrefsPerUserInItemSimilarity),
- "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
- "--booleanData", String.valueOf(booleanData),
- "--tempDir", getTempPath().toString()});
-
- numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
- }
-
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
-
- /* special behavior if phase 1 is skipped */
- if (numberOfUsers == -1) {
- numberOfUsers = (int) HadoopUtil.countRecords(new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
- PathType.LIST, null, getConf());
- }
-
- /* Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
- * new DistributedRowMatrix(...).rowSimilarity(...) */
- //calculate the co-occurrence matrix
- ToolRunner.run(getConf(), new RowSimilarityJob(), new String[]{
- "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
- "--output", similarityMatrixPath.toString(),
- "--numberOfColumns", String.valueOf(numberOfUsers),
- "--similarityClassname", similarityClassname,
- "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem),
- "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
- "--threshold", String.valueOf(threshold),
- "--tempDir", getTempPath().toString()});
- }
-
- //start the multiplication of the co-occurrence matrix by the user vectors
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job prePartialMultiply1 = prepareJob(
- similarityMatrixPath, prePartialMultiplyPath1, SequenceFileInputFormat.class,
- SimilarityMatrixRowWrapperMapper.class, VarIntWritable.class, VectorOrPrefWritable.class,
- Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
- SequenceFileOutputFormat.class);
- boolean succeeded = prePartialMultiply1.waitForCompletion(true);
- if (!succeeded)
- return -1;
- //continue the multiplication
- Job prePartialMultiply2 = prepareJob(new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
- prePartialMultiplyPath2, SequenceFileInputFormat.class, UserVectorSplitterMapper.class, VarIntWritable.class,
- VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
- SequenceFileOutputFormat.class);
- if (usersFile != null) {
- prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE, usersFile);
- }
- prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
- maxPrefsPerUser);
- succeeded = prePartialMultiply2.waitForCompletion(true);
- if (!succeeded)
- return -1;
- //finish the job
- Job partialMultiply = prepareJob(
- new Path(prePartialMultiplyPath1 + "," + prePartialMultiplyPath2), partialMultiplyPath,
- SequenceFileInputFormat.class, Mapper.class, VarIntWritable.class, VectorOrPrefWritable.class,
- ToVectorAndPrefReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
- SequenceFileOutputFormat.class);
- setS3SafeCombinedInputPath(partialMultiply, getTempPath(), prePartialMultiplyPath1, prePartialMultiplyPath2);
- succeeded = partialMultiply.waitForCompletion(true);
- if (!succeeded)
- return -1;
- }
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- //filter out any users we don't care about
- /* convert the user/item pairs to filter if a filterfile has been specified */
- if (filterFile != null) {
- Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
- ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
- ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
- SequenceFileOutputFormat.class);
- boolean succeeded = itemFiltering.waitForCompletion(true);
- if (!succeeded)
- return -1;
- }
-
- String aggregateAndRecommendInput = partialMultiplyPath.toString();
- if (filterFile != null) {
- aggregateAndRecommendInput += "," + explicitFilterPath;
- }
- //extract out the recommendations
- Job aggregateAndRecommend = prepareJob(
- new Path(aggregateAndRecommendInput), outputPath, SequenceFileInputFormat.class,
- PartialMultiplyMapper.class, VarLongWritable.class, PrefAndSimilarityColumnWritable.class,
- AggregateAndRecommendReducer.class, VarLongWritable.class, RecommendedItemsWritable.class,
- TextOutputFormat.class);
- Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
- if (itemsFile != null) {
- aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
- }
-
- if (filterFile != null) {
- setS3SafeCombinedInputPath(aggregateAndRecommend, getTempPath(), partialMultiplyPath, explicitFilterPath);
- }
- setIOSort(aggregateAndRecommend);
- aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
- new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
- aggregateAndRecommendConf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, numRecommendations);
- aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);
- boolean succeeded = aggregateAndRecommend.waitForCompletion(true);
- if (!succeeded)
- return -1;
- }
-
- return 0;
- }
-
- private static void setIOSort(JobContext job) {
- Configuration conf = job.getConfiguration();
- conf.setInt("io.sort.factor", 100);
- String javaOpts = conf.get("mapred.map.child.java.opts"); // new arg name
- if (javaOpts == null) {
- javaOpts = conf.get("mapred.child.java.opts"); // old arg name
- }
- int assumedHeapSize = 512;
- if (javaOpts != null) {
- Matcher m = Pattern.compile("-Xmx([0-9]+)([mMgG])").matcher(javaOpts);
- if (m.find()) {
- assumedHeapSize = Integer.parseInt(m.group(1));
- String megabyteOrGigabyte = m.group(2);
- if ("g".equalsIgnoreCase(megabyteOrGigabyte)) {
- assumedHeapSize *= 1024;
- }
- }
- }
- // Cap this at 1024MB now; see https://issues.apache.org/jira/browse/MAPREDUCE-2308
- conf.setInt("io.sort.mb", Math.min(assumedHeapSize / 2, 1024));
- // For some reason the Merger doesn't report status for a long time; increase
- // timeout when running these jobs
- conf.setInt("mapred.task.timeout", 60 * 60 * 1000);
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new RecommenderJob(), args);
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java
deleted file mode 100644
index 4cdf21e64..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-
-/**
- * maps a row of the similarity matrix to a {@link VectorOrPrefWritable}
- *
- * actually a column from that matrix has to be used but as the similarity matrix is symmetric,
- * we can use a row instead of having to transpose it
- */
-public final class SimilarityMatrixRowWrapperMapper extends
- Mapper {
-
- @Override
- protected void map(IntWritable key,
- VectorWritable value,
- Context context) throws IOException, InterruptedException {
- Vector similarityMatrixRow = value.get();
- /* remove self similarity */
- similarityMatrixRow.set(key.get(), Double.NaN);
- context.write(new VarIntWritable(key.get()), new VectorOrPrefWritable(similarityMatrixRow));
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java
deleted file mode 100644
index d01da3f6e..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-
-/**
- *
Input
- *
- *
- * Takes user IDs as {@link VarLongWritable} mapped to all associated item IDs and preference values, as
- * {@link EntityPrefWritable}s.
- *
- *
- *
Output
- *
- *
- * The same user ID mapped to a {@link RandomAccessSparseVector} representation of the same item IDs and
- * preference values. Item IDs are used as vector indexes; they are hashed into ints to work as indexes with
- * {@link TasteHadoopUtils#idToIndex(long)}. The mapping is remembered for later with a combination of
- * {@link ItemIDIndexMapper} and {@link ItemIDIndexReducer}.
- *
- */
-public final class ToUserVectorsReducer extends
- Reducer {
-
- public static final String MIN_PREFERENCES_PER_USER = ToUserVectorsReducer.class.getName()
- + ".minPreferencesPerUser";
-
- private int minPreferences;
-
- public enum Counters { USERS }
-
- @Override
- protected void setup(Context ctx) throws IOException, InterruptedException {
- super.setup(ctx);
- minPreferences = ctx.getConfiguration().getInt(MIN_PREFERENCES_PER_USER, 1);
- }
-
- @Override
- protected void reduce(VarLongWritable userID,
- Iterable itemPrefs,
- Context context) throws IOException, InterruptedException {
- Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
- for (VarLongWritable itemPref : itemPrefs) {
- int index = TasteHadoopUtils.idToIndex(itemPref.get());
- float value = itemPref instanceof EntityPrefWritable ? ((EntityPrefWritable) itemPref).getPrefValue() : 1.0f;
- userVector.set(index, value);
- }
-
- if (userVector.getNumNondefaultElements() >= minPreferences) {
- VectorWritable vw = new VectorWritable(userVector);
- vw.setWritesLaxPrecision(true);
- context.getCounter(Counters.USERS).increment(1);
- context.write(userID, vw);
- }
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
deleted file mode 100644
index 2f91f4c44..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.IOException;
-import java.util.List;
-
-import com.google.common.collect.Lists;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.Vector;
-
-public final class ToVectorAndPrefReducer extends
- Reducer {
-
- @Override
- protected void reduce(VarIntWritable key,
- Iterable values,
- Context context) throws IOException, InterruptedException {
-
- List userIDs = Lists.newArrayList();
- List prefValues = Lists.newArrayList();
- Vector similarityMatrixColumn = null;
- for (VectorOrPrefWritable value : values) {
- if (value.getVector() == null) {
- // Then this is a user-pref value
- userIDs.add(value.getUserID());
- prefValues.add(value.getValue());
- } else {
- // Then this is the column vector
- if (similarityMatrixColumn != null) {
- throw new IllegalStateException("Found two similarity-matrix columns for item index " + key.get());
- }
- similarityMatrixColumn = value.getVector();
- }
- }
-
- if (similarityMatrixColumn == null) {
- return;
- }
-
- VectorAndPrefsWritable vectorAndPrefs = new VectorAndPrefsWritable(similarityMatrixColumn, userIDs, prefValues);
- context.write(key, vectorAndPrefs);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
deleted file mode 100644
index faf8846bb..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import com.google.common.io.Closeables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.cf.taste.common.TopK;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.common.iterator.FileLineIterable;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Iterator;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public final class UserVectorSplitterMapper extends
- Mapper {
-
- private static final Logger log = LoggerFactory.getLogger(UserVectorSplitterMapper.class);
-
- static final String USERS_FILE = "usersFile";
- static final String MAX_PREFS_PER_USER_CONSIDERED = "maxPrefsPerUserConsidered";
- static final int DEFAULT_MAX_PREFS_PER_USER_CONSIDERED = 10;
-
- private int maxPrefsPerUserConsidered;
- private FastIDSet usersToRecommendFor;
-
- @Override
- protected void setup(Context context) throws IOException {
- Configuration jobConf = context.getConfiguration();
- maxPrefsPerUserConsidered = jobConf.getInt(MAX_PREFS_PER_USER_CONSIDERED, DEFAULT_MAX_PREFS_PER_USER_CONSIDERED);
- String usersFilePathString = jobConf.get(USERS_FILE);
- if (usersFilePathString != null) {
- FSDataInputStream in = null;
- try {
- Path unqualifiedUsersFilePath = new Path(usersFilePathString);
- FileSystem fs = FileSystem.get(unqualifiedUsersFilePath.toUri(), jobConf);
- usersToRecommendFor = new FastIDSet();
- Path usersFilePath = unqualifiedUsersFilePath.makeQualified(fs);
- in = fs.open(usersFilePath);
- for (String line : new FileLineIterable(in)) {
- try {
- usersToRecommendFor.add(Long.parseLong(line));
- } catch (NumberFormatException nfe) {
- log.warn("usersFile line ignored: {}", line);
- }
- }
- } finally {
- Closeables.closeQuietly(in);
- }
- }
- }
-
- @Override
- protected void map(VarLongWritable key,
- VectorWritable value,
- Context context) throws IOException, InterruptedException {
- long userID = key.get();
- if (usersToRecommendFor != null && !usersToRecommendFor.contains(userID)) {
- return;
- }
- Vector userVector = maybePruneUserVector(value.get());
- Iterator it = userVector.iterateNonZero();
- VarIntWritable itemIndexWritable = new VarIntWritable();
- VectorOrPrefWritable vectorOrPref = new VectorOrPrefWritable();
- while (it.hasNext()) {
- Vector.Element e = it.next();
- itemIndexWritable.set(e.index());
- vectorOrPref.set(userID, (float) e.get());
- context.write(itemIndexWritable, vectorOrPref);
- }
- }
-
- private Vector maybePruneUserVector(Vector userVector) {
- if (userVector.getNumNondefaultElements() <= maxPrefsPerUserConsidered) {
- return userVector;
- }
-
- float smallestLargeValue = findSmallestLargeValue(userVector);
-
- // "Blank out" small-sized prefs to reduce the amount of partial products
- // generated later. They're not zeroed, but NaN-ed, so they come through
- // and can be used to exclude these items from prefs.
- Iterator it = userVector.iterateNonZero();
- while (it.hasNext()) {
- Vector.Element e = it.next();
- float absValue = Math.abs((float) e.get());
- if (absValue < smallestLargeValue) {
- e.set(Float.NaN);
- }
- }
-
- return userVector;
- }
-
- private float findSmallestLargeValue(Vector userVector) {
-
- TopK topPrefValues = new TopK(maxPrefsPerUserConsidered, new Comparator() {
- @Override
- public int compare(Float one, Float two) {
- return one.compareTo(two);
- }
- });
-
- Iterator it = userVector.iterateNonZero();
- while (it.hasNext()) {
- float absValue = Math.abs((float) it.next().get());
- topPrefValues.offer(absValue);
- }
- return topPrefValues.smallestGreat();
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
deleted file mode 100644
index 29bb4169a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import com.google.common.collect.Lists;
-import org.apache.hadoop.io.Writable;
-import org.apache.mahout.math.Varint;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-
-public final class VectorAndPrefsWritable implements Writable {
-
- private Vector vector;
- private List userIDs;
- private List values;
-
- public VectorAndPrefsWritable() {
- }
-
- public VectorAndPrefsWritable(Vector vector, List userIDs, List values) {
- this.vector = vector;
- this.userIDs = userIDs;
- this.values = values;
- }
-
- public Vector getVector() {
- return vector;
- }
-
- public List getUserIDs() {
- return userIDs;
- }
-
- public List getValues() {
- return values;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- VectorWritable vw = new VectorWritable(vector);
- vw.setWritesLaxPrecision(true);
- vw.write(out);
- Varint.writeUnsignedVarInt(userIDs.size(), out);
- for (int i = 0; i < userIDs.size(); i++) {
- Varint.writeSignedVarLong(userIDs.get(i), out);
- out.writeFloat(values.get(i));
- }
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- VectorWritable writable = new VectorWritable();
- writable.readFields(in);
- vector = writable.get();
- int size = Varint.readUnsignedVarInt(in);
- userIDs = Lists.newArrayListWithCapacity(size);
- values = Lists.newArrayListWithCapacity(size);
- for (int i = 0; i < size; i++) {
- userIDs.add(Varint.readSignedVarLong(in));
- values.add(in.readFloat());
- }
- }
-
- @Override
- public String toString() {
- return vector + "\t" + userIDs + '\t' + values;
- }
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java
deleted file mode 100644
index 9b95f67b3..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.item;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.mahout.math.Varint;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-
-public final class VectorOrPrefWritable implements Writable {
-
- private Vector vector;
- private long userID;
- private float value;
-
- public VectorOrPrefWritable() {
- }
-
- public VectorOrPrefWritable(Vector vector) {
- this.vector = vector;
- }
-
- public VectorOrPrefWritable(long userID, float value) {
- this.userID = userID;
- this.value = value;
- }
-
- public Vector getVector() {
- return vector;
- }
-
- public long getUserID() {
- return userID;
- }
-
- public float getValue() {
- return value;
- }
-
- public void set(Vector vector) {
- this.vector = vector;
- this.userID = Long.MIN_VALUE;
- this.value = Float.NaN;
- }
-
- public void set(long userID, float value) {
- this.vector = null;
- this.userID = userID;
- this.value = value;
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- if (vector == null) {
- out.writeBoolean(false);
- Varint.writeSignedVarLong(userID, out);
- out.writeFloat(value);
- } else {
- out.writeBoolean(true);
- VectorWritable vw = new VectorWritable(vector);
- vw.setWritesLaxPrecision(true);
- vw.write(out);
- }
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- boolean hasVector = in.readBoolean();
- if (hasVector) {
- VectorWritable writable = new VectorWritable();
- writable.readFields(in);
- set(writable.get());
- } else {
- long theUserID = Varint.readSignedVarLong(in);
- float theValue = in.readFloat();
- set(theUserID, theValue);
- }
- }
-
- @Override
- public String toString() {
- return vector == null ? userID + ":" + value : vector.toString();
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
deleted file mode 100644
index 2c6da306c..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.preparation;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.ToEntityPrefsMapper;
-import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.cf.taste.hadoop.item.ItemIDIndexMapper;
-import org.apache.mahout.cf.taste.hadoop.item.ItemIDIndexReducer;
-import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
-import org.apache.mahout.cf.taste.hadoop.item.ToUserVectorsReducer;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.VectorWritable;
-
-import java.util.List;
-import java.util.Map;
-
-public class PreparePreferenceMatrixJob extends AbstractJob {
-
- public static final String NUM_USERS = "numUsers.bin";
- public static final String ITEMID_INDEX = "itemIDIndex";
- public static final String USER_VECTORS = "userVectors";
- public static final String RATING_MATRIX = "ratingMatrix";
-
- private static final int DEFAULT_MIN_PREFS_PER_USER = 1;
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new PreparePreferenceMatrixJob(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOutputOption();
- addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, "
- + "users with more preferences will be sampled down");
- addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this "
- + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
- addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
- addOption("ratingShift", "rs", "shift ratings by this value", "0.0");
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
- boolean booleanData = Boolean.valueOf(getOption("booleanData"));
- float ratingShift = Float.parseFloat(getOption("ratingShift"));
- //convert items to an internal index
- Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class,
- ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class,
- VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
- itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
- boolean succeeded = itemIDIndex.waitForCompletion(true);
- if (!succeeded) {
- return -1;
- }
- //convert user preferences into a vector per user
- Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class,
- ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? VarLongWritable.class : EntityPrefWritable.class,
- ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
- toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData);
- toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
- toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift));
- succeeded = toUserVectors.waitForCompletion(true);
- if (!succeeded) {
- return -1;
- }
- //we need the number of users later
- int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS).getValue();
- HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf());
- //build the rating matrix
- Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX),
- ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class,
- IntWritable.class, VectorWritable.class);
- toItemVectors.setCombinerClass(ToItemVectorsReducer.class);
-
- /* configure sampling regarding the uservectors */
- if (hasOption("maxPrefsPerUser")) {
- int samplingSize = Integer.parseInt(getOption("maxPrefsPerUser"));
- toItemVectors.getConfiguration().setInt(ToItemVectorsMapper.SAMPLE_SIZE, samplingSize);
- }
-
- succeeded = toItemVectors.waitForCompletion(true);
- if (!succeeded) {
- return -1;
- }
-
- return 0;
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
deleted file mode 100644
index ba85b2544..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.preparation;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.hadoop.similarity.cooccurrence.Vectors;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-public class ToItemVectorsMapper
- extends Mapper {
-
- public static final String SAMPLE_SIZE = ToItemVectorsMapper.class + ".sampleSize";
-
- enum Elements {
- USER_RATINGS_USED, USER_RATINGS_NEGLECTED
- }
-
- private int sampleSize;
-
- @Override
- protected void setup(Context ctx) throws IOException, InterruptedException {
- sampleSize = ctx.getConfiguration().getInt(SAMPLE_SIZE, Integer.MAX_VALUE);
- }
-
- @Override
- protected void map(VarLongWritable rowIndex, VectorWritable vectorWritable, Context ctx)
- throws IOException, InterruptedException {
- Vector userRatings = vectorWritable.get();
-
- int numElementsBeforeSampling = userRatings.getNumNondefaultElements();
- userRatings = Vectors.maybeSample(userRatings, sampleSize);
- int numElementsAfterSampling = userRatings.getNumNondefaultElements();
-
- int column = TasteHadoopUtils.idToIndex(rowIndex.get());
- VectorWritable itemVector = new VectorWritable(new RandomAccessSparseVector(Integer.MAX_VALUE, 1));
- itemVector.setWritesLaxPrecision(true);
-
- Iterator iterator = userRatings.iterateNonZero();
- while (iterator.hasNext()) {
- Vector.Element elem = iterator.next();
- itemVector.get().setQuick(column, elem.get());
- ctx.write(new IntWritable(elem.index()), itemVector);
- }
-
- ctx.getCounter(Elements.USER_RATINGS_USED).increment(numElementsAfterSampling);
- ctx.getCounter(Elements.USER_RATINGS_NEGLECTED).increment(numElementsBeforeSampling - numElementsAfterSampling);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java
deleted file mode 100644
index 207a799f4..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.preparation;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VectorWritable;
-
-import java.io.IOException;
-
-public class ToItemVectorsReducer extends Reducer {
-
- @Override
- protected void reduce(IntWritable row, Iterable vectors, Context ctx)
- throws IOException, InterruptedException {
- VectorWritable vectorWritable = VectorWritable.merge(vectors.iterator());
- vectorWritable.setWritesLaxPrecision(true);
- ctx.write(row, vectorWritable);
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
deleted file mode 100644
index 02d1ba671..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.pseudo;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.math.VarLongWritable;
-
-/**
- *
- * This job runs a "pseudo-distributed" recommendation process on Hadoop. It merely runs many
- * {@link org.apache.mahout.cf.taste.recommender.Recommender} instances on Hadoop,
- * where each instance is a normal non-distributed implementation.
- *
- *
- *
This class configures and runs a {@link RecommenderReducer} using Hadoop.
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
-Dmapred.input.dir=(path): Location of a data model file containing preference data, suitable for use with
- * {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}
- *
-Dmapred.output.dir=(path): output path where recommender output should go
- *
--recommenderClassName (string): Fully-qualified class name of
- * {@link org.apache.mahout.cf.taste.recommender.Recommender} to use to make recommendations.
- * Note that it must have a constructor which takes a {@link org.apache.mahout.cf.taste.model.DataModel}
- * argument.
- *
--numRecommendations (integer): Number of recommendations to compute per user
- *
--usersFile (path): file containing user IDs to recommend for (optional)
- *
- *
- *
General command line options are documented in {@link AbstractJob}.
- *
- *
Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
- * arguments.
- *
- *
- * For example, to get started trying this out, set up Hadoop in a pseudo-distributed manner:
- * http://hadoop.apache.org/common/docs/current/quickstart.html You can stop at the point where it instructs
- * you to copy files into HDFS.
- *
- *
- *
- * Assume your preference data file is {@code input.csv}. You will also need to create a file containing
- * all user IDs to write recommendations for, as something like {@code users.txt}. Place this input on
- * HDFS like so:
- *
- * Build Mahout code with {@code mvn package} in the core/ directory. Locate
- * {@code target/mahout-core-X.Y-SNAPSHOT.job}. This is a JAR file; copy it out to a convenient location
- * and name it {@code recommender.jar}.
- *
- *
- *
- * Now add your own custom recommender code and dependencies. Your IDE produced compiled .class files
- * somewhere and they need to be packaged up as well:
- *
- *
- * {@code hadoop jar recommender.jar \
- * org.apache.mahout.cf.taste.hadoop.pseudo.RecommenderJob \
- * -Dmapred.input.dir=input/users.csv \
- * -Dmapred.output.dir=output \
- * --recommenderClassName your.project.Recommender \
- * --numRecommendations 10 *
- * }
- */
-public final class RecommenderJob extends AbstractJob {
-
- @Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
-
- addInputOption();
- addOutputOption();
- addOption("recommenderClassName", "r", "Name of recommender class to instantiate");
- addOption("numRecommendations", "n", "Number of recommendations per user", "10");
- addOption("usersFile", "u", "File of users to recommend for", null);
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- Path inputFile = getInputPath();
- Path outputPath = getOutputPath();
- Path usersFile = hasOption("usersFile") ? inputFile : new Path(getOption("usersFile"));
-
- String recommendClassName = getOption("recommenderClassName");
- int recommendationsPerUser = Integer.parseInt(getOption("numRecommendations"));
-
- Job job = prepareJob(usersFile,
- outputPath,
- TextInputFormat.class,
- UserIDsMapper.class,
- VarLongWritable.class,
- NullWritable.class,
- RecommenderReducer.class,
- VarLongWritable.class,
- RecommendedItemsWritable.class,
- TextOutputFormat.class);
- FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
- Configuration jobConf = job.getConfiguration();
- jobConf.set(RecommenderReducer.RECOMMENDER_CLASS_NAME, recommendClassName);
- jobConf.setInt(RecommenderReducer.RECOMMENDATIONS_PER_USER, recommendationsPerUser);
- jobConf.set(RecommenderReducer.DATA_MODEL_FILE, inputFile.toString());
-
- boolean succeeded = job.waitForCompletion(true);
- return succeeded ? 0 : -1;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new RecommenderJob(), args);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java
deleted file mode 100644
index d2b3ac5f9..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.pseudo;
-
-import java.io.File;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.apache.mahout.math.VarLongWritable;
-
-/**
- *
- * The {@link Reducer} which takes as input the user IDs parsed out by the map phase, and for each unique user
- * ID, computes recommendations with the configured {@link Recommender}. The results are output as
- * {@link RecommendedItemsWritable}.
- *
- *
- * @see RecommenderJob
- */
-public final class RecommenderReducer extends
- Reducer {
-
- static final String RECOMMENDER_CLASS_NAME = "recommenderClassName";
- static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
- static final String DATA_MODEL_FILE = "dataModelFile";
-
- private Recommender recommender;
- private int recommendationsPerUser;
-
- @Override
- protected void setup(Context context) throws IOException {
- Configuration jobConf = context.getConfiguration();
- String dataModelFile = jobConf.get(DATA_MODEL_FILE);
- String recommenderClassName = jobConf.get(RECOMMENDER_CLASS_NAME);
- Path dataModelPath = new Path(dataModelFile);
- FileSystem fs = FileSystem.get(dataModelPath.toUri(), jobConf);
- File tempDataFile = File.createTempFile("mahout-taste-hadoop", "txt");
- tempDataFile.deleteOnExit();
- fs.copyToLocalFile(dataModelPath, new Path(tempDataFile.getAbsolutePath()));
- FileDataModel fileDataModel = new FileDataModel(tempDataFile);
-
- try {
- Class extends Recommender> recommenderClass = Class.forName(recommenderClassName).asSubclass(
- Recommender.class);
- Constructor extends Recommender> constructor = recommenderClass.getConstructor(DataModel.class);
- recommender = constructor.newInstance(fileDataModel);
- } catch (NoSuchMethodException nsme) {
- throw new IllegalStateException(nsme);
- } catch (ClassNotFoundException cnfe) {
- throw new IllegalStateException(cnfe);
- } catch (InstantiationException ie) {
- throw new IllegalStateException(ie);
- } catch (IllegalAccessException iae) {
- throw new IllegalStateException(iae);
- } catch (InvocationTargetException ite) {
- throw new IllegalStateException(ite.getCause());
- }
- recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
- }
-
- @Override
- protected void reduce(VarLongWritable key,
- Iterable values,
- Context context) throws IOException, InterruptedException {
- long userID = key.get();
- List recommendedItems;
- try {
- recommendedItems = recommender.recommend(userID, recommendationsPerUser);
- } catch (TasteException te) {
- throw new IllegalStateException(te);
- }
- Iterator it = recommendedItems.iterator();
- while (it.hasNext()) {
- if (Float.isNaN(it.next().getValue())) {
- it.remove();
- }
- }
- RecommendedItemsWritable writable = new RecommendedItemsWritable(recommendedItems);
- context.write(key, writable);
- context.getCounter(ReducerMetrics.USERS_PROCESSED).increment(1L);
- context.getCounter(ReducerMetrics.RECOMMENDATIONS_MADE).increment(recommendedItems.size());
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/ReducerMetrics.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/ReducerMetrics.java
deleted file mode 100644
index 22d903e2c..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/ReducerMetrics.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.pseudo;
-
-/** Custom metrics collected by {@link RecommenderReducer}. */
-public enum ReducerMetrics {
-
- /** Number of unique users for which recommendations were produced */
- USERS_PROCESSED,
- /** Number of items recommended to those users */
- RECOMMENDATIONS_MADE
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java
deleted file mode 100644
index 41188f331..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.pseudo;
-
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarLongWritable;
-
-/**
- * Extracts and emits all user IDs from the users file, or input file.
- */
-public final class UserIDsMapper extends
- Mapper {
-
- private static final Pattern DELIMITER = Pattern.compile("[\t,]");
-
- @Override
- protected void map(LongWritable key,
- Text value,
- Context context) throws IOException, InterruptedException {
- String[] tokens = DELIMITER.split(value.toString());
- long userID = Long.parseLong(tokens[0]);
- context.write(new VarLongWritable(userID), NullWritable.get());
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
deleted file mode 100644
index 76fad5fcb..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import com.google.common.base.Preconditions;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.common.TopK;
-import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob;
-import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;
-import org.apache.mahout.math.map.OpenIntLongHashMap;
-
-/**
- *
Distributed precomputation of the item-item-similarities for Itembased Collaborative Filtering
- *
- *
Preferences in the input file should look like {@code userID,itemID[,preferencevalue]}
- *
- *
- * Preference value is optional to accommodate applications that have no notion of a preference value (that is, the user
- * simply expresses a preference for an item, but no degree of preference).
- *
- *
- *
- * The preference value is assumed to be parseable as a {@code double}. The user IDs and item IDs are
- * parsed as {@code long}s.
- *
- *
- *
Command line arguments specific to this class are:
- *
- *
- *
-Dmapred.input.dir=(path): Directory containing one or more text files with the preference data
- *
-Dmapred.output.dir=(path): output path where similarity data should be written
- *
--similarityClassname (classname): Name of distributed similarity measure class to instantiate or a predefined similarity
- * from {@link org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasure}
- *
--maxSimilaritiesPerItem (integer): Maximum number of similarities considered per item (100)
- *
--maxCooccurrencesPerItem (integer): Maximum number of cooccurrences considered per item (100)
- *
--booleanData (boolean): Treat input data as having no pref values (false)
- *
- *
- *
General command line options are documented in {@link AbstractJob}.
- *
- *
Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other arguments.
- */
-public final class ItemSimilarityJob extends AbstractJob {
-
- static final String ITEM_ID_INDEX_PATH_STR = ItemSimilarityJob.class.getName() + ".itemIDIndexPathStr";
- static final String MAX_SIMILARITIES_PER_ITEM = ItemSimilarityJob.class.getName() + ".maxSimilarItemsPerItem";
-
- private static final int DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM = 100;
- private static final int DEFAULT_MAX_PREFS_PER_USER = 1000;
- private static final int DEFAULT_MIN_PREFS_PER_USER = 1;
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new ItemSimilarityJob(), args);
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- addInputOption();
- addOutputOption();
- addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, "
- + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
- addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number "
- + "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')',
- String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
- addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, "
- + "users with more preferences will be sampled down (default: " + DEFAULT_MAX_PREFS_PER_USER + ')',
- String.valueOf(DEFAULT_MAX_PREFS_PER_USER));
- addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this "
- + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
- addOption("booleanData", "b", "Treat input as without pref values", String.valueOf(Boolean.FALSE));
- addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- String similarityClassName = getOption("similarityClassname");
- int maxSimilarItemsPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
- int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
- int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
- boolean booleanData = Boolean.valueOf(getOption("booleanData"));
-
- double threshold = hasOption("threshold") ?
- Double.parseDouble(getOption("threshold")) : RowSimilarityJob.NO_THRESHOLD;
-
- Path similarityMatrixPath = getTempPath("similarityMatrix");
- Path prepPath = getTempPath("prepareRatingMatrix");
-
- AtomicInteger currentPhase = new AtomicInteger();
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[]{
- "--input", getInputPath().toString(),
- "--output", prepPath.toString(),
- "--maxPrefsPerUser", String.valueOf(maxPrefsPerUser),
- "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
- "--booleanData", String.valueOf(booleanData),
- "--tempDir", getTempPath().toString() });
- }
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- int numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS),
- getConf());
-
- ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
- "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
- "--output", similarityMatrixPath.toString(),
- "--numberOfColumns", String.valueOf(numberOfUsers),
- "--similarityClassname", similarityClassName,
- "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem),
- "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
- "--threshold", String.valueOf(threshold),
- "--tempDir", getTempPath().toString() });
- }
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job mostSimilarItems = prepareJob(similarityMatrixPath, getOutputPath(), SequenceFileInputFormat.class,
- MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class,
- MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class, TextOutputFormat.class);
- Configuration mostSimilarItemsConf = mostSimilarItems.getConfiguration();
- mostSimilarItemsConf.set(ITEM_ID_INDEX_PATH_STR,
- new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
- mostSimilarItemsConf.setInt(MAX_SIMILARITIES_PER_ITEM, maxSimilarItemsPerItem);
- boolean succeeded = mostSimilarItems.waitForCompletion(true);
- if (!succeeded) {
- return -1;
- }
- }
-
- return 0;
- }
-
- public static class MostSimilarItemPairsMapper
- extends Mapper {
-
- private OpenIntLongHashMap indexItemIDMap;
- private int maxSimilarItemsPerItem;
-
- @Override
- protected void setup(Context ctx) {
- Configuration conf = ctx.getConfiguration();
- maxSimilarItemsPerItem = conf.getInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, -1);
- indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(conf.get(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR), conf);
-
- Preconditions.checkArgument(maxSimilarItemsPerItem > 0, "maxSimilarItemsPerItem was not correctly set!");
- }
-
- @Override
- protected void map(IntWritable itemIDIndexWritable, VectorWritable similarityVector, Context ctx)
- throws IOException, InterruptedException {
-
- int itemIDIndex = itemIDIndexWritable.get();
-
- TopK topKMostSimilarItems =
- new TopK(maxSimilarItemsPerItem, SimilarItem.COMPARE_BY_SIMILARITY);
-
- Iterator similarityVectorIterator = similarityVector.get().iterateNonZero();
-
- while (similarityVectorIterator.hasNext()) {
- Vector.Element element = similarityVectorIterator.next();
- topKMostSimilarItems.offer(new SimilarItem(indexItemIDMap.get(element.index()), element.get()));
- }
-
- long itemID = indexItemIDMap.get(itemIDIndex);
- for (SimilarItem similarItem : topKMostSimilarItems.retrieve()) {
- long otherItemID = similarItem.getItemID();
- if (itemID < otherItemID) {
- ctx.write(new EntityEntityWritable(itemID, otherItemID), new DoubleWritable(similarItem.getSimilarity()));
- } else {
- ctx.write(new EntityEntityWritable(otherItemID, itemID), new DoubleWritable(similarItem.getSimilarity()));
- }
- }
- }
- }
-
- static class MostSimilarItemPairsReducer
- extends Reducer {
- @Override
- protected void reduce(EntityEntityWritable pair, Iterable values, Context ctx)
- throws IOException, InterruptedException {
- ctx.write(pair, values.iterator().next());
- }
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java
deleted file mode 100644
index 859b3fb2b..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-class SimilarItem {
-
- static final Comparator COMPARE_BY_SIMILARITY = new BySimilaritySimilarItemComparator();
-
- private final long itemID;
- private final double similarity;
-
- SimilarItem(long itemID, double similarity) {
- this.itemID = itemID;
- this.similarity = similarity;
- }
-
- public long getItemID() {
- return itemID;
- }
-
- public double getSimilarity() {
- return similarity;
- }
-
- static class BySimilaritySimilarItemComparator implements Comparator, Serializable {
- @Override
- public int compare(SimilarItem s1, SimilarItem s2) {
- return s1.similarity == s2.similarity ? 0 : s1.similarity < s2.similarity ? -1 : 1;
- }
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/ByItemIDComparator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/ByItemIDComparator.java
deleted file mode 100644
index 9b116318d..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/ByItemIDComparator.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.slopeone;
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-
-final class ByItemIDComparator implements Comparator, Serializable {
-
- private static final Comparator INSTANCE = new ByItemIDComparator();
-
- public static Comparator getInstance() {
- return INSTANCE;
- }
-
- @Override
- public int compare(EntityPrefWritable a, EntityPrefWritable b) {
- long idA = a.getID();
- long idB = b.getID();
- return idA < idB ? -1 : idA > idB ? 1 : 0;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
deleted file mode 100644
index d71e507e3..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.slopeone;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
-import org.apache.mahout.math.Varint;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-public final class FullRunningAverageAndStdDevWritable implements Writable {
-
- private FullRunningAverageAndStdDev average;
-
- public FullRunningAverageAndStdDevWritable(FullRunningAverageAndStdDev average) {
- this.average = average;
- }
-
- public FullRunningAverageAndStdDev getAverage() {
- return average;
- }
-
- @Override
- public String toString() {
- return new StringBuilder()
- .append(average.getAverage()).append('\t')
- .append(average.getCount()).append('\t')
- .append(average.getMk()).append('\t')
- .append(average.getSk()).toString();
- }
-
- @Override
- public void write(DataOutput dataOutput) throws IOException {
- Varint.writeUnsignedVarInt(average.getCount(), dataOutput);
- dataOutput.writeDouble(average.getAverage());
- dataOutput.writeDouble(average.getMk());
- dataOutput.writeDouble(average.getSk());
- }
-
- @Override
- public void readFields(DataInput dataInput) throws IOException {
- int count = Varint.readUnsignedVarInt(dataInput);
- double diff = dataInput.readDouble();
- double mk = dataInput.readDouble();
- double sk = dataInput.readDouble();
- average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
deleted file mode 100644
index 57fa036f8..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.slopeone;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.math.VarLongWritable;
-
-public final class SlopeOneAverageDiffsJob extends AbstractJob {
-
- @Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
-
- addInputOption();
- addOutputOption();
-
- Map> parsedArgs = parseArguments(args);
- if (parsedArgs == null) {
- return -1;
- }
-
- Path prefsFile = getInputPath();
- Path outputPath = getOutputPath();
- Path averagesOutputPath = new Path(getOption("--tempDir"));
-
- AtomicInteger currentPhase = new AtomicInteger();
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job prefsToDiffsJob = prepareJob(prefsFile,
- averagesOutputPath,
- TextInputFormat.class,
- ToItemPrefsMapper.class,
- VarLongWritable.class,
- EntityPrefWritable.class,
- SlopeOnePrefsToDiffsReducer.class,
- EntityEntityWritable.class,
- FloatWritable.class,
- SequenceFileOutputFormat.class);
- boolean succeeded = prefsToDiffsJob.waitForCompletion(true);
- if (!succeeded)
- return -1;
- }
-
-
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job diffsToAveragesJob = prepareJob(averagesOutputPath,
- outputPath,
- SequenceFileInputFormat.class,
- Mapper.class,
- EntityEntityWritable.class,
- FloatWritable.class,
- SlopeOneDiffsToAveragesReducer.class,
- EntityEntityWritable.class,
- FullRunningAverageAndStdDevWritable.class,
- TextOutputFormat.class);
- FileOutputFormat.setOutputCompressorClass(diffsToAveragesJob, GzipCodec.class);
- boolean succeeded = diffsToAveragesJob.waitForCompletion(true);
- if (!succeeded)
- return -1;
- }
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new SlopeOneAverageDiffsJob(), args);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
deleted file mode 100644
index 7cc8594d8..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.slopeone;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
-
-public final class SlopeOneDiffsToAveragesReducer extends
- Reducer {
-
- @Override
- protected void reduce(EntityEntityWritable key,
- Iterable values,
- Context context) throws IOException, InterruptedException {
- FullRunningAverageAndStdDev average = new FullRunningAverageAndStdDev();
- for (FloatWritable value : values) {
- average.addDatum(value.get());
- }
- context.write(key, new FullRunningAverageAndStdDevWritable(average));
- }
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
deleted file mode 100644
index aed2c1594..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.slopeone;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-
-import com.google.common.collect.Lists;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.math.VarLongWritable;
-
-public final class SlopeOnePrefsToDiffsReducer extends
- Reducer {
-
- @Override
- protected void reduce(VarLongWritable key,
- Iterable values,
- Context context) throws IOException, InterruptedException {
- List prefs = Lists.newArrayList();
- for (EntityPrefWritable writable : values) {
- prefs.add(new EntityPrefWritable(writable));
- }
- Collections.sort(prefs, ByItemIDComparator.getInstance());
- int size = prefs.size();
- for (int i = 0; i < size; i++) {
- EntityPrefWritable first = prefs.get(i);
- long itemAID = first.getID();
- float itemAValue = first.getPrefValue();
- for (int j = i + 1; j < size; j++) {
- EntityPrefWritable second = prefs.get(j);
- long itemBID = second.getID();
- float itemBValue = second.getPrefValue();
- context.write(new EntityEntityWritable(itemAID, itemBID), new FloatWritable(itemBValue - itemAValue));
- }
- }
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/AbstractLongPrimitiveIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/AbstractLongPrimitiveIterator.java
deleted file mode 100644
index f46785c88..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/AbstractLongPrimitiveIterator.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-public abstract class AbstractLongPrimitiveIterator implements LongPrimitiveIterator {
-
- @Override
- public Long next() {
- return nextLong();
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/BitSet.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/BitSet.java
deleted file mode 100644
index 6f6464879..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/BitSet.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-
-/** A simplified and streamlined version of {@link java.util.BitSet}. */
-final class BitSet implements Serializable, Cloneable {
-
- private final long[] bits;
-
- BitSet(int numBits) {
- int numLongs = numBits >>> 6;
- if ((numBits & 0x3F) != 0) {
- numLongs++;
- }
- bits = new long[numLongs];
- }
-
- private BitSet(long[] bits) {
- this.bits = bits;
- }
-
- boolean get(int index) {
- // skipping range check for speed
- return (bits[index >>> 6] & 1L << (index & 0x3F)) != 0L;
- }
-
- void set(int index) {
- // skipping range check for speed
- bits[index >>> 6] |= 1L << (index & 0x3F);
- }
-
- void clear(int index) {
- // skipping range check for speed
- bits[index >>> 6] &= ~(1L << (index & 0x3F));
- }
-
- void clear() {
- int length = bits.length;
- for (int i = 0; i < length; i++) {
- bits[i] = 0L;
- }
- }
-
- @Override
- public BitSet clone() {
- return new BitSet(bits);
- }
-
- @Override
- public String toString() {
- StringBuilder result = new StringBuilder(64 * bits.length);
- for (long l : bits) {
- for (int j = 0; j < 64; j++) {
- result.append((l & 1L << j) == 0 ? '0' : '1');
- }
- result.append(' ');
- }
- return result.toString();
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java
deleted file mode 100755
index 2dfd9879f..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Cache.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import com.google.common.base.Preconditions;
-import org.apache.mahout.cf.taste.common.TasteException;
-
-import java.util.Iterator;
-
-/**
- *
- * An efficient Map-like class which caches values for keys. Values are not "put" into a ;
- * instead the caller supplies the instance with an implementation of {@link Retriever} which can load the
- * value for a given key.
- *
- *
- *
- * The cache does not support {@code null} keys.
- *
- *
- *
- * Thanks to Amila Jayasooriya for helping evaluate performance of the rewrite of this class, as part of a
- * Google Summer of Code 2007 project.
- *
- */
-public final class Cache implements Retriever {
-
- private static final Object NULL = new Object();
-
- private final FastMap cache;
- private final Retriever super K,? extends V> retriever;
-
- /**
- *
- * Creates a new cache based on the given {@link Retriever}.
- *
- *
- * @param retriever
- * object which can retrieve values for keys
- */
- public Cache(Retriever super K,? extends V> retriever) {
- this(retriever, FastMap.NO_MAX_SIZE);
- }
-
- /**
- *
- * Creates a new cache based on the given {@link Retriever} and with given maximum size.
- *
- *
- * @param retriever
- * object which can retrieve values for keys
- * @param maxEntries
- * maximum number of entries the cache will store before evicting some
- */
- public Cache(Retriever super K,? extends V> retriever, int maxEntries) {
- Preconditions.checkArgument(retriever != null, "retriever is null");
- Preconditions.checkArgument(maxEntries >= 1, "maxEntries must be at least 1");
- cache = new FastMap(11, maxEntries);
- this.retriever = retriever;
- }
-
- /**
- *
- * Returns cached value for a key. If it does not exist, it is loaded using a {@link Retriever}.
- *
- *
- * @param key
- * cache key
- * @return value for that key
- * @throws TasteException
- * if an exception occurs while retrieving a new cached value
- */
- @Override
- public V get(K key) throws TasteException {
- V value;
- synchronized (cache) {
- value = cache.get(key);
- }
- if (value == null) {
- return getAndCacheValue(key);
- }
- return value == NULL ? null : value;
- }
-
- /**
- *
- * Uncaches any existing value for a given key.
- *
- *
- * @param key
- * cache key
- */
- public void remove(K key) {
- synchronized (cache) {
- cache.remove(key);
- }
- }
-
- /**
- * Clears all cache entries whose key matches the given predicate.
- */
- public void removeKeysMatching(MatchPredicate predicate) {
- synchronized (cache) {
- Iterator it = cache.keySet().iterator();
- while (it.hasNext()) {
- K key = it.next();
- if (predicate.matches(key)) {
- it.remove();
- }
- }
- }
- }
-
- /**
- * Clears all cache entries whose value matches the given predicate.
- */
- public void removeValueMatching(MatchPredicate predicate) {
- synchronized (cache) {
- Iterator it = cache.values().iterator();
- while (it.hasNext()) {
- V value = it.next();
- if (predicate.matches(value)) {
- it.remove();
- }
- }
- }
- }
-
- /**
- *
- * Clears the cache.
- *
- */
- public void clear() {
- synchronized (cache) {
- cache.clear();
- }
- }
-
- private V getAndCacheValue(K key) throws TasteException {
- V value = retriever.get(key);
- if (value == null) {
- value = (V) NULL;
- }
- synchronized (cache) {
- cache.put(key, value);
- }
- return value;
- }
-
- @Override
- public String toString() {
- return "Cache[retriever:" + retriever + ']';
- }
-
- /**
- * Used by {#link #removeKeysMatching(Object)} to decide things that are matching.
- */
- public interface MatchPredicate {
- boolean matches(T thing);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
deleted file mode 100644
index 19452ba43..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
+++ /dev/null
@@ -1,571 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-import java.util.AbstractSet;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Set;
-
-import org.apache.mahout.common.RandomUtils;
-
-import com.google.common.base.Preconditions;
-
-/**
- * @see FastMap
- * @see FastIDSet
- */
-public final class FastByIDMap implements Serializable, Cloneable {
-
- public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
- private static final float DEFAULT_LOAD_FACTOR = 1.5f;
-
- /** Dummy object used to represent a key that has been removed. */
- private static final long REMOVED = Long.MAX_VALUE;
- private static final long NULL = Long.MIN_VALUE;
-
- private long[] keys;
- private V[] values;
- private float loadFactor;
- private int numEntries;
- private int numSlotsUsed;
- private final int maxSize;
- private BitSet recentlyAccessed;
- private final boolean countingAccesses;
-
- /** Creates a new with default capacity. */
- public FastByIDMap() {
- this(2, NO_MAX_SIZE);
- }
-
- public FastByIDMap(int size) {
- this(size, NO_MAX_SIZE);
- }
-
- public FastByIDMap(int size, float loadFactor) {
- this(size, NO_MAX_SIZE, loadFactor);
- }
-
- public FastByIDMap(int size, int maxSize) {
- this(size, maxSize, DEFAULT_LOAD_FACTOR);
- }
-
- /**
- * Creates a new whose capacity can accommodate the given number of entries without rehash.
- *
- * @param size desired capacity
- * @param maxSize max capacity
- * @param loadFactor ratio of internal hash table size to current size
- * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
- * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
- * loadFactor is less than 1
- */
- public FastByIDMap(int size, int maxSize, float loadFactor) {
- Preconditions.checkArgument(size >= 0, "size must be at least 0");
- Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
- this.loadFactor = loadFactor;
- int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
- Preconditions.checkArgument(size < max, "size must be less than " + max);
- Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
- int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
- keys = new long[hashSize];
- Arrays.fill(keys, NULL);
- values = (V[]) new Object[hashSize];
- this.maxSize = maxSize;
- this.countingAccesses = maxSize != Integer.MAX_VALUE;
- this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
- }
-
- /**
- * @see #findForAdd(long)
- */
- private int find(long key) {
- int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
- long[] keys = this.keys;
- int hashSize = keys.length;
- int jump = 1 + theHashCode % (hashSize - 2);
- int index = theHashCode % hashSize;
- long currentKey = keys[index];
- while (currentKey != NULL && key != currentKey) {
- if (index < jump) {
- index += hashSize - jump;
- } else {
- index -= jump;
- }
- currentKey = keys[index];
- }
- return index;
- }
-
- /**
- * @see #find(long)
- */
- private int findForAdd(long key) {
- int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
- long[] keys = this.keys;
- int hashSize = keys.length;
- int jump = 1 + theHashCode % (hashSize - 2);
- int index = theHashCode % hashSize;
- long currentKey = keys[index];
- while (currentKey != NULL && currentKey != REMOVED && key != currentKey) { // Different
- // here
- if (index < jump) {
- index += hashSize - jump;
- } else {
- index -= jump;
- }
- currentKey = keys[index];
- }
- return index;
- }
-
- public V get(long key) {
- if (key == NULL) {
- return null;
- }
- int index = find(key);
- if (countingAccesses) {
- recentlyAccessed.set(index);
- }
- return values[index];
- }
-
- public int size() {
- return numEntries;
- }
-
- public boolean isEmpty() {
- return numEntries == 0;
- }
-
- public boolean containsKey(long key) {
- return key != NULL && key != REMOVED && keys[find(key)] != NULL;
- }
-
- public boolean containsValue(Object value) {
- if (value == null) {
- return false;
- }
- for (V theValue : values) {
- if (theValue != null && value.equals(theValue)) {
- return true;
- }
- }
- return false;
- }
-
- public V put(long key, V value) {
- Preconditions.checkArgument(key != NULL && key != REMOVED);
- if (value == null) {
- throw new NullPointerException();
- }
- // If less than half the slots are open, let's clear it up
- if (numSlotsUsed * loadFactor >= keys.length) {
- // If over half the slots used are actual entries, let's grow
- if (numEntries * loadFactor >= numSlotsUsed) {
- growAndRehash();
- } else {
- // Otherwise just rehash to clear REMOVED entries and don't grow
- rehash();
- }
- }
- // Here we may later consider implementing Brent's variation described on page 532
- int index = findForAdd(key);
- long keyIndex = keys[index];
- if (keyIndex == key) {
- V oldValue = values[index];
- values[index] = value;
- return oldValue;
- } else {
- // If size is limited,
- if (countingAccesses && numEntries >= maxSize) {
- // and we're too large, clear some old-ish entry
- clearStaleEntry(index);
- }
- keys[index] = key;
- values[index] = value;
- numEntries++;
- if (keyIndex == NULL) {
- numSlotsUsed++;
- }
- return null;
- }
- }
-
- private void clearStaleEntry(int index) {
- while (true) {
- long currentKey;
- do {
- if (index == 0) {
- index = keys.length - 1;
- } else {
- index--;
- }
- currentKey = keys[index];
- } while (currentKey == NULL || currentKey == REMOVED);
- if (recentlyAccessed.get(index)) {
- recentlyAccessed.clear(index);
- } else {
- break;
- }
- }
- // Delete the entry
- keys[index] = REMOVED;
- numEntries--;
- values[index] = null;
- }
-
- public V remove(long key) {
- if (key == NULL || key == REMOVED) {
- return null;
- }
- int index = find(key);
- if (keys[index] == NULL) {
- return null;
- } else {
- keys[index] = REMOVED;
- numEntries--;
- V oldValue = values[index];
- values[index] = null;
- // don't decrement numSlotsUsed
- return oldValue;
- }
- // Could un-set recentlyAccessed's bit but doesn't matter
- }
-
- public void clear() {
- numEntries = 0;
- numSlotsUsed = 0;
- Arrays.fill(keys, NULL);
- Arrays.fill(values, null);
- if (countingAccesses) {
- recentlyAccessed.clear();
- }
- }
-
- public LongPrimitiveIterator keySetIterator() {
- return new KeyIterator();
- }
-
- public Set> entrySet() {
- return new EntrySet();
- }
-
- public void rehash() {
- rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
- }
-
- private void growAndRehash() {
- if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
- throw new IllegalStateException("Can't grow any more");
- }
- rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
- }
-
- private void rehash(int newHashSize) {
- long[] oldKeys = keys;
- V[] oldValues = values;
- numEntries = 0;
- numSlotsUsed = 0;
- if (countingAccesses) {
- recentlyAccessed = new BitSet(newHashSize);
- }
- keys = new long[newHashSize];
- Arrays.fill(keys, NULL);
- values = (V[]) new Object[newHashSize];
- int length = oldKeys.length;
- for (int i = 0; i < length; i++) {
- long key = oldKeys[i];
- if (key != NULL && key != REMOVED) {
- put(key, oldValues[i]);
- }
- }
- }
-
- void iteratorRemove(int lastNext) {
- if (lastNext >= values.length) {
- throw new NoSuchElementException();
- }
- if (lastNext < 0) {
- throw new IllegalStateException();
- }
- values[lastNext] = null;
- keys[lastNext] = REMOVED;
- numEntries--;
- }
-
- @Override
- public FastByIDMap clone() {
- FastByIDMap clone;
- try {
- clone = (FastByIDMap) super.clone();
- } catch (CloneNotSupportedException cnse) {
- throw new AssertionError();
- }
- clone.keys = keys.clone();
- clone.values = values.clone();
- clone.recentlyAccessed = countingAccesses ? new BitSet(keys.length) : null;
- return clone;
- }
-
- @Override
- public String toString() {
- if (isEmpty()) {
- return "{}";
- }
- StringBuilder result = new StringBuilder();
- result.append('{');
- for (int i = 0; i < keys.length; i++) {
- long key = keys[i];
- if (key != NULL && key != REMOVED) {
- result.append(key).append('=').append(values[i]).append(',');
- }
- }
- result.setCharAt(result.length() - 1, '}');
- return result.toString();
- }
-
- @Override
- public int hashCode() {
- int hash = 0;
- long[] keys = this.keys;
- int max = keys.length;
- for (int i = 0; i < max; i++) {
- long key = keys[i];
- if (key != NULL && key != REMOVED) {
- hash = 31 * hash + ((int) (key >> 32) ^ (int) key);
- hash = 31 * hash + values[i].hashCode();
- }
- }
- return hash;
- }
-
- @Override
- public boolean equals(Object other) {
- if (!(other instanceof FastByIDMap)) {
- return false;
- }
- FastByIDMap otherMap = (FastByIDMap) other;
- long[] otherKeys = otherMap.keys;
- V[] otherValues = otherMap.values;
- int length = keys.length;
- int otherLength = otherKeys.length;
- int max = Math.min(length, otherLength);
-
- int i = 0;
- while (i < max) {
- long key = keys[i];
- long otherKey = otherKeys[i];
- if (key == NULL || key == REMOVED) {
- if (otherKey != NULL && otherKey != REMOVED) {
- return false;
- }
- } else {
- if (key != otherKey || !values[i].equals(otherValues[i])) {
- return false;
- }
- }
- i++;
- }
- while (i < length) {
- long key = keys[i];
- if (key != NULL && key != REMOVED) {
- return false;
- }
- i++;
- }
- while (i < otherLength) {
- long key = otherKeys[i];
- if (key != NULL && key != REMOVED) {
- return false;
- }
- i++;
- }
- return true;
- }
-
- private final class KeyIterator extends AbstractLongPrimitiveIterator {
-
- private int position;
- private int lastNext = -1;
-
- @Override
- public boolean hasNext() {
- goToNext();
- return position < keys.length;
- }
-
- @Override
- public long nextLong() {
- goToNext();
- lastNext = position;
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return keys[position++];
- }
-
- @Override
- public long peek() {
- goToNext();
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return keys[position];
- }
-
- private void goToNext() {
- int length = values.length;
- while (position < length && values[position] == null) {
- position++;
- }
- }
-
- @Override
- public void remove() {
- iteratorRemove(lastNext);
- }
-
- @Override
- public void skip(int n) {
- position += n;
- }
-
- }
-
- private final class EntrySet extends AbstractSet> {
-
- @Override
- public int size() {
- return FastByIDMap.this.size();
- }
-
- @Override
- public boolean isEmpty() {
- return FastByIDMap.this.isEmpty();
- }
-
- @Override
- public boolean contains(Object o) {
- return containsKey((Long) o);
- }
-
- @Override
- public Iterator> iterator() {
- return new EntryIterator();
- }
-
- @Override
- public boolean add(Map.Entry t) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean remove(Object o) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean addAll(Collection extends Map.Entry> ts) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean retainAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean removeAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void clear() {
- FastByIDMap.this.clear();
- }
-
- private final class MapEntry implements Map.Entry {
-
- private final int index;
-
- private MapEntry(int index) {
- this.index = index;
- }
-
- @Override
- public Long getKey() {
- return keys[index];
- }
-
- @Override
- public V getValue() {
- return values[index];
- }
-
- @Override
- public V setValue(V value) {
- Preconditions.checkArgument(value != null);
-
- V oldValue = values[index];
- values[index] = value;
- return oldValue;
- }
- }
-
- private final class EntryIterator implements Iterator> {
-
- private int position;
- private int lastNext = -1;
-
- @Override
- public boolean hasNext() {
- goToNext();
- return position < keys.length;
- }
-
- @Override
- public Map.Entry next() {
- goToNext();
- lastNext = position;
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return new MapEntry(position++);
- }
-
- private void goToNext() {
- int length = values.length;
- while (position < length && values[position] == null) {
- position++;
- }
- }
-
- @Override
- public void remove() {
- iteratorRemove(lastNext);
- }
- }
-
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
deleted file mode 100644
index 2c0c86bd0..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
+++ /dev/null
@@ -1,428 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-
-import org.apache.mahout.common.RandomUtils;
-
-import com.google.common.base.Preconditions;
-
-/**
- * @see FastByIDMap
- */
-public final class FastIDSet implements Serializable, Cloneable, Iterable {
-
- private static final float DEFAULT_LOAD_FACTOR = 1.5f;
-
- /** Dummy object used to represent a key that has been removed. */
- private static final long REMOVED = Long.MAX_VALUE;
- private static final long NULL = Long.MIN_VALUE;
-
- private long[] keys;
- private float loadFactor;
- private int numEntries;
- private int numSlotsUsed;
-
- /** Creates a new with default capacity. */
- public FastIDSet() {
- this(2);
- }
-
- public FastIDSet(long[] initialKeys) {
- this(initialKeys.length);
- addAll(initialKeys);
- }
-
- public FastIDSet(int size) {
- this(size, DEFAULT_LOAD_FACTOR);
- }
-
- public FastIDSet(int size, float loadFactor) {
- Preconditions.checkArgument(size >= 0, "size must be at least 0");
- Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
- this.loadFactor = loadFactor;
- int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
- Preconditions.checkArgument(size < max, "size must be less than %d", max);
- int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
- keys = new long[hashSize];
- Arrays.fill(keys, NULL); }
-
- /**
- * @see #findForAdd(long)
- */
- private int find(long key) {
- int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
- long[] keys = this.keys;
- int hashSize = keys.length;
- int jump = 1 + theHashCode % (hashSize - 2);
- int index = theHashCode % hashSize;
- long currentKey = keys[index];
- while (currentKey != NULL && key != currentKey) { // note: true when currentKey == REMOVED
- if (index < jump) {
- index += hashSize - jump;
- } else {
- index -= jump;
- }
- currentKey = keys[index];
- }
- return index;
- }
-
- /**
- * @see #find(long)
- */
- private int findForAdd(long key) {
- int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
- long[] keys = this.keys;
- int hashSize = keys.length;
- int jump = 1 + theHashCode % (hashSize - 2);
- int index = theHashCode % hashSize;
- long currentKey = keys[index];
- while (currentKey != NULL && currentKey != REMOVED && key != currentKey) { // Different here
- if (index < jump) {
- index += hashSize - jump;
- } else {
- index -= jump;
- }
- currentKey = keys[index];
- }
- return index;
- }
-
- public int size() {
- return numEntries;
- }
-
- public boolean isEmpty() {
- return numEntries == 0;
- }
-
- public boolean contains(long key) {
- return key != NULL && key != REMOVED && keys[find(key)] != NULL;
- }
-
- public boolean add(long key) {
- Preconditions.checkArgument(key != NULL && key != REMOVED);
-
- // If less than half the slots are open, let's clear it up
- if (numSlotsUsed * loadFactor >= keys.length) {
- // If over half the slots used are actual entries, let's grow
- if (numEntries * loadFactor >= numSlotsUsed) {
- growAndRehash();
- } else {
- // Otherwise just rehash to clear REMOVED entries and don't grow
- rehash();
- }
- }
- // Here we may later consider implementing Brent's variation described on page 532
- int index = findForAdd(key);
- long keyIndex = keys[index];
- if (keyIndex != key) {
- keys[index] = key;
- numEntries++;
- if (keyIndex == NULL) {
- numSlotsUsed++;
- }
- return true;
- }
- return false;
- }
-
- @Override
- public LongPrimitiveIterator iterator() {
- return new KeyIterator();
- }
-
- public long[] toArray() {
- long[] result = new long[numEntries];
- for (int i = 0, position = 0; i < result.length; i++) {
- while (keys[position] == NULL || keys[position] == REMOVED) {
- position++;
- }
- result[i] = keys[position++];
- }
- return result;
- }
-
- public boolean remove(long key) {
- if (key == NULL || key == REMOVED) {
- return false;
- }
- int index = find(key);
- if (keys[index] == NULL) {
- return false;
- } else {
- keys[index] = REMOVED;
- numEntries--;
- return true;
- }
- }
-
- public boolean addAll(long[] c) {
- boolean changed = false;
- for (long k : c) {
- if (add(k)) {
- changed = true;
- }
- }
- return changed;
- }
-
- public boolean addAll(FastIDSet c) {
- boolean changed = false;
- for (long k : c.keys) {
- if (k != NULL && k != REMOVED && add(k)) {
- changed = true;
- }
- }
- return changed;
- }
-
- public boolean removeAll(long[] c) {
- boolean changed = false;
- for (long o : c) {
- if (remove(o)) {
- changed = true;
- }
- }
- return changed;
- }
-
- public boolean removeAll(FastIDSet c) {
- boolean changed = false;
- for (long k : c.keys) {
- if (k != NULL && k != REMOVED && remove(k)) {
- changed = true;
- }
- }
- return changed;
- }
-
- public boolean retainAll(FastIDSet c) {
- boolean changed = false;
- for (int i = 0; i < keys.length; i++) {
- long k = keys[i];
- if (k != NULL && k != REMOVED && !c.contains(k)) {
- keys[i] = REMOVED;
- numEntries--;
- changed = true;
- }
- }
- return changed;
- }
-
- public void clear() {
- numEntries = 0;
- numSlotsUsed = 0;
- Arrays.fill(keys, NULL);
- }
-
- private void growAndRehash() {
- if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
- throw new IllegalStateException("Can't grow any more");
- }
- rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
- }
-
- public void rehash() {
- rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
- }
-
- private void rehash(int newHashSize) {
- long[] oldKeys = keys;
- numEntries = 0;
- numSlotsUsed = 0;
- keys = new long[newHashSize];
- Arrays.fill(keys, NULL);
- int length = oldKeys.length;
- for (int i = 0; i < length; i++) {
- long key = oldKeys[i];
- if (key != NULL && key != REMOVED) {
- add(key);
- }
- }
- }
-
- /**
- * Convenience method to quickly compute just the size of the intersection with another .
- *
- * @param other
- * to intersect with
- * @return number of elements in intersection
- */
- public int intersectionSize(FastIDSet other) {
- int count = 0;
- for (long key : other.keys) {
- if (key != NULL && key != REMOVED && keys[find(key)] != NULL) {
- count++;
- }
- }
- return count;
- }
-
- @Override
- public FastIDSet clone() {
- FastIDSet clone;
- try {
- clone = (FastIDSet) super.clone();
- } catch (CloneNotSupportedException cnse) {
- throw new AssertionError();
- }
- clone.keys = keys.clone();
- return clone;
- }
-
- @Override
- public int hashCode() {
- int hash = 0;
- long[] keys = this.keys;
- int max = keys.length;
- for (int i = 0; i < max; i++) {
- long key = keys[i];
- if (key != NULL && key != REMOVED) {
- hash = 31 * hash + ((int) (key >> 32) ^ (int) key);
- }
- }
- return hash;
- }
-
- @Override
- public boolean equals(Object other) {
- if (!(other instanceof FastIDSet)) {
- return false;
- }
- FastIDSet otherMap = (FastIDSet) other;
- long[] otherKeys = otherMap.keys;
- int length = keys.length;
- int otherLength = otherKeys.length;
- int max = Math.min(length, otherLength);
-
- int i = 0;
- while (i < max) {
- long key = keys[i];
- long otherKey = otherKeys[i];
- if (key == NULL || key == REMOVED) {
- if (otherKey != NULL && otherKey != REMOVED) {
- return false;
- }
- } else {
- if (key != otherKey) {
- return false;
- }
- }
- i++;
- }
- while (i < length) {
- long key = keys[i];
- if (key != NULL && key != REMOVED) {
- return false;
- }
- i++;
- }
- while (i < otherLength) {
- long key = otherKeys[i];
- if (key != NULL && key != REMOVED) {
- return false;
- }
- i++;
- }
- return true;
- }
-
- @Override
- public String toString() {
- if (isEmpty()) {
- return "[]";
- }
- StringBuilder result = new StringBuilder();
- result.append('[');
- for (long key : keys) {
- if (key != NULL && key != REMOVED) {
- result.append(key).append(',');
- }
- }
- result.setCharAt(result.length() - 1, ']');
- return result.toString();
- }
-
- private final class KeyIterator extends AbstractLongPrimitiveIterator {
-
- private int position;
- private int lastNext = -1;
-
- @Override
- public boolean hasNext() {
- goToNext();
- return position < keys.length;
- }
-
- @Override
- public long nextLong() {
- goToNext();
- lastNext = position;
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return keys[position++];
- }
-
- @Override
- public long peek() {
- goToNext();
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return keys[position];
- }
-
- private void goToNext() {
- int length = keys.length;
- while (position < length
- && (keys[position] == NULL || keys[position] == REMOVED)) {
- position++;
- }
- }
-
- @Override
- public void remove() {
- if (lastNext >= keys.length) {
- throw new NoSuchElementException();
- }
- if (lastNext < 0) {
- throw new IllegalStateException();
- }
- keys[lastNext] = REMOVED;
- numEntries--;
- }
-
- public Iterator iterator() {
- return new KeyIterator();
- }
-
- @Override
- public void skip(int n) {
- position += n;
- }
-
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
deleted file mode 100644
index df2a47023..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
+++ /dev/null
@@ -1,712 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-import java.util.AbstractCollection;
-import java.util.AbstractSet;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Set;
-
-import org.apache.mahout.common.RandomUtils;
-
-import com.google.common.base.Preconditions;
-
-/**
- *
- * This is an optimized {@link Map} implementation, based on algorithms described in Knuth's "Art of Computer
- * Programming", Vol. 3, p. 529.
- *
- *
- *
- * It should be faster than {@link java.util.HashMap} in some cases, but not all. Its main feature is a
- * "max size" and the ability to transparently, efficiently and semi-intelligently evict old entries when max
- * size is exceeded.
- *
- *
- *
- * This class is not a bit thread-safe.
- *
- *
- *
- * This implementation does not allow {@code null} as a key or value.
- *
- */
-public final class FastMap implements Map, Serializable, Cloneable {
-
- public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
- private static final float DEFAULT_LOAD_FACTOR = 1.5f;
-
- /** Dummy object used to represent a key that has been removed. */
- private static final Object REMOVED = new Object();
-
- private K[] keys;
- private V[] values;
- private float loadFactor;
- private int numEntries;
- private int numSlotsUsed;
- private final int maxSize;
- private BitSet recentlyAccessed;
- private final boolean countingAccesses;
-
- /** Creates a new with default capacity. */
- public FastMap() {
- this(2, NO_MAX_SIZE);
- }
-
- public FastMap(int size) {
- this(size, NO_MAX_SIZE);
- }
-
- public FastMap(Map other) {
- this(other.size());
- putAll(other);
- }
-
- public FastMap(int size, float loadFactor) {
- this(size, NO_MAX_SIZE, loadFactor);
- }
-
- public FastMap(int size, int maxSize) {
- this(size, maxSize, DEFAULT_LOAD_FACTOR);
- }
-
- /**
- * Creates a new whose capacity can accommodate the given number of entries without rehash.
- *
- * @param size desired capacity
- * @param maxSize max capacity
- * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
- * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
- * loadFactor is less than 1
- */
- public FastMap(int size, int maxSize, float loadFactor) {
- Preconditions.checkArgument(size >= 0, "size must be at least 0");
- Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
- this.loadFactor = loadFactor;
- int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
- Preconditions.checkArgument(size < max, "size must be less than " + max);
- Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
- int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
- keys = (K[]) new Object[hashSize];
- values = (V[]) new Object[hashSize];
- this.maxSize = maxSize;
- this.countingAccesses = maxSize != Integer.MAX_VALUE;
- this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
- }
-
- private int find(Object key) {
- int theHashCode = key.hashCode() & 0x7FFFFFFF; // make sure it's positive
- K[] keys = this.keys;
- int hashSize = keys.length;
- int jump = 1 + theHashCode % (hashSize - 2);
- int index = theHashCode % hashSize;
- K currentKey = keys[index];
- while (currentKey != null && (currentKey == REMOVED || !key.equals(currentKey))) {
- if (index < jump) {
- index += hashSize - jump;
- } else {
- index -= jump;
- }
- currentKey = keys[index];
- }
- return index;
- }
-
- @Override
- public V get(Object key) {
- if (key == null) {
- return null;
- }
- int index = find(key);
- if (countingAccesses) {
- recentlyAccessed.set(index);
- }
- return values[index];
- }
-
- @Override
- public int size() {
- return numEntries;
- }
-
- @Override
- public boolean isEmpty() {
- return numEntries == 0;
- }
-
- @Override
- public boolean containsKey(Object key) {
- return key != null && keys[find(key)] != null;
- }
-
- @Override
- public boolean containsValue(Object value) {
- if (value == null) {
- return false;
- }
- for (V theValue : values) {
- if (theValue != null && value.equals(theValue)) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * @throws NullPointerException
- * if key or value is null
- */
- @Override
- public V put(K key, V value) {
- if (key == null || value == null) {
- throw new NullPointerException();
- }
- // If less than half the slots are open, let's clear it up
- if (numSlotsUsed * loadFactor >= keys.length) {
- // If over half the slots used are actual entries, let's grow
- if (numEntries * loadFactor >= numSlotsUsed) {
- growAndRehash();
- } else {
- // Otherwise just rehash to clear REMOVED entries and don't grow
- rehash();
- }
- }
- // Here we may later consider implementing Brent's variation described on page 532
- int index = find(key);
- if (keys[index] == null) {
- // If size is limited,
- if (countingAccesses && numEntries >= maxSize) {
- // and we're too large, clear some old-ish entry
- clearStaleEntry(index);
- }
- keys[index] = key;
- values[index] = value;
- numEntries++;
- numSlotsUsed++;
- return null;
- } else {
- V oldValue = values[index];
- values[index] = value;
- return oldValue;
- }
- }
-
- private void clearStaleEntry(int index) {
- while (true) {
- K currentKey;
- do {
- if (index == 0) {
- index = keys.length - 1;
- } else {
- index--;
- }
- currentKey = keys[index];
- } while (currentKey == null || currentKey == REMOVED);
- if (recentlyAccessed.get(index)) {
- recentlyAccessed.clear(index);
- } else {
- break;
- }
- }
- // Delete the entry
- ((Object[])keys)[index] = REMOVED;
- numEntries--;
- values[index] = null;
- }
-
- @Override
- public void putAll(Map extends K,? extends V> map) {
- for (Entry extends K,? extends V> entry : map.entrySet()) {
- put(entry.getKey(), entry.getValue());
- }
- }
-
- @Override
- public V remove(Object key) {
- if (key == null) {
- return null;
- }
- int index = find(key);
- if (keys[index] == null) {
- return null;
- } else {
- ((Object[])keys)[index] = REMOVED;
- numEntries--;
- V oldValue = values[index];
- values[index] = null;
- // don't decrement numSlotsUsed
- return oldValue;
- }
- // Could un-set recentlyAccessed's bit but doesn't matter
- }
-
- @Override
- public void clear() {
- numEntries = 0;
- numSlotsUsed = 0;
- Arrays.fill(keys, null);
- Arrays.fill(values, null);
- if (countingAccesses) {
- recentlyAccessed.clear();
- }
- }
-
- @Override
- public Set keySet() {
- return new KeySet();
- }
-
- @Override
- public Collection values() {
- return new ValueCollection();
- }
-
- @Override
- public Set> entrySet() {
- return new EntrySet();
- }
-
- public void rehash() {
- rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
- }
-
- private void growAndRehash() {
- if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
- throw new IllegalStateException("Can't grow any more");
- }
- rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
- }
-
- private void rehash(int newHashSize) {
- K[] oldKeys = keys;
- V[] oldValues = values;
- numEntries = 0;
- numSlotsUsed = 0;
- if (countingAccesses) {
- recentlyAccessed = new BitSet(newHashSize);
- }
- keys = (K[]) new Object[newHashSize];
- values = (V[]) new Object[newHashSize];
- int length = oldKeys.length;
- for (int i = 0; i < length; i++) {
- K key = oldKeys[i];
- if (key != null && key != REMOVED) {
- put(key, oldValues[i]);
- }
- }
- }
-
- void iteratorRemove(int lastNext) {
- if (lastNext >= values.length) {
- throw new NoSuchElementException();
- }
- if (lastNext < 0) {
- throw new IllegalStateException();
- }
- values[lastNext] = null;
- ((Object[])keys)[lastNext] = REMOVED;
- numEntries--;
- }
-
- @Override
- public FastMap clone() {
- FastMap clone;
- try {
- clone = (FastMap) super.clone();
- } catch (CloneNotSupportedException cnse) {
- throw new AssertionError();
- }
- clone.keys = keys.clone();
- clone.values = values.clone();
- clone.recentlyAccessed = countingAccesses ? new BitSet(keys.length) : null;
- return clone;
- }
-
- @Override
- public int hashCode() {
- int hash = 0;
- K[] keys = this.keys;
- int max = keys.length;
- for (int i = 0; i < max; i++) {
- K key = keys[i];
- if (key != null && key != REMOVED) {
- hash = 31 * hash + key.hashCode();
- hash = 31 * hash + values[i].hashCode();
- }
- }
- return hash;
- }
-
- @Override
- public boolean equals(Object other) {
- if (!(other instanceof FastMap)) {
- return false;
- }
- FastMap otherMap = (FastMap) other;
- K[] otherKeys = otherMap.keys;
- V[] otherValues = otherMap.values;
- int length = keys.length;
- int otherLength = otherKeys.length;
- int max = Math.min(length, otherLength);
-
- int i = 0;
- while (i < max) {
- K key = keys[i];
- K otherKey = otherKeys[i];
- if (key == null || key == REMOVED) {
- if (otherKey != null && otherKey != REMOVED) {
- return false;
- }
- } else {
- if (key != otherKey || !values[i].equals(otherValues[i])) {
- return false;
- }
- }
- i++;
- }
- while (i < length) {
- K key = keys[i];
- if (key != null && key != REMOVED) {
- return false;
- }
- i++;
- }
- while (i < otherLength) {
- K key = otherKeys[i];
- if (key != null && key != REMOVED) {
- return false;
- }
- i++;
- }
- return true;
- }
-
- @Override
- public String toString() {
- if (isEmpty()) {
- return "{}";
- }
- StringBuilder result = new StringBuilder();
- result.append('{');
- for (int i = 0; i < keys.length; i++) {
- K key = keys[i];
- if (key != null && key != REMOVED) {
- result.append(key).append('=').append(values[i]).append(',');
- }
- }
- result.setCharAt(result.length() - 1, '}');
- return result.toString();
- }
-
- private final class EntrySet extends AbstractSet> {
-
- @Override
- public int size() {
- return FastMap.this.size();
- }
-
- @Override
- public boolean isEmpty() {
- return FastMap.this.isEmpty();
- }
-
- @Override
- public boolean contains(Object o) {
- return containsKey(o);
- }
-
- @Override
- public Iterator> iterator() {
- return new EntryIterator();
- }
-
- @Override
- public boolean add(Entry t) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean remove(Object o) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean addAll(Collection extends Entry> ts) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean retainAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean removeAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void clear() {
- FastMap.this.clear();
- }
-
- private final class MapEntry implements Entry {
-
- private final int index;
-
- private MapEntry(int index) {
- this.index = index;
- }
-
- @Override
- public K getKey() {
- return keys[index];
- }
-
- @Override
- public V getValue() {
- return values[index];
- }
-
- @Override
- public V setValue(V value) {
- Preconditions.checkArgument(value != null);
- V oldValue = values[index];
- values[index] = value;
- return oldValue;
- }
- }
-
- private final class EntryIterator implements Iterator> {
-
- private int position;
- private int lastNext = -1;
-
- @Override
- public boolean hasNext() {
- goToNext();
- return position < keys.length;
- }
-
- @Override
- public Entry next() {
- goToNext();
- lastNext = position;
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return new MapEntry(position++);
- }
-
- private void goToNext() {
- int length = values.length;
- while (position < length && values[position] == null) {
- position++;
- }
- }
-
- @Override
- public void remove() {
- iteratorRemove(lastNext);
- }
- }
-
- }
-
- private final class KeySet extends AbstractSet {
-
- @Override
- public int size() {
- return FastMap.this.size();
- }
-
- @Override
- public boolean isEmpty() {
- return FastMap.this.isEmpty();
- }
-
- @Override
- public boolean contains(Object o) {
- return containsKey(o);
- }
-
- @Override
- public Iterator iterator() {
- return new KeyIterator();
- }
-
- @Override
- public boolean add(K t) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean remove(Object o) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean addAll(Collection extends K> ts) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean retainAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean removeAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void clear() {
- FastMap.this.clear();
- }
-
- private final class KeyIterator implements Iterator {
-
- private int position;
- private int lastNext = -1;
-
- @Override
- public boolean hasNext() {
- goToNext();
- return position < keys.length;
- }
-
- @Override
- public K next() {
- goToNext();
- lastNext = position;
- if (position >= keys.length) {
- throw new NoSuchElementException();
- }
- return keys[position++];
- }
-
- private void goToNext() {
- int length = values.length;
- while (position < length && values[position] == null) {
- position++;
- }
- }
-
- @Override
- public void remove() {
- iteratorRemove(lastNext);
- }
- }
-
- }
-
- private final class ValueCollection extends AbstractCollection {
-
- @Override
- public int size() {
- return FastMap.this.size();
- }
-
- @Override
- public boolean isEmpty() {
- return FastMap.this.isEmpty();
- }
-
- @Override
- public boolean contains(Object o) {
- return containsValue(o);
- }
-
- @Override
- public Iterator iterator() {
- return new ValueIterator();
- }
-
- @Override
- public boolean add(V v) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean remove(Object o) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean addAll(Collection extends V> vs) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean removeAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean retainAll(Collection> objects) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void clear() {
- FastMap.this.clear();
- }
-
- private final class ValueIterator implements Iterator {
-
- private int position;
- private int lastNext = -1;
-
- @Override
- public boolean hasNext() {
- goToNext();
- return position < values.length;
- }
-
- @Override
- public V next() {
- goToNext();
- lastNext = position;
- if (position >= values.length) {
- throw new NoSuchElementException();
- }
- return values[position++];
- }
-
- private void goToNext() {
- int length = values.length;
- while (position < length && values[position] == null) {
- position++;
- }
- }
-
- @Override
- public void remove() {
- iteratorRemove(lastNext);
- }
-
- }
-
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
deleted file mode 100644
index 9c68e111b..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-
-/**
- *
- * A simple class that represents a fixed value of an average and count. This is useful
- * when an API needs to return {@link RunningAverage} but is not in a position to accept
- * updates to it.
- *
- */
-public class FixedRunningAverage implements RunningAverage, Serializable {
-
- private final double average;
- private final int count;
-
- public FixedRunningAverage(double average, int count) {
- this.average = average;
- this.count = count;
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public synchronized void addDatum(double datum) {
- throw new UnsupportedOperationException();
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public synchronized void removeDatum(double datum) {
- throw new UnsupportedOperationException();
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public synchronized void changeDatum(double delta) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public synchronized int getCount() {
- return count;
- }
-
- @Override
- public synchronized double getAverage() {
- return average;
- }
-
- @Override
- public RunningAverage inverse() {
- return new InvertedRunningAverage(this);
- }
-
- @Override
- public synchronized String toString() {
- return String.valueOf(average);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
deleted file mode 100644
index 0447ac956..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-/**
- *
- * A simple class that represents a fixed value of an average, count and standard deviation. This is useful
- * when an API needs to return {@link RunningAverageAndStdDev} but is not in a position to accept
- * updates to it.
- *
- */
-public final class FixedRunningAverageAndStdDev extends FixedRunningAverage implements RunningAverageAndStdDev {
-
- private final double stdDev;
-
- public FixedRunningAverageAndStdDev(double average, double stdDev, int count) {
- super(average, count);
- this.stdDev = stdDev;
- }
-
- @Override
- public RunningAverageAndStdDev inverse() {
- return new InvertedRunningAverageAndStdDev(this);
- }
-
- @Override
- public synchronized String toString() {
- return super.toString() + ',' + stdDev;
- }
-
- @Override
- public double getStandardDeviation() {
- return stdDev;
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
deleted file mode 100644
index 04ff312d2..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-
-/**
- *
- * A simple class that can keep track of a running avearage of a series of numbers. One can add to or remove
- * from the series, as well as update a datum in the series. The class does not actually keep track of the
- * series of values, just its running average, so it doesn't even matter if you remove/change a value that
- * wasn't added.
- *
- */
-public class FullRunningAverage implements RunningAverage, Serializable {
-
- private int count;
- private double average;
-
- public FullRunningAverage() {
- this(0, Double.NaN);
- }
-
- public FullRunningAverage(int count, double average) {
- this.count = count;
- this.average = average;
- }
-
- /**
- * @param datum
- * new item to add to the running average
- */
- @Override
- public synchronized void addDatum(double datum) {
- if (++count == 1) {
- average = datum;
- } else {
- average = average * (count - 1) / count + datum / count;
- }
- }
-
- /**
- * @param datum
- * item to remove to the running average
- * @throws IllegalStateException
- * if count is 0
- */
- @Override
- public synchronized void removeDatum(double datum) {
- if (count == 0) {
- throw new IllegalStateException();
- }
- if (--count == 0) {
- average = Double.NaN;
- } else {
- average = average * (count + 1) / count - datum / count;
- }
- }
-
- /**
- * @param delta
- * amount by which to change a datum in the running average
- * @throws IllegalStateException
- * if count is 0
- */
- @Override
- public synchronized void changeDatum(double delta) {
- if (count == 0) {
- throw new IllegalStateException();
- }
- average += delta / count;
- }
-
- @Override
- public synchronized int getCount() {
- return count;
- }
-
- @Override
- public synchronized double getAverage() {
- return average;
- }
-
- @Override
- public RunningAverage inverse() {
- return new InvertedRunningAverage(this);
- }
-
- @Override
- public synchronized String toString() {
- return String.valueOf(average);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
deleted file mode 100644
index 6212e6616..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-/**
- *
- * Extends {@link FullRunningAverage} to add a running standard deviation computation.
- * Uses Welford's method, as described at http://www.johndcook.com/standard_deviation.html
- *
- */
-public final class FullRunningAverageAndStdDev extends FullRunningAverage implements RunningAverageAndStdDev {
-
- private double stdDev;
- private double mk;
- private double sk;
-
- public FullRunningAverageAndStdDev() {
- mk = 0.0;
- sk = 0.0;
- recomputeStdDev();
- }
-
- public FullRunningAverageAndStdDev(int count, double average, double mk, double sk) {
- super(count, average);
- this.mk = mk;
- this.sk = sk;
- recomputeStdDev();
- }
-
- public double getMk() {
- return mk;
- }
-
- public double getSk() {
- return sk;
- }
-
- @Override
- public synchronized double getStandardDeviation() {
- return stdDev;
- }
-
- @Override
- public synchronized void addDatum(double datum) {
- super.addDatum(datum);
- int count = getCount();
- if (count == 1) {
- mk = datum;
- sk = 0.0;
- } else {
- double oldmk = mk;
- double diff = datum - oldmk;
- mk += diff / count;
- sk += diff * (datum - mk);
- }
- recomputeStdDev();
- }
-
- @Override
- public synchronized void removeDatum(double datum) {
- int oldCount = getCount();
- super.removeDatum(datum);
- double oldmk = mk;
- mk = (oldCount * oldmk - datum) / (oldCount - 1);
- sk -= (datum - mk) * (datum - oldmk);
- recomputeStdDev();
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public void changeDatum(double delta) {
- throw new UnsupportedOperationException();
- }
-
- private synchronized void recomputeStdDev() {
- int count = getCount();
- stdDev = count > 1 ? Math.sqrt(sk / (count - 1)) : Double.NaN;
- }
-
- @Override
- public RunningAverageAndStdDev inverse() {
- return new InvertedRunningAverageAndStdDev(this);
- }
-
- @Override
- public synchronized String toString() {
- return String.valueOf(String.valueOf(getAverage()) + ',' + stdDev);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
deleted file mode 100644
index ffd9b2271..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-public final class InvertedRunningAverage implements RunningAverage {
-
- private final RunningAverage delegate;
-
- public InvertedRunningAverage(RunningAverage delegate) {
- this.delegate = delegate;
- }
-
- @Override
- public void addDatum(double datum) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void removeDatum(double datum) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void changeDatum(double delta) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int getCount() {
- return delegate.getCount();
- }
-
- @Override
- public double getAverage() {
- return -delegate.getAverage();
- }
-
- @Override
- public RunningAverage inverse() {
- return delegate;
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
deleted file mode 100644
index 3b2b6d886..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-public final class InvertedRunningAverageAndStdDev implements RunningAverageAndStdDev {
-
- private final RunningAverageAndStdDev delegate;
-
- public InvertedRunningAverageAndStdDev(RunningAverageAndStdDev delegate) {
- this.delegate = delegate;
- }
-
- @Override
- public void addDatum(double datum) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void removeDatum(double datum) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void changeDatum(double delta) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int getCount() {
- return delegate.getCount();
- }
-
- @Override
- public double getAverage() {
- return -delegate.getAverage();
- }
-
- @Override
- public double getStandardDeviation() {
- return delegate.getStandardDeviation();
- }
-
- @Override
- public RunningAverageAndStdDev inverse() {
- return delegate;
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
deleted file mode 100644
index 96e317c43..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.util.NoSuchElementException;
-
-import com.google.common.base.Preconditions;
-
-/**
- * While long[] is an Iterable, it is not an Iterable<Long>. This adapter class addresses that.
- */
-public final class LongPrimitiveArrayIterator implements LongPrimitiveIterator {
-
- private final long[] array;
- private int position;
- private final int max;
-
- /**
- *
- * Creates an over an entire array.
- *
- *
- * @param array
- * array to iterate over
- */
- public LongPrimitiveArrayIterator(long[] array) {
- this.array = Preconditions.checkNotNull(array); // yeah, not going to copy the array here, for performance
- this.position = 0;
- this.max = array.length;
- }
-
- @Override
- public boolean hasNext() {
- return position < max;
- }
-
- @Override
- public Long next() {
- return nextLong();
- }
-
- @Override
- public long nextLong() {
- if (position >= array.length) {
- throw new NoSuchElementException();
- }
- return array[position++];
- }
-
- @Override
- public long peek() {
- if (position >= array.length) {
- throw new NoSuchElementException();
- }
- return array[position];
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void skip(int n) {
- if (n > 0) {
- position += n;
- }
- }
-
- @Override
- public String toString() {
- return "LongPrimitiveArrayIterator";
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java
deleted file mode 100644
index 7776361dc..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-/**
- * Adds notion of iterating over {@code long} primitives in the style of an {@link java.util.Iterator} -- as
- * opposed to iterating over {@link Long}. Implementations of this interface however also implement
- * {@link java.util.Iterator} and {@link Iterable} over {@link Long} for convenience.
- */
-public interface LongPrimitiveIterator extends SkippingIterator {
-
- /**
- * @return next {@code long} in iteration
- * @throws java.util.NoSuchElementException
- * if no more elements exist in the iteration
- */
- long nextLong();
-
- /**
- * @return next {@code long} in iteration without advancing iteration
- */
- long peek();
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
deleted file mode 100644
index 87e54993c..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.locks.ReentrantLock;
-
-import com.google.common.collect.Lists;
-import org.apache.mahout.cf.taste.common.Refreshable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A helper class for implementing {@link Refreshable}. This object is typically included in an implementation
- * {@link Refreshable} to implement {@link Refreshable#refresh(Collection)}. It execute the class's own
- * supplied update logic, after updating all the object's dependencies. This also ensures that dependencies
- * are not updated multiple times.
- */
-public final class RefreshHelper implements Refreshable {
-
- private static final Logger log = LoggerFactory.getLogger(RefreshHelper.class);
-
- private final List dependencies;
- private final ReentrantLock refreshLock;
- private final Callable> refreshRunnable;
-
- /**
- * @param refreshRunnable
- * encapsulates the containing object's own refresh logic
- */
- public RefreshHelper(Callable> refreshRunnable) {
- this.dependencies = Lists.newArrayListWithCapacity(3);
- this.refreshLock = new ReentrantLock();
- this.refreshRunnable = refreshRunnable;
- }
-
- /** Add a dependency to be refreshed first when the encapsulating object does. */
- public void addDependency(Refreshable refreshable) {
- if (refreshable != null) {
- dependencies.add(refreshable);
- }
- }
-
- public void removeDependency(Refreshable refreshable) {
- if (refreshable != null) {
- dependencies.remove(refreshable);
- }
- }
-
- /**
- * Typically this is called in and is the entire body of
- * that method.
- */
- @Override
- public void refresh(Collection alreadyRefreshed) {
- if (refreshLock.tryLock()) {
- try {
- alreadyRefreshed = buildRefreshed(alreadyRefreshed);
- for (Refreshable dependency : dependencies) {
- maybeRefresh(alreadyRefreshed, dependency);
- }
- if (refreshRunnable != null) {
- try {
- refreshRunnable.call();
- } catch (Exception e) {
- log.warn("Unexpected exception while refreshing", e);
- }
- }
- } finally {
- refreshLock.unlock();
- }
- }
- }
-
- /**
- * Creates a new and empty {@link Collection} if the method parameter is {@code null}.
- *
- * @param currentAlreadyRefreshed
- * {@link Refreshable}s to refresh later on
- * @return an empty {@link Collection} if the method param was {@code null} or the unmodified method
- * param.
- */
- public static Collection buildRefreshed(Collection currentAlreadyRefreshed) {
- return currentAlreadyRefreshed == null ? new HashSet(3) : currentAlreadyRefreshed;
- }
-
- /**
- * Adds the specified {@link Refreshable} to the given collection of {@link Refreshable}s if it is not
- * already there and immediately refreshes it.
- *
- * @param alreadyRefreshed
- * the collection of {@link Refreshable}s
- * @param refreshable
- * the {@link Refreshable} to potentially add and refresh
- */
- public static void maybeRefresh(Collection alreadyRefreshed, Refreshable refreshable) {
- if (!alreadyRefreshed.contains(refreshable)) {
- alreadyRefreshed.add(refreshable);
- log.info("Added refreshable: {}", refreshable);
- refreshable.refresh(alreadyRefreshed);
- log.info("Refreshed: {}", alreadyRefreshed);
- }
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java
deleted file mode 100644
index 40da9de62..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-
-/**
- *
- * Implementations can retrieve a value for a given key.
- *
- */
-public interface Retriever {
-
- /**
- * @param key key for which a value should be retrieved
- * @return value for key
- * @throws TasteException if an error occurs while retrieving the value
- */
- V get(K key) throws TasteException;
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
deleted file mode 100644
index bf8e39c62..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-/**
- *
- * Interface for classes that can keep track of a running average of a series of numbers. One can add to or
- * remove from the series, as well as update a datum in the series. The class does not actually keep track of
- * the series of values, just its running average, so it doesn't even matter if you remove/change a value that
- * wasn't added.
- *
- */
-public interface RunningAverage {
-
- /**
- * @param datum
- * new item to add to the running average
- * @throws IllegalArgumentException
- * if datum is {@link Double#NaN}
- */
- void addDatum(double datum);
-
- /**
- * @param datum
- * item to remove to the running average
- * @throws IllegalArgumentException
- * if datum is {@link Double#NaN}
- * @throws IllegalStateException
- * if count is 0
- */
- void removeDatum(double datum);
-
- /**
- * @param delta
- * amount by which to change a datum in the running average
- * @throws IllegalArgumentException
- * if delta is {@link Double#NaN}
- * @throws IllegalStateException
- * if count is 0
- */
- void changeDatum(double delta);
-
- int getCount();
-
- double getAverage();
-
- /**
- * @return a (possibly immutable) object whose average is the negative of this object's
- */
- RunningAverage inverse();
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
deleted file mode 100644
index 4ac610897..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-/**
- *
- * Extends {@link RunningAverage} by adding standard deviation too.
- *
- */
-public interface RunningAverageAndStdDev extends RunningAverage {
-
- /** @return standard deviation of data */
- double getStandardDeviation();
-
- /**
- * @return a (possibly immutable) object whose average is the negative of this object's
- */
- @Override
- RunningAverageAndStdDev inverse();
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
deleted file mode 100644
index 390ab86a1..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.util.NoSuchElementException;
-import java.util.Random;
-
-import org.apache.mahout.common.RandomUtils;
-
-/**
- * Wraps a {@link LongPrimitiveIterator} and returns only some subset of the elements that it would,
- * as determined by a sampling rate parameter.
- */
-public final class SamplingLongPrimitiveIterator extends AbstractLongPrimitiveIterator {
-
- private final Random random;
- private final LongPrimitiveIterator delegate;
- private final double samplingRate;
- private long next;
- private boolean hasNext;
-
- public SamplingLongPrimitiveIterator(LongPrimitiveIterator delegate, double samplingRate) {
- random = RandomUtils.getRandom();
- this.delegate = delegate;
- this.samplingRate = samplingRate;
- this.hasNext = true;
- doNext();
- }
-
- @Override
- public boolean hasNext() {
- return hasNext;
- }
-
- @Override
- public long nextLong() {
- if (hasNext) {
- long result = next;
- doNext();
- return result;
- }
- throw new NoSuchElementException();
- }
-
- @Override
- public long peek() {
- if (hasNext) {
- return next;
- }
- throw new NoSuchElementException();
- }
-
- private void doNext() {
- int toSkip = 0;
- while (random.nextDouble() >= samplingRate) {
- toSkip++;
- }
- // Really, would be nicer to select value from geometric distribution, for small values of samplingRate
- if (toSkip > 0) {
- delegate.skip(toSkip);
- }
- if (delegate.hasNext()) {
- next = delegate.next();
- } else {
- hasNext = false;
- }
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void skip(int n) {
- delegate.skip((int) (n / samplingRate)); // Kind of an approximation, but this is expected skip
- if (delegate.hasNext()) {
- next = delegate.next();
- } else {
- hasNext = false;
- }
- }
-
- public static LongPrimitiveIterator maybeWrapIterator(LongPrimitiveIterator delegate, double samplingRate) {
- return samplingRate >= 1.0 ? delegate : new SamplingLongPrimitiveIterator(delegate, samplingRate);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
deleted file mode 100644
index e88f98a49..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.util.Iterator;
-
-/**
- * Adds ability to skip ahead in an iterator, perhaps more efficiently than by calling {@link #next()}
- * repeatedly.
- */
-public interface SkippingIterator extends Iterator {
-
- /**
- * Skip the next n elements supplied by this {@link Iterator}. If there are less than n elements remaining,
- * this skips all remaining elements in the {@link Iterator}. This method has the same effect as calling
- * {@link #next()} n times, except that it will never throw {@link java.util.NoSuchElementException}.
- */
- void skip(int n);
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
deleted file mode 100644
index 78a32d458..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-import java.io.Serializable;
-
-import com.google.common.base.Preconditions;
-
-public class WeightedRunningAverage implements RunningAverage, Serializable {
-
- private double totalWeight;
- private double average;
-
- public WeightedRunningAverage() {
- totalWeight = 0.0;
- average = Double.NaN;
- }
-
- @Override
- public synchronized void addDatum(double datum) {
- addDatum(datum, 1.0);
- }
-
- public synchronized void addDatum(double datum, double weight) {
- double oldTotalWeight = totalWeight;
- totalWeight += weight;
- if (oldTotalWeight <= 0.0) {
- average = datum;
- } else {
- average = average * oldTotalWeight / totalWeight + datum * weight / totalWeight;
- }
- }
-
- @Override
- public synchronized void removeDatum(double datum) {
- removeDatum(datum, 1.0);
- }
-
- public synchronized void removeDatum(double datum, double weight) {
- double oldTotalWeight = totalWeight;
- totalWeight -= weight;
- if (totalWeight <= 0.0) {
- average = Double.NaN;
- totalWeight = 0.0;
- } else {
- average = average * oldTotalWeight / totalWeight - datum * weight / totalWeight;
- }
- }
-
- @Override
- public synchronized void changeDatum(double delta) {
- changeDatum(delta, 1.0);
- }
-
- public synchronized void changeDatum(double delta, double weight) {
- Preconditions.checkArgument(weight <= totalWeight);
- average += delta * weight / totalWeight;
- }
-
- public synchronized double getTotalWeight() {
- return totalWeight;
- }
-
- /** @return {@link #getTotalWeight()} */
- @Override
- public synchronized int getCount() {
- return (int) totalWeight;
- }
-
- @Override
- public synchronized double getAverage() {
- return average;
- }
-
- @Override
- public RunningAverage inverse() {
- return new InvertedRunningAverage(this);
- }
-
- @Override
- public synchronized String toString() {
- return String.valueOf(average);
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
deleted file mode 100644
index bed5812b2..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common;
-
-/**
- * This subclass also provides for a weighted estimate of the sample standard deviation.
- * See estimate formulae here.
- */
-public final class WeightedRunningAverageAndStdDev extends WeightedRunningAverage implements RunningAverageAndStdDev {
-
- private double totalSquaredWeight;
- private double totalWeightedData;
- private double totalWeightedSquaredData;
-
- public WeightedRunningAverageAndStdDev() {
- totalSquaredWeight = 0.0;
- totalWeightedData = 0.0;
- totalWeightedSquaredData = 0.0;
- }
-
- @Override
- public synchronized void addDatum(double datum, double weight) {
- super.addDatum(datum, weight);
- totalSquaredWeight += weight * weight;
- double weightedData = datum * weight;
- totalWeightedData += weightedData;
- totalWeightedSquaredData += weightedData * datum;
- }
-
- @Override
- public synchronized void removeDatum(double datum, double weight) {
- super.removeDatum(datum, weight);
- totalSquaredWeight -= weight * weight;
- if (totalSquaredWeight <= 0.0) {
- totalSquaredWeight = 0.0;
- }
- double weightedData = datum * weight;
- totalWeightedData -= weightedData;
- if (totalWeightedData <= 0.0) {
- totalWeightedData = 0.0;
- }
- totalWeightedSquaredData -= weightedData * datum;
- if (totalWeightedSquaredData <= 0.0) {
- totalWeightedSquaredData = 0.0;
- }
- }
-
- /**
- * @throws UnsupportedOperationException
- */
- @Override
- public synchronized void changeDatum(double delta, double weight) {
- throw new UnsupportedOperationException();
- }
-
-
- @Override
- public synchronized double getStandardDeviation() {
- double totalWeight = getTotalWeight();
- return Math.sqrt((totalWeightedSquaredData * totalWeight - totalWeightedData * totalWeightedData)
- / (totalWeight * totalWeight - totalSquaredWeight));
- }
-
- @Override
- public RunningAverageAndStdDev inverse() {
- return new InvertedRunningAverageAndStdDev(this);
- }
-
- @Override
- public synchronized String toString() {
- return String.valueOf(String.valueOf(getAverage()) + ',' + getStandardDeviation());
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/AbstractJDBCComponent.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/AbstractJDBCComponent.java
deleted file mode 100644
index d1e93abe2..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/AbstractJDBCComponent.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common.jdbc;
-
-import javax.naming.Context;
-import javax.naming.InitialContext;
-import javax.naming.NamingException;
-import javax.sql.DataSource;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Preconditions;
-
-/**
- * A helper class with common elements for several JDBC-related components.
- */
-public abstract class AbstractJDBCComponent {
-
- private static final Logger log = LoggerFactory.getLogger(AbstractJDBCComponent.class);
-
- private static final int DEFAULT_FETCH_SIZE = 1000; // A max, "big" number of rows to buffer at once
- protected static final String DEFAULT_DATASOURCE_NAME = "jdbc/taste";
-
- protected static void checkNotNullAndLog(String argName, Object value) {
- Preconditions.checkArgument(value != null && !value.toString().isEmpty(),
- argName + " is null or empty");
- log.debug("{}: {}", argName, value);
- }
-
- protected static void checkNotNullAndLog(String argName, Object[] values) {
- Preconditions.checkArgument(values != null && values.length != 0, argName + " is null or zero-length");
- for (Object value : values) {
- checkNotNullAndLog(argName, value);
- }
- }
-
- /**
- *
- * Looks up a {@link DataSource} by name from JNDI. "java:comp/env/" is prepended to the argument before
- * looking up the name in JNDI.
- *
- *
- * @param dataSourceName
- * JNDI name where a {@link DataSource} is bound (e.g. "jdbc/taste")
- * @return {@link DataSource} under that JNDI name
- * @throws TasteException
- * if a JNDI error occurs
- */
- public static DataSource lookupDataSource(String dataSourceName) throws TasteException {
- Context context = null;
- try {
- context = new InitialContext();
- return (DataSource) context.lookup("java:comp/env/" + dataSourceName);
- } catch (NamingException ne) {
- throw new TasteException(ne);
- } finally {
- if (context != null) {
- try {
- context.close();
- } catch (NamingException ne) {
- log.warn("Error while closing Context; continuing...", ne);
- }
- }
- }
- }
-
- protected int getFetchSize() {
- return DEFAULT_FETCH_SIZE;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/EachRowIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/EachRowIterator.java
deleted file mode 100644
index 3f024bc1d..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/EachRowIterator.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common.jdbc;
-
-import javax.sql.DataSource;
-import java.io.Closeable;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-import com.google.common.collect.AbstractIterator;
-import org.apache.mahout.common.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Provides an {@link java.util.Iterator} over the result of an SQL query, as an iteration over the {@link ResultSet}.
- * While the same object will be returned from the iteration each time, it will be returned once for each row
- * of the result.
- */
-final class EachRowIterator extends AbstractIterator implements Closeable {
-
- private static final Logger log = LoggerFactory.getLogger(EachRowIterator.class);
-
- private final Connection connection;
- private final PreparedStatement statement;
- private final ResultSet resultSet;
-
- EachRowIterator(DataSource dataSource, String sqlQuery) throws SQLException {
- try {
- connection = dataSource.getConnection();
- statement = connection.prepareStatement(sqlQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
- statement.setFetchDirection(ResultSet.FETCH_FORWARD);
- //statement.setFetchSize(getFetchSize());
- log.debug("Executing SQL query: {}", sqlQuery);
- resultSet = statement.executeQuery();
- } catch (SQLException sqle) {
- close();
- throw sqle;
- }
- }
-
- @Override
- protected ResultSet computeNext() {
- try {
- if (resultSet.next()) {
- return resultSet;
- } else {
- close();
- return null;
- }
- } catch (SQLException sqle) {
- close();
- throw new IllegalStateException(sqle);
- }
- }
-
- public void skip(int n) throws SQLException {
- try {
- resultSet.relative(n);
- } catch (SQLException sqle) {
- // Can't use relative on MySQL Connector/J; try advancing manually
- int i = 0;
- while (i < n && resultSet.next()) {
- i++;
- }
- }
- }
-
- @Override
- public void close() {
- IOUtils.quietClose(resultSet, statement, connection);
- endOfData();
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ResultSetIterator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ResultSetIterator.java
deleted file mode 100644
index 09135d00a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ResultSetIterator.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.common.jdbc;
-
-import javax.sql.DataSource;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.Iterator;
-
-import com.google.common.base.Function;
-import com.google.common.collect.ForwardingIterator;
-import com.google.common.collect.Iterators;
-
-public abstract class ResultSetIterator extends ForwardingIterator {
-
- private final Iterator delegate;
- private final EachRowIterator rowDelegate;
-
- protected ResultSetIterator(DataSource dataSource, String sqlQuery) throws SQLException {
- this.rowDelegate = new EachRowIterator(dataSource, sqlQuery);
- delegate = Iterators.transform(rowDelegate,
- new Function() {
- @Override
- public T apply(ResultSet from) {
- try {
- return parseElement(from);
- } catch (SQLException sqle) {
- throw new IllegalStateException(sqle);
- }
- }
- });
- }
-
- @Override
- protected Iterator delegate() {
- return delegate;
- }
-
- protected abstract T parseElement(ResultSet resultSet) throws SQLException;
-
- public void skip(int n) {
- if (n >= 1) {
- try {
- rowDelegate.skip(n);
- } catch (SQLException sqle) {
- throw new IllegalStateException(sqle);
- }
- }
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
deleted file mode 100644
index 96066d257..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import com.google.common.collect.Lists;
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.eval.DataModelBuilder;
-import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
-import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
-import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
-import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
-import org.apache.mahout.cf.taste.impl.model.GenericPreference;
-import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.apache.mahout.common.RandomUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Preconditions;
-
-/**
- * Abstract superclass of a couple implementations, providing shared functionality.
- */
-public abstract class AbstractDifferenceRecommenderEvaluator implements RecommenderEvaluator {
-
- private static final Logger log = LoggerFactory.getLogger(AbstractDifferenceRecommenderEvaluator.class);
-
- private final Random random;
- private float maxPreference;
- private float minPreference;
-
- protected AbstractDifferenceRecommenderEvaluator() {
- random = RandomUtils.getRandom();
- maxPreference = Float.NaN;
- minPreference = Float.NaN;
- }
-
- @Override
- public final float getMaxPreference() {
- return maxPreference;
- }
-
- @Override
- public final void setMaxPreference(float maxPreference) {
- this.maxPreference = maxPreference;
- }
-
- @Override
- public final float getMinPreference() {
- return minPreference;
- }
-
- @Override
- public final void setMinPreference(float minPreference) {
- this.minPreference = minPreference;
- }
-
- @Override
- public double evaluate(RecommenderBuilder recommenderBuilder,
- DataModelBuilder dataModelBuilder,
- DataModel dataModel,
- double trainingPercentage,
- double evaluationPercentage) throws TasteException {
- Preconditions.checkNotNull(recommenderBuilder);
- Preconditions.checkNotNull(dataModel);
- Preconditions.checkArgument(trainingPercentage >= 0.0 && trainingPercentage <= 1.0,
- "Invalid trainingPercentage: " + trainingPercentage);
- Preconditions.checkArgument(evaluationPercentage >= 0.0 && evaluationPercentage <= 1.0,
- "Invalid evaluationPercentage: " + evaluationPercentage);
-
- log.info("Beginning evaluation using {} of {}", trainingPercentage, dataModel);
-
- int numUsers = dataModel.getNumUsers();
- FastByIDMap trainingPrefs = new FastByIDMap(
- 1 + (int) (evaluationPercentage * numUsers));
- FastByIDMap testPrefs = new FastByIDMap(
- 1 + (int) (evaluationPercentage * numUsers));
-
- LongPrimitiveIterator it = dataModel.getUserIDs();
- while (it.hasNext()) {
- long userID = it.nextLong();
- if (random.nextDouble() < evaluationPercentage) {
- splitOneUsersPrefs(trainingPercentage, trainingPrefs, testPrefs, userID, dataModel);
- }
- }
-
- DataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingPrefs)
- : dataModelBuilder.buildDataModel(trainingPrefs);
-
- Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);
-
- double result = getEvaluation(testPrefs, recommender);
- log.info("Evaluation result: {}", result);
- return result;
- }
-
- private void splitOneUsersPrefs(double trainingPercentage,
- FastByIDMap trainingPrefs,
- FastByIDMap testPrefs,
- long userID,
- DataModel dataModel) throws TasteException {
- List oneUserTrainingPrefs = null;
- List oneUserTestPrefs = null;
- PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
- int size = prefs.length();
- for (int i = 0; i < size; i++) {
- Preference newPref = new GenericPreference(userID, prefs.getItemID(i), prefs.getValue(i));
- if (random.nextDouble() < trainingPercentage) {
- if (oneUserTrainingPrefs == null) {
- oneUserTrainingPrefs = Lists.newArrayListWithCapacity(3);
- }
- oneUserTrainingPrefs.add(newPref);
- } else {
- if (oneUserTestPrefs == null) {
- oneUserTestPrefs = Lists.newArrayListWithCapacity(3);
- }
- oneUserTestPrefs.add(newPref);
- }
- }
- if (oneUserTrainingPrefs != null) {
- trainingPrefs.put(userID, new GenericUserPreferenceArray(oneUserTrainingPrefs));
- if (oneUserTestPrefs != null) {
- testPrefs.put(userID, new GenericUserPreferenceArray(oneUserTestPrefs));
- }
- }
- }
-
- private float capEstimatedPreference(float estimate) {
- if (estimate > maxPreference) {
- return maxPreference;
- }
- if (estimate < minPreference) {
- return minPreference;
- }
- return estimate;
- }
-
- private double getEvaluation(FastByIDMap testPrefs, Recommender recommender)
- throws TasteException {
- reset();
- Collection> estimateCallables = Lists.newArrayList();
- AtomicInteger noEstimateCounter = new AtomicInteger();
- for (Map.Entry entry : testPrefs.entrySet()) {
- estimateCallables.add(
- new PreferenceEstimateCallable(recommender, entry.getKey(), entry.getValue(), noEstimateCounter));
- }
- log.info("Beginning evaluation of {} users", estimateCallables.size());
- RunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
- execute(estimateCallables, noEstimateCounter, timing);
- return computeFinalEvaluation();
- }
-
- protected static void execute(Collection> callables,
- AtomicInteger noEstimateCounter,
- RunningAverageAndStdDev timing) throws TasteException {
-
- callables = wrapWithStatsCallables(callables, noEstimateCounter, timing);
- int numProcessors = Runtime.getRuntime().availableProcessors();
- ExecutorService executor = Executors.newFixedThreadPool(numProcessors);
- log.info("Starting timing of {} tasks in {} threads", callables.size(), numProcessors);
- try {
- List> futures = executor.invokeAll(callables);
- // Go look for exceptions here, really
- for (Future future : futures) {
- future.get();
- }
- } catch (InterruptedException ie) {
- throw new TasteException(ie);
- } catch (ExecutionException ee) {
- throw new TasteException(ee.getCause());
- }
- executor.shutdown();
- }
-
- private static Collection> wrapWithStatsCallables(Iterable> callables,
- AtomicInteger noEstimateCounter,
- RunningAverageAndStdDev timing) {
- Collection> wrapped = Lists.newArrayList();
- int count = 0;
- for (Callable callable : callables) {
- boolean logStats = count++ % 1000 == 0; // log every 1000 or so iterations
- wrapped.add(new StatsCallable(callable, logStats, timing, noEstimateCounter));
- }
- return wrapped;
- }
-
- protected abstract void reset();
-
- protected abstract void processOneEstimate(float estimatedPreference, Preference realPref);
-
- protected abstract double computeFinalEvaluation();
-
- public final class PreferenceEstimateCallable implements Callable {
-
- private final Recommender recommender;
- private final long testUserID;
- private final PreferenceArray prefs;
- private final AtomicInteger noEstimateCounter;
-
- public PreferenceEstimateCallable(Recommender recommender,
- long testUserID,
- PreferenceArray prefs,
- AtomicInteger noEstimateCounter) {
- this.recommender = recommender;
- this.testUserID = testUserID;
- this.prefs = prefs;
- this.noEstimateCounter = noEstimateCounter;
- }
-
- @Override
- public Void call() throws TasteException {
- for (Preference realPref : prefs) {
- float estimatedPreference = Float.NaN;
- try {
- estimatedPreference = recommender.estimatePreference(testUserID, realPref.getItemID());
- } catch (NoSuchUserException nsue) {
- // It's possible that an item exists in the test data but not training data in which case
- // NSEE will be thrown. Just ignore it and move on.
- log.info("User exists in test data but not training data: {}", testUserID);
- } catch (NoSuchItemException nsie) {
- log.info("Item exists in test data but not training data: {}", realPref.getItemID());
- }
- if (Float.isNaN(estimatedPreference)) {
- noEstimateCounter.incrementAndGet();
- } else {
- estimatedPreference = capEstimatedPreference(estimatedPreference);
- processOneEstimate(estimatedPreference, realPref);
- }
- }
- return null;
- }
-
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java
deleted file mode 100644
index 4dad0400d..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.model.Preference;
-
-/**
- *
- * A {@link org.apache.mahout.cf.taste.eval.RecommenderEvaluator} which computes the average absolute
- * difference between predicted and actual ratings for users.
- *
- *
- *
- * This algorithm is also called "mean average error".
- *
- */
-public final class AverageAbsoluteDifferenceRecommenderEvaluator extends
- AbstractDifferenceRecommenderEvaluator {
-
- private RunningAverage average;
-
- @Override
- protected void reset() {
- average = new FullRunningAverage();
- }
-
- @Override
- protected void processOneEstimate(float estimatedPreference, Preference realPref) {
- average.addDatum(Math.abs(realPref.getValue() - estimatedPreference));
- }
-
- @Override
- protected double computeFinalEvaluation() {
- return average.getAverage();
- }
-
- @Override
- public String toString() {
- return "AverageAbsoluteDifferenceRecommenderEvaluator";
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
deleted file mode 100644
index 00996edfc..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
+++ /dev/null
@@ -1,237 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import java.util.List;
-import java.util.Random;
-
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.eval.DataModelBuilder;
-import org.apache.mahout.cf.taste.eval.IRStatistics;
-import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
-import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
-import org.apache.mahout.cf.taste.eval.RelevantItemsDataSplitter;
-import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.recommender.IDRescorer;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.apache.mahout.common.RandomUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Preconditions;
-
-/**
- *
- * For each user, these implementation determine the top {@code n} preferences, then evaluate the IR
- * statistics based on a {@link DataModel} that does not have these values. This number {@code n} is the
- * "at" value, as in "precision at 5". For example, this would mean precision evaluated by removing the top 5
- * preferences for a user and then finding the percentage of those 5 items included in the top 5
- * recommendations for that user.
- *
- */
-public final class GenericRecommenderIRStatsEvaluator implements RecommenderIRStatsEvaluator {
-
- private static final Logger log = LoggerFactory.getLogger(GenericRecommenderIRStatsEvaluator.class);
-
- private static final double LOG2 = Math.log(2.0);
-
- /**
- * Pass as "relevanceThreshold" argument to
- * {@link #evaluate(RecommenderBuilder, DataModelBuilder, DataModel, IDRescorer, int, double, double)} to
- * have it attempt to compute a reasonable threshold. Note that this will impact performance.
- */
- public static final double CHOOSE_THRESHOLD = Double.NaN;
-
- private final Random random;
- private final RelevantItemsDataSplitter dataSplitter;
-
- public GenericRecommenderIRStatsEvaluator() {
- this(new GenericRelevantItemsDataSplitter());
- }
-
- public GenericRecommenderIRStatsEvaluator(RelevantItemsDataSplitter dataSplitter) {
- Preconditions.checkNotNull(dataSplitter);
- random = RandomUtils.getRandom();
- this.dataSplitter = dataSplitter;
- }
-
- @Override
- public IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
- DataModelBuilder dataModelBuilder,
- DataModel dataModel,
- IDRescorer rescorer,
- int at,
- double relevanceThreshold,
- double evaluationPercentage) throws TasteException {
-
- Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
- Preconditions.checkArgument(dataModel != null, "dataModel is null");
- Preconditions.checkArgument(at >= 1, "at must be at least 1");
- Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
- "Invalid evaluationPercentage: %s", evaluationPercentage);
-
- int numItems = dataModel.getNumItems();
- RunningAverage precision = new FullRunningAverage();
- RunningAverage recall = new FullRunningAverage();
- RunningAverage fallOut = new FullRunningAverage();
- RunningAverage nDCG = new FullRunningAverage();
- int numUsersRecommendedFor = 0;
- int numUsersWithRecommendations = 0;
-
- LongPrimitiveIterator it = dataModel.getUserIDs();
- while (it.hasNext()) {
-
- long userID = it.nextLong();
-
- if (random.nextDouble() >= evaluationPercentage) {
- // Skipped
- continue;
- }
-
- long start = System.currentTimeMillis();
-
- PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
-
- // List some most-preferred items that would count as (most) "relevant" results
- double theRelevanceThreshold = Double.isNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
- FastIDSet relevantItemIDs = dataSplitter.getRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);
-
- int numRelevantItems = relevantItemIDs.size();
- if (numRelevantItems <= 0) {
- continue;
- }
-
- FastByIDMap trainingUsers = new FastByIDMap(dataModel.getNumUsers());
- LongPrimitiveIterator it2 = dataModel.getUserIDs();
- while (it2.hasNext()) {
- dataSplitter.processOtherUser(userID, relevantItemIDs, trainingUsers, it2.nextLong(), dataModel);
- }
-
- DataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
- : dataModelBuilder.buildDataModel(trainingUsers);
- try {
- trainingModel.getPreferencesFromUser(userID);
- } catch (NoSuchUserException nsee) {
- continue; // Oops we excluded all prefs for the user -- just move on
- }
-
- int size = relevantItemIDs.size() + trainingModel.getItemIDsFromUser(userID).size();
- if (size < 2 * at) {
- // Really not enough prefs to meaningfully evaluate this user
- continue;
- }
-
- Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);
-
- int intersectionSize = 0;
- List recommendedItems = recommender.recommend(userID, at, rescorer);
- for (RecommendedItem recommendedItem : recommendedItems) {
- if (relevantItemIDs.contains(recommendedItem.getItemID())) {
- intersectionSize++;
- }
- }
-
- int numRecommendedItems = recommendedItems.size();
-
- // Precision
- if (numRecommendedItems > 0) {
- precision.addDatum((double) intersectionSize / (double) numRecommendedItems);
- }
-
- // Recall
- recall.addDatum((double) intersectionSize / (double) numRelevantItems);
-
- // Fall-out
- if (numRelevantItems < size) {
- fallOut.addDatum((double) (numRecommendedItems - intersectionSize)
- / (double) (numItems - numRelevantItems));
- }
-
- // nDCG
- // In computing, assume relevant IDs have relevance 1 and others 0
- double cumulativeGain = 0.0;
- double idealizedGain = 0.0;
- for (int i = 0; i < recommendedItems.size(); i++) {
- RecommendedItem item = recommendedItems.get(i);
- double discount = i == 0 ? 1.0 : 1.0 / log2(i + 1);
- if (relevantItemIDs.contains(item.getItemID())) {
- cumulativeGain += discount;
- }
- // otherwise we're multiplying discount by relevance 0 so it doesn't do anything
-
- // Ideally results would be ordered with all relevant ones first, so this theoretical
- // ideal list starts with number of relevant items equal to the total number of relevant items
- if (i < relevantItemIDs.size()) {
- idealizedGain += discount;
- }
- }
- nDCG.addDatum(cumulativeGain / idealizedGain);
-
- // Reach
- numUsersRecommendedFor++;
- if (numRecommendedItems > 0) {
- numUsersWithRecommendations++;
- }
-
- long end = System.currentTimeMillis();
-
- log.info("Evaluated with user {} in {}ms", userID, end - start);
- log.info("Precision/recall/fall-out/nDCG: {} / {} / {} / {}", new Object[] {
- precision.getAverage(), recall.getAverage(), fallOut.getAverage(), nDCG.getAverage()
- });
- }
-
- double reach = (double) numUsersWithRecommendations / (double) numUsersRecommendedFor;
-
- return new IRStatisticsImpl(
- precision.getAverage(),
- recall.getAverage(),
- fallOut.getAverage(),
- nDCG.getAverage(),
- reach);
- }
-
- private static double computeThreshold(PreferenceArray prefs) {
- if (prefs.length() < 2) {
- // Not enough data points -- return a threshold that allows everything
- return Double.NEGATIVE_INFINITY;
- }
- RunningAverageAndStdDev stdDev = new FullRunningAverageAndStdDev();
- int size = prefs.length();
- for (int i = 0; i < size; i++) {
- stdDev.addDatum(prefs.getValue(i));
- }
- return stdDev.getAverage() + stdDev.getStandardDeviation();
- }
-
- private static double log2(double value) {
- return Math.log(value) / LOG2;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
deleted file mode 100644
index fb4858c40..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import com.google.common.collect.Lists;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.eval.RelevantItemsDataSplitter;
-import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * Picks relevant items to be those with the strongest preference, and
- * includes the other users' preferences in full.
- */
-public final class GenericRelevantItemsDataSplitter implements RelevantItemsDataSplitter {
-
- @Override
- public FastIDSet getRelevantItemsIDs(long userID,
- int at,
- double relevanceThreshold,
- DataModel dataModel) throws TasteException {
- PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
- FastIDSet relevantItemIDs = new FastIDSet(at);
- prefs.sortByValueReversed();
- for (int i = 0; i < prefs.length() && relevantItemIDs.size() < at; i++) {
- if (prefs.getValue(i) >= relevanceThreshold) {
- relevantItemIDs.add(prefs.getItemID(i));
- }
- }
- return relevantItemIDs;
- }
-
- @Override
- public void processOtherUser(long userID,
- FastIDSet relevantItemIDs,
- FastByIDMap trainingUsers,
- long otherUserID,
- DataModel dataModel) throws TasteException {
- PreferenceArray prefs2Array = dataModel.getPreferencesFromUser(otherUserID);
- // If we're dealing with the very user that we're evaluating for precision/recall,
- if (userID == otherUserID) {
- // then must remove all the test IDs, the "relevant" item IDs
- List prefs2 = Lists.newArrayListWithCapacity(prefs2Array.length());
- for (Preference pref : prefs2Array) {
- prefs2.add(pref);
- }
- for (Iterator iterator = prefs2.iterator(); iterator.hasNext(); ) {
- Preference pref = iterator.next();
- if (relevantItemIDs.contains(pref.getItemID())) {
- iterator.remove();
- }
- }
- if (!prefs2.isEmpty()) {
- trainingUsers.put(otherUserID, new GenericUserPreferenceArray(prefs2));
- }
- } else {
- // otherwise just add all those other user's prefs
- trainingUsers.put(otherUserID, prefs2Array);
- }
- }
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
deleted file mode 100644
index 920935ebb..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import java.io.Serializable;
-
-import org.apache.mahout.cf.taste.eval.IRStatistics;
-
-import com.google.common.base.Preconditions;
-
-public final class IRStatisticsImpl implements IRStatistics, Serializable {
-
- private final double precision;
- private final double recall;
- private final double fallOut;
- private final double ndcg;
- private final double reach;
-
- IRStatisticsImpl(double precision, double recall, double fallOut, double ndcg, double reach) {
- Preconditions.checkArgument(precision >= 0.0 && precision <= 1.0, "Illegal precision: " + precision);
- Preconditions.checkArgument(recall >= 0.0 && recall <= 1.0, "Illegal recall: " + recall);
- Preconditions.checkArgument(fallOut >= 0.0 && fallOut <= 1.0, "Illegal fallOut: " + fallOut);
- Preconditions.checkArgument(ndcg >= 0.0 && ndcg <= 1.0, "Illegal nDCG: " + ndcg);
- Preconditions.checkArgument(reach >= 0.0 && reach <= 1.0, "Illegal reach: " + reach);
- this.precision = precision;
- this.recall = recall;
- this.fallOut = fallOut;
- this.ndcg = ndcg;
- this.reach = reach;
- }
-
- @Override
- public double getPrecision() {
- return precision;
- }
-
- @Override
- public double getRecall() {
- return recall;
- }
-
- @Override
- public double getFallOut() {
- return fallOut;
- }
-
- @Override
- public double getF1Measure() {
- return getFNMeasure(1.0);
- }
-
- @Override
- public double getFNMeasure(double b) {
- double b2 = b * b;
- double sum = b2 * precision + recall;
- return sum == 0.0 ? Double.NaN : (1.0 + b2) * precision * recall / sum;
- }
-
- @Override
- public double getNormalizedDiscountedCumulativeGain() {
- return ndcg;
- }
-
- @Override
- public double getReach() {
- return reach;
- }
-
- @Override
- public String toString() {
- return "IRStatisticsImpl[precision:" + precision + ",recall:" + recall + ",fallOut:"
- + fallOut + ",nDCG:" + ndcg + ",reach:" + reach + ']';
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadCallable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadCallable.java
deleted file mode 100644
index 213f7f960..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadCallable.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import org.apache.mahout.cf.taste.recommender.Recommender;
-
-import java.util.concurrent.Callable;
-
-final class LoadCallable implements Callable {
-
- private final Recommender recommender;
- private final long userID;
-
- LoadCallable(Recommender recommender, long userID) {
- this.recommender = recommender;
- this.userID = userID;
- }
-
- @Override
- public Void call() throws Exception {
- recommender.recommend(userID, 10);
- return null;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
deleted file mode 100644
index b606047cc..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import java.util.Collection;
-import java.util.concurrent.Callable;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import com.google.common.collect.Lists;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
-import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
-import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.recommender.Recommender;
-
-/**
- * Simple helper class for running load on a Recommender.
- */
-public final class LoadEvaluator {
-
- private LoadEvaluator() { }
-
- public static LoadStatistics runLoad(Recommender recommender) throws TasteException {
- return runLoad(recommender, 10);
- }
-
- public static LoadStatistics runLoad(Recommender recommender, int howMany) throws TasteException {
- DataModel dataModel = recommender.getDataModel();
- int numUsers = dataModel.getNumUsers();
- double sampleRate = 1000.0 / numUsers;
- LongPrimitiveIterator userSampler =
- SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(), sampleRate);
- recommender.recommend(userSampler.next(), howMany); // Warm up
- Collection> callables = Lists.newArrayList();
- while (userSampler.hasNext()) {
- callables.add(new LoadCallable(recommender, userSampler.next()));
- }
- AtomicInteger noEstimateCounter = new AtomicInteger();
- RunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
- AbstractDifferenceRecommenderEvaluator.execute(callables, noEstimateCounter, timing);
- return new LoadStatistics(timing);
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadStatistics.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadStatistics.java
deleted file mode 100644
index f89160c23..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadStatistics.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-
-public final class LoadStatistics {
-
- private final RunningAverage timing;
-
- LoadStatistics(RunningAverage timing) {
- this.timing = timing;
- }
-
- public RunningAverage getTiming() {
- return timing;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
deleted file mode 100644
index 00a8b2fc9..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
+++ /dev/null
@@ -1,431 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Evaluate recommender by comparing order of all raw prefs with order in
- * recommender's output for that user. Can also compare data models.
- */
-public final class OrderBasedRecommenderEvaluator {
-
- private static final Logger log = LoggerFactory.getLogger(OrderBasedRecommenderEvaluator.class);
-
- private OrderBasedRecommenderEvaluator() {
- }
-
- public static void evaluate(Recommender recommender1,
- Recommender recommender2,
- int samples,
- RunningAverage tracker,
- String tag) throws TasteException {
- printHeader();
- LongPrimitiveIterator users = recommender1.getDataModel().getUserIDs();
-
- while (users.hasNext()) {
- long userID = users.nextLong();
- List recs1 = recommender1.recommend(userID, samples);
- List recs2 = recommender2.recommend(userID, samples);
- FastIDSet commonSet = new FastIDSet();
- long maxItemID = setBits(commonSet, recs1, samples);
- FastIDSet otherSet = new FastIDSet();
- maxItemID = Math.max(maxItemID, setBits(otherSet, recs2, samples));
- int max = mask(commonSet, otherSet, maxItemID);
- max = Math.min(max, samples);
- if (max < 2) {
- continue;
- }
- Long[] items1 = getCommonItems(commonSet, recs1, max);
- Long[] items2 = getCommonItems(commonSet, recs2, max);
- double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2);
- tracker.addDatum(variance);
- }
- }
-
- public static void evaluate(Recommender recommender,
- DataModel model,
- int samples,
- RunningAverage tracker,
- String tag) throws TasteException {
- printHeader();
- LongPrimitiveIterator users = recommender.getDataModel().getUserIDs();
- while (users.hasNext()) {
- long userID = users.nextLong();
- List recs1 = recommender.recommend(userID, model.getNumItems());
- PreferenceArray prefs2 = model.getPreferencesFromUser(userID);
- prefs2.sortByValueReversed();
- FastIDSet commonSet = new FastIDSet();
- long maxItemID = setBits(commonSet, recs1, samples);
- FastIDSet otherSet = new FastIDSet();
- maxItemID = Math.max(maxItemID, setBits(otherSet, prefs2, samples));
- int max = mask(commonSet, otherSet, maxItemID);
- max = Math.min(max, samples);
- if (max < 2) {
- continue;
- }
- Long[] items1 = getCommonItems(commonSet, recs1, max);
- Long[] items2 = getCommonItems(commonSet, prefs2, max);
- double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2);
- tracker.addDatum(variance);
- }
- }
-
- public static void evaluate(DataModel model1,
- DataModel model2,
- int samples,
- RunningAverage tracker,
- String tag) throws TasteException {
- printHeader();
- LongPrimitiveIterator users = model1.getUserIDs();
- while (users.hasNext()) {
- long userID = users.nextLong();
- PreferenceArray prefs1 = model1.getPreferencesFromUser(userID);
- PreferenceArray prefs2 = model2.getPreferencesFromUser(userID);
- prefs1.sortByValueReversed();
- prefs2.sortByValueReversed();
- FastIDSet commonSet = new FastIDSet();
- long maxItemID = setBits(commonSet, prefs1, samples);
- FastIDSet otherSet = new FastIDSet();
- maxItemID = Math.max(maxItemID, setBits(otherSet, prefs2, samples));
- int max = mask(commonSet, otherSet, maxItemID);
- max = Math.min(max, samples);
- if (max < 2) {
- continue;
- }
- Long[] items1 = getCommonItems(commonSet, prefs1, max);
- Long[] items2 = getCommonItems(commonSet, prefs2, max);
- double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2);
- tracker.addDatum(variance);
- }
- }
-
- /**
- * This exists because FastIDSet has 'retainAll' as MASK, but there is
- * no count of the number of items in the set. size() is supposed to do
- * this but does not work.
- */
- private static int mask(FastIDSet commonSet, FastIDSet otherSet, long maxItemID) {
- int count = 0;
- for (int i = 0; i <= maxItemID; i++) {
- if (commonSet.contains(i)) {
- if (otherSet.contains(i)) {
- count++;
- } else {
- commonSet.remove(i);
- }
- }
- }
- return count;
- }
-
- private static Long[] getCommonItems(FastIDSet commonSet, Iterable recs, int max) {
- Long[] commonItems = new Long[max];
- int index = 0;
- for (RecommendedItem rec : recs) {
- Long item = rec.getItemID();
- if (commonSet.contains(item)) {
- commonItems[index++] = item;
- }
- if (index == max) {
- break;
- }
- }
- return commonItems;
- }
-
- private static Long[] getCommonItems(FastIDSet commonSet, PreferenceArray prefs1, int max) {
- Long[] commonItems = new Long[max];
- int index = 0;
- for (int i = 0; i < prefs1.length(); i++) {
- Long item = prefs1.getItemID(i);
- if (commonSet.contains(item)) {
- commonItems[index++] = item;
- }
- if (index == max) {
- break;
- }
- }
- return commonItems;
- }
-
- private static long setBits(FastIDSet modelSet, List items, int max) {
- long maxItem = -1;
- for (int i = 0; i < items.size() && i < max; i++) {
- long itemID = items.get(i).getItemID();
- modelSet.add(itemID);
- if (itemID > maxItem) {
- maxItem = itemID;
- }
- }
- return maxItem;
- }
-
- private static long setBits(FastIDSet modelSet, PreferenceArray prefs, int max) {
- long maxItem = -1;
- for (int i = 0; i < prefs.length() && i < max; i++) {
- long itemID = prefs.getItemID(i);
- modelSet.add(itemID);
- if (itemID > maxItem) {
- maxItem = itemID;
- }
- }
- return maxItem;
- }
-
- private static void printHeader() {
- log.info("tag,user,samples,common,hamming,bubble,rank,normal,score");
- }
-
- /**
- * Common Subset Scoring
- *
- * These measurements are given the set of results that are common to both
- * recommendation lists. They only get ordered lists.
- *
- * These measures all return raw numbers do not correlate among the tests.
- * The numbers are not corrected against the total number of samples or the
- * number of common items.
- * The one contract is that all measures are 0 for an exact match and an
- * increasing positive number as differences increase.
- */
- private static double scoreCommonSubset(String tag,
- long userID,
- int samples,
- int subset,
- Long[] itemsL,
- Long[] itemsR) {
- int[] vectorZ = new int[subset];
- int[] vectorZabs = new int[subset];
-
- long bubble = sort(itemsL, itemsR);
- int hamming = slidingWindowHamming(itemsR, itemsL);
- if (hamming > samples) {
- throw new IllegalStateException();
- }
- getVectorZ(itemsR, itemsL, vectorZ, vectorZabs);
- double normalW = normalWilcoxon(vectorZ, vectorZabs);
- double meanRank = getMeanRank(vectorZabs);
- // case statement for requested value
- double variance = Math.sqrt(meanRank);
- log.info("{},{},{},{},{},{},{},{},{}",
- new Object[] {tag, userID, samples, subset, hamming, bubble, meanRank, normalW, variance});
- return variance;
- }
-
- // simple sliding-window hamming distance: a[i or plus/minus 1] == b[i]
- private static int slidingWindowHamming(Long[] itemsR, Long[] itemsL) {
- int count = 0;
- int samples = itemsR.length;
-
- if (itemsR[0].equals(itemsL[0]) || itemsR[0].equals(itemsL[1])) {
- count++;
- }
- for (int i = 1; i < samples - 1; i++) {
- long itemID = itemsL[i];
- if (itemsR[i] == itemID || itemsR[i - 1] == itemID || itemsR[i + 1] == itemID) {
- count++;
- }
- }
- if (itemsR[samples - 1].equals(itemsL[samples - 1]) || itemsR[samples - 1].equals(itemsL[samples - 2])) {
- count++;
- }
- return count;
- }
-
- /**
- * Normal-distribution probability value for matched sets of values.
- * Based upon:
- * http://comp9.psych.cornell.edu/Darlington/normscor.htm
- *
- * The Standard Wilcoxon is not used because it requires a lookup table.
- */
- static double normalWilcoxon(int[] vectorZ, int[] vectorZabs) {
- int nitems = vectorZ.length;
-
- double[] ranks = new double[nitems];
- double[] ranksAbs = new double[nitems];
- wilcoxonRanks(vectorZ, vectorZabs, ranks, ranksAbs);
- return Math.min(getMeanWplus(ranks), getMeanWminus(ranks));
- }
-
- /**
- * vector Z is a list of distances between the correct value and the recommended value
- * Z[i] = position i of correct itemID - position of correct itemID in recommendation list
- * can be positive or negative
- * the smaller the better - means recommendations are closer
- * both are the same length, and both sample from the same set
- *
- * destructive to items arrays - allows N log N instead of N^2 order
- */
- private static void getVectorZ(Long[] itemsR, Long[] itemsL, int[] vectorZ, int[] vectorZabs) {
- int nitems = itemsR.length;
- int bottom = 0;
- int top = nitems - 1;
- for (int i = 0; i < nitems; i++) {
- long itemID = itemsR[i];
- for (int j = bottom; j <= top; j++) {
- if (itemsL[j] == null) {
- continue;
- }
- long test = itemsL[j];
- if (itemID == test) {
- vectorZ[i] = i - j;
- vectorZabs[i] = Math.abs(i - j);
- if (j == bottom) {
- bottom++;
- } else if (j == top) {
- top--;
- } else {
- itemsL[j] = null;
- }
- break;
- }
- }
- }
- }
-
- /**
- * Ranks are the position of the value from low to high, divided by the # of values.
- * I had to walk through it a few times.
- */
- private static void wilcoxonRanks(int[] vectorZ, int[] vectorZabs, double[] ranks, double[] ranksAbs) {
- int nitems = vectorZ.length;
- int[] sorted = vectorZabs.clone();
- Arrays.sort(sorted);
- int zeros = 0;
- for (; zeros < nitems; zeros++) {
- if (sorted[zeros] > 0) {
- break;
- }
- }
- for (int i = 0; i < nitems; i++) {
- double rank = 0.0;
- int count = 0;
- int score = vectorZabs[i];
- for (int j = 0; j < nitems; j++) {
- if (score == sorted[j]) {
- rank += j + 1 - zeros;
- count++;
- } else if (score < sorted[j]) {
- break;
- }
- }
- if (vectorZ[i] != 0) {
- ranks[i] = (rank / count) * (vectorZ[i] < 0 ? -1 : 1); // better be at least 1
- ranksAbs[i] = Math.abs(ranks[i]);
- }
- }
- }
-
- private static double getMeanRank(int[] ranks) {
- int nitems = ranks.length;
- double sum = 0.0;
- for (int i = 0; i < nitems; i++) {
- sum += ranks[i];
- }
- return sum / nitems;
- }
-
- private static double getMeanWplus(double[] ranks) {
- int nitems = ranks.length;
- double sum = 0.0;
- for (int i = 0; i < nitems; i++) {
- if (ranks[i] > 0) {
- sum += ranks[i];
- }
- }
- return sum / nitems;
- }
-
- private static double getMeanWminus(double[] ranks) {
- int nitems = ranks.length;
- double sum = 0.0;
- for (int i = 0; i < nitems; i++) {
- if (ranks[i] < 0) {
- sum -= ranks[i];
- }
- }
- return sum / nitems;
- }
-
- /**
- * Do bubble sort and return number of swaps needed to match preference lists.
- * Sort itemsR using itemsL as the reference order.
- */
- static long sort(Long[] itemsL, Long[] itemsR) {
- int length = itemsL.length;
- if (length < 2) {
- return 0;
- }
- if (length == 2) {
- return itemsL[0].longValue() == itemsR[0].longValue() ? 0 : 1;
- }
- // 1) avoid changing originals; 2) primitive type is more efficient
- long[] reference = new long[length];
- long[] sortable = new long[length];
- for (int i = 0; i < length; i++) {
- reference[i] = itemsL[i];
- sortable[i] = itemsR[i];
- }
- int sorted = 0;
- long swaps = 0;
- while (sorted < length - 1) {
- // opportunistically trim back the top
- while (length > 0 && reference[length - 1] == sortable[length - 1]) {
- length--;
- }
- if (length == 0) {
- break;
- }
- if (reference[sorted] == sortable[sorted]) {
- sorted++;
- } else {
- for (int j = sorted; j < length - 1; j++) {
- // do not swap anything already in place
- int jump = 1;
- if (reference[j] == sortable[j]) {
- while (j + jump < length && reference[j + jump] == sortable[j + jump]) {
- jump++;
- }
- }
- if (j + jump < length && !(reference[j] == sortable[j] && reference[j + jump] == sortable[j + jump])) {
- long tmp = sortable[j];
- sortable[j] = sortable[j + 1];
- sortable[j + 1] = tmp;
- swaps++;
- }
- }
- }
- }
- return swaps;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java
deleted file mode 100644
index 97eda1018..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.model.Preference;
-
-/**
- *
- * A {@link org.apache.mahout.cf.taste.eval.RecommenderEvaluator} which computes the "root mean squared"
- * difference between predicted and actual ratings for users. This is the square root of the average of this
- * difference, squared.
- *
- */
-public final class RMSRecommenderEvaluator extends AbstractDifferenceRecommenderEvaluator {
-
- private RunningAverage average;
-
- @Override
- protected void reset() {
- average = new FullRunningAverage();
- }
-
- @Override
- protected void processOneEstimate(float estimatedPreference, Preference realPref) {
- double diff = realPref.getValue() - estimatedPreference;
- average.addDatum(diff * diff);
- }
-
- @Override
- protected double computeFinalEvaluation() {
- return Math.sqrt(average.getAverage());
- }
-
- @Override
- public String toString() {
- return "RMSRecommenderEvaluator";
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java
deleted file mode 100644
index 036d0b428..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.eval;
-
-import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.concurrent.Callable;
-import java.util.concurrent.atomic.AtomicInteger;
-
-final class StatsCallable implements Callable {
-
- private static final Logger log = LoggerFactory.getLogger(StatsCallable.class);
-
- private final Callable delegate;
- private final boolean logStats;
- private final RunningAverageAndStdDev timing;
- private final AtomicInteger noEstimateCounter;
-
- StatsCallable(Callable delegate,
- boolean logStats,
- RunningAverageAndStdDev timing,
- AtomicInteger noEstimateCounter) {
- this.delegate = delegate;
- this.logStats = logStats;
- this.timing = timing;
- this.noEstimateCounter = noEstimateCounter;
- }
-
- @Override
- public Void call() throws Exception {
- long start = System.currentTimeMillis();
- delegate.call();
- long end = System.currentTimeMillis();
- timing.addDatum(end - start);
- if (logStats) {
- Runtime runtime = Runtime.getRuntime();
- int average = (int) timing.getAverage();
- log.info("Average time per recommendation: {}ms", average);
- long totalMemory = runtime.totalMemory();
- long memory = totalMemory - runtime.freeMemory();
- log.info("Approximate memory used: {}MB / {}MB", memory / 1000000L, totalMemory / 1000000L);
- log.info("Unable to recommend in {} cases", noEstimateCounter.get());
- }
- return null;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java
deleted file mode 100644
index a1a2a1f78..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import org.apache.mahout.cf.taste.model.DataModel;
-
-/**
- * Contains some features common to all implementations.
- */
-public abstract class AbstractDataModel implements DataModel {
-
- private float maxPreference;
- private float minPreference;
-
- protected AbstractDataModel() {
- maxPreference = Float.NaN;
- minPreference = Float.NaN;
- }
-
- @Override
- public float getMaxPreference() {
- return maxPreference;
- }
-
- protected void setMaxPreference(float maxPreference) {
- this.maxPreference = maxPreference;
- }
-
- @Override
- public float getMinPreference() {
- return minPreference;
- }
-
- protected void setMinPreference(float minPreference) {
- this.minPreference = minPreference;
- }
-
-}
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
deleted file mode 100644
index a48bc90eb..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-
-import java.util.Collection;
-
-import com.google.common.base.Charsets;
-import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.model.IDMigrator;
-
-public abstract class AbstractIDMigrator implements IDMigrator {
-
- private final MessageDigest md5Digest;
-
- protected AbstractIDMigrator() {
- try {
- md5Digest = MessageDigest.getInstance("MD5");
- } catch (NoSuchAlgorithmException nsae) {
- // Can't happen
- throw new IllegalStateException(nsae);
- }
- }
-
- /**
- * @return most significant 8 bytes of the MD5 hash of the string, as a long
- */
- protected final long hash(String value) {
- byte[] md5hash;
- synchronized (md5Digest) {
- md5hash = md5Digest.digest(value.getBytes(Charsets.UTF_8));
- md5Digest.reset();
- }
- long hash = 0L;
- for (int i = 0; i < 8; i++) {
- hash = hash << 8 | md5hash[i] & 0x00000000000000FFL;
- }
- return hash;
- }
-
- @Override
- public long toLongID(String stringID) {
- return hash(stringID);
- }
-
- @Override
- public void refresh(Collection alreadyRefreshed) {
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
deleted file mode 100644
index e28dd202a..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-import javax.sql.DataSource;
-
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.model.UpdatableIDMigrator;
-import org.apache.mahout.common.IOUtils;
-
-/**
- * Implementation which stores the reverse long-to-String mapping in a database. Subclasses can override and
- * configure the class to operate with particular databases by supplying appropriate SQL statements to the
- * constructor.
- */
-public abstract class AbstractJDBCIDMigrator extends AbstractIDMigrator implements UpdatableIDMigrator {
-
- public static final String DEFAULT_MAPPING_TABLE = "taste_id_mapping";
- public static final String DEFAULT_LONG_ID_COLUMN = "long_id";
- public static final String DEFAULT_STRING_ID_COLUMN = "string_id";
-
- private final DataSource dataSource;
- private final String getStringIDSQL;
- private final String storeMappingSQL;
-
- /**
- * @param getStringIDSQL
- * SQL statement which selects one column, the String ID, from a mapping table. The statement
- * should take one long parameter.
- * @param storeMappingSQL
- * SQL statement which saves a mapping from long to String. It should take two parameters, a long
- * and a String.
- */
- protected AbstractJDBCIDMigrator(DataSource dataSource, String getStringIDSQL, String storeMappingSQL) {
- this.dataSource = dataSource;
- this.getStringIDSQL = getStringIDSQL;
- this.storeMappingSQL = storeMappingSQL;
- }
-
- @Override
- public final void storeMapping(long longID, String stringID) throws TasteException {
- Connection conn = null;
- PreparedStatement stmt = null;
- try {
- conn = dataSource.getConnection();
- stmt = conn.prepareStatement(storeMappingSQL);
- stmt.setLong(1, longID);
- stmt.setString(2, stringID);
- stmt.executeUpdate();
- } catch (SQLException sqle) {
- throw new TasteException(sqle);
- } finally {
- IOUtils.quietClose(null, stmt, conn);
- }
- }
-
- @Override
- public final String toStringID(long longID) throws TasteException {
- Connection conn = null;
- PreparedStatement stmt = null;
- ResultSet rs = null;
- try {
- conn = dataSource.getConnection();
- stmt = conn.prepareStatement(getStringIDSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
- stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
- stmt.setFetchSize(1);
- stmt.setLong(1, longID);
- rs = stmt.executeQuery();
- if (rs.next()) {
- return rs.getString(1);
- } else {
- return null;
- }
- } catch (SQLException sqle) {
- throw new TasteException(sqle);
- } finally {
- IOUtils.quietClose(rs, stmt, conn);
- }
- }
-
- @Override
- public void initialize(Iterable stringIDs) throws TasteException {
- for (String stringID : stringIDs) {
- storeMapping(toLongID(stringID), stringID);
- }
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java
deleted file mode 100644
index 1fa639dae..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-
-import com.google.common.base.Function;
-import com.google.common.collect.Iterators;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.common.iterator.CountingIterator;
-
-/**
- *
- * Like {@link BooleanUserPreferenceArray} but stores preferences for one item (all item IDs the same) rather
- * than one user.
- *
- *
- * @see BooleanPreference
- * @see BooleanUserPreferenceArray
- * @see GenericItemPreferenceArray
- */
-public final class BooleanItemPreferenceArray implements PreferenceArray {
-
- private final long[] ids;
- private long id;
-
- public BooleanItemPreferenceArray(int size) {
- this.ids = new long[size];
- this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value
- }
-
- public BooleanItemPreferenceArray(List extends Preference> prefs, boolean forOneUser) {
- this(prefs.size());
- int size = prefs.size();
- for (int i = 0; i < size; i++) {
- Preference pref = prefs.get(i);
- ids[i] = forOneUser ? pref.getItemID() : pref.getUserID();
- }
- if (size > 0) {
- id = forOneUser ? prefs.get(0).getUserID() : prefs.get(0).getItemID();
- }
- }
-
- /**
- * This is a private copy constructor for clone().
- */
- private BooleanItemPreferenceArray(long[] ids, long id) {
- this.ids = ids;
- this.id = id;
- }
-
- @Override
- public int length() {
- return ids.length;
- }
-
- @Override
- public Preference get(int i) {
- return new PreferenceView(i);
- }
-
- @Override
- public void set(int i, Preference pref) {
- id = pref.getItemID();
- ids[i] = pref.getUserID();
- }
-
- @Override
- public long getUserID(int i) {
- return ids[i];
- }
-
- @Override
- public void setUserID(int i, long userID) {
- ids[i] = userID;
- }
-
- @Override
- public long getItemID(int i) {
- return id;
- }
-
- /**
- * {@inheritDoc}
- *
- * Note that this method will actually set the item ID for all preferences.
- */
- @Override
- public void setItemID(int i, long itemID) {
- id = itemID;
- }
-
- /**
- * @return all user IDs
- */
- @Override
- public long[] getIDs() {
- return ids;
- }
-
- @Override
- public float getValue(int i) {
- return 1.0f;
- }
-
- @Override
- public void setValue(int i, float value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void sortByUser() {
- Arrays.sort(ids);
- }
-
- @Override
- public void sortByItem() { }
-
- @Override
- public void sortByValue() { }
-
- @Override
- public void sortByValueReversed() { }
-
- @Override
- public boolean hasPrefWithUserID(long userID) {
- for (long id : ids) {
- if (userID == id) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- public boolean hasPrefWithItemID(long itemID) {
- return id == itemID;
- }
-
- @Override
- public BooleanItemPreferenceArray clone() {
- return new BooleanItemPreferenceArray(ids.clone(), id);
- }
-
- @Override
- public int hashCode() {
- return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids);
- }
-
- @Override
- public boolean equals(Object other) {
- if (!(other instanceof BooleanItemPreferenceArray)) {
- return false;
- }
- BooleanItemPreferenceArray otherArray = (BooleanItemPreferenceArray) other;
- return id == otherArray.id && Arrays.equals(ids, otherArray.ids);
- }
-
- @Override
- public Iterator iterator() {
- return Iterators.transform(new CountingIterator(length()),
- new Function() {
- @Override
- public Preference apply(Integer from) {
- return new PreferenceView(from);
- }
- });
- }
-
- @Override
- public String toString() {
- StringBuilder result = new StringBuilder(10 * ids.length);
- result.append("BooleanItemPreferenceArray[itemID:");
- result.append(id);
- result.append(",{");
- for (int i = 0; i < ids.length; i++) {
- if (i > 0) {
- result.append(',');
- }
- result.append(ids[i]);
- }
- result.append("}]");
- return result.toString();
- }
-
- private final class PreferenceView implements Preference {
-
- private final int i;
-
- private PreferenceView(int i) {
- this.i = i;
- }
-
- @Override
- public long getUserID() {
- return BooleanItemPreferenceArray.this.getUserID(i);
- }
-
- @Override
- public long getItemID() {
- return BooleanItemPreferenceArray.this.getItemID(i);
- }
-
- @Override
- public float getValue() {
- return 1.0f;
- }
-
- @Override
- public void setValue(float value) {
- throw new UnsupportedOperationException();
- }
-
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java
deleted file mode 100644
index 3c05cafe1..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import java.io.Serializable;
-
-import org.apache.mahout.cf.taste.model.Preference;
-
-/**
- * Encapsulates a simple boolean "preference" for an item whose value does not matter (is fixed at 1.0). This
- * is appropriate in situations where users conceptually have only a general "yes" preference for items,
- * rather than a spectrum of preference values.
- */
-public final class BooleanPreference implements Preference, Serializable {
-
- private final long userID;
- private final long itemID;
-
- public BooleanPreference(long userID, long itemID) {
- this.userID = userID;
- this.itemID = itemID;
- }
-
- @Override
- public long getUserID() {
- return userID;
- }
-
- @Override
- public long getItemID() {
- return itemID;
- }
-
- @Override
- public float getValue() {
- return 1.0f;
- }
-
- @Override
- public void setValue(float value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public String toString() {
- return "BooleanPreference[userID: " + userID + ", itemID:" + itemID + ']';
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java
deleted file mode 100644
index 931f60b4b..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-
-import com.google.common.base.Function;
-import com.google.common.collect.Iterators;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.common.iterator.CountingIterator;
-
-/**
- *
- * Like {@link GenericUserPreferenceArray} but stores, conceptually, {@link BooleanPreference} objects which
- * have no associated preference value.
- *
- *
- * @see BooleanPreference
- * @see BooleanItemPreferenceArray
- * @see GenericUserPreferenceArray
- */
-public final class BooleanUserPreferenceArray implements PreferenceArray {
-
- private final long[] ids;
- private long id;
-
- public BooleanUserPreferenceArray(int size) {
- this.ids = new long[size];
- this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value
- }
-
- public BooleanUserPreferenceArray(List extends Preference> prefs) {
- this(prefs.size());
- int size = prefs.size();
- for (int i = 0; i < size; i++) {
- Preference pref = prefs.get(i);
- ids[i] = pref.getItemID();
- }
- if (size > 0) {
- id = prefs.get(0).getUserID();
- }
- }
-
- /**
- * This is a private copy constructor for clone().
- */
- private BooleanUserPreferenceArray(long[] ids, long id) {
- this.ids = ids;
- this.id = id;
- }
-
- @Override
- public int length() {
- return ids.length;
- }
-
- @Override
- public Preference get(int i) {
- return new PreferenceView(i);
- }
-
- @Override
- public void set(int i, Preference pref) {
- id = pref.getUserID();
- ids[i] = pref.getItemID();
- }
-
- @Override
- public long getUserID(int i) {
- return id;
- }
-
- /**
- * {@inheritDoc}
- *
- * Note that this method will actually set the user ID for all preferences.
- */
- @Override
- public void setUserID(int i, long userID) {
- id = userID;
- }
-
- @Override
- public long getItemID(int i) {
- return ids[i];
- }
-
- @Override
- public void setItemID(int i, long itemID) {
- ids[i] = itemID;
- }
-
- /**
- * @return all item IDs
- */
- @Override
- public long[] getIDs() {
- return ids;
- }
-
- @Override
- public float getValue(int i) {
- return 1.0f;
- }
-
- @Override
- public void setValue(int i, float value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void sortByUser() { }
-
- @Override
- public void sortByItem() {
- Arrays.sort(ids);
- }
-
- @Override
- public void sortByValue() { }
-
- @Override
- public void sortByValueReversed() { }
-
- @Override
- public boolean hasPrefWithUserID(long userID) {
- return id == userID;
- }
-
- @Override
- public boolean hasPrefWithItemID(long itemID) {
- for (long id : ids) {
- if (itemID == id) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- public BooleanUserPreferenceArray clone() {
- return new BooleanUserPreferenceArray(ids.clone(), id);
- }
-
- @Override
- public int hashCode() {
- return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids);
- }
-
- @Override
- public boolean equals(Object other) {
- if (!(other instanceof BooleanUserPreferenceArray)) {
- return false;
- }
- BooleanUserPreferenceArray otherArray = (BooleanUserPreferenceArray) other;
- return id == otherArray.id && Arrays.equals(ids, otherArray.ids);
- }
-
- @Override
- public Iterator iterator() {
- return Iterators.transform(new CountingIterator(length()),
- new Function() {
- @Override
- public Preference apply(Integer from) {
- return new PreferenceView(from);
- }
- });
- }
-
- @Override
- public String toString() {
- StringBuilder result = new StringBuilder(10 * ids.length);
- result.append("BooleanUserPreferenceArray[userID:");
- result.append(id);
- result.append(",{");
- for (int i = 0; i < ids.length; i++) {
- if (i > 0) {
- result.append(',');
- }
- result.append(ids[i]);
- }
- result.append("}]");
- return result.toString();
- }
-
- private final class PreferenceView implements Preference {
-
- private final int i;
-
- private PreferenceView(int i) {
- this.i = i;
- }
-
- @Override
- public long getUserID() {
- return BooleanUserPreferenceArray.this.getUserID(i);
- }
-
- @Override
- public long getItemID() {
- return BooleanUserPreferenceArray.this.getItemID(i);
- }
-
- @Override
- public float getValue() {
- return 1.0f;
- }
-
- @Override
- public void setValue(float value) {
- throw new UnsupportedOperationException();
- }
-
- }
-
-}
\ No newline at end of file
diff --git a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java b/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
deleted file mode 100644
index 56e7028dc..000000000
--- a/common/mahout-distribution-0.7-hadoop1/distribution/target/mahout-distribution-0.7-src/mahout-distribution-0.7/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
+++ /dev/null
@@ -1,320 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.impl.model;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
-import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
-import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator;
-import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.PreferenceArray;
-
-import com.google.common.base.Preconditions;
-
-/**
- *
- * A simple {@link DataModel} which uses given user data as its data source. This implementation
- * is mostly useful for small experiments and is not recommended for contexts where performance is important.
- *
- */
-public final class GenericBooleanPrefDataModel extends AbstractDataModel {
-
- private final long[] userIDs;
- private final FastByIDMap preferenceFromUsers;
- private final long[] itemIDs;
- private final FastByIDMap preferenceForItems;
- private final FastByIDMap> timestamps;
-
- /**
- *
- * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
- * {@link DataModel} retains all this information in memory and is effectively immutable.
- *
- *
- * @param userData users to include
- */
- public GenericBooleanPrefDataModel(FastByIDMap userData) {
- this(userData, null);
- }
-
- /**
- *
- * Creates a new {@link GenericDataModel} from the given users (and their preferences). This
- * {@link DataModel} retains all this information in memory and is effectively immutable.
- *
- *
- * @param userData users to include
- * @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch.
- * User IDs are mapped to maps of item IDs to Long timestamps.
- */
- public GenericBooleanPrefDataModel(FastByIDMap userData, FastByIDMap> timestamps) {
- Preconditions.checkArgument(userData != null, "userData is null");
-
- this.preferenceFromUsers = userData;
- this.preferenceForItems = new FastByIDMap();
- FastIDSet itemIDSet = new FastIDSet();
- for (Map.Entry entry : preferenceFromUsers.entrySet()) {
- long userID = entry.getKey();
- FastIDSet itemIDs = entry.getValue();
- itemIDSet.addAll(itemIDs);
- LongPrimitiveIterator it = itemIDs.iterator();
- while (it.hasNext()) {
- long itemID = it.nextLong();
- FastIDSet userIDs = preferenceForItems.get(itemID);
- if (userIDs == null) {
- userIDs = new FastIDSet(2);
- preferenceForItems.put(itemID, userIDs);
- }
- userIDs.add(userID);
- }
- }
-
- this.itemIDs = itemIDSet.toArray();
- itemIDSet = null; // Might help GC -- this is big
- Arrays.sort(itemIDs);
-
- this.userIDs = new long[userData.size()];
- int i = 0;
- LongPrimitiveIterator it = userData.keySetIterator();
- while (it.hasNext()) {
- userIDs[i++] = it.next();
- }
- Arrays.sort(userIDs);
-
- this.timestamps = timestamps;
- }
-
- /**
- *
- * Creates a new {@link GenericDataModel} containing an immutable copy of the data from another given
- * {@link DataModel}.
- *
- *
- * @param dataModel
- * {@link DataModel} to copy
- * @throws TasteException
- * if an error occurs while retrieving the other {@link DataModel}'s users
- * @deprecated without direct replacement.
- * Consider {@link #toDataMap(DataModel)} with {@link #GenericBooleanPrefDataModel(FastByIDMap)}
- */
- @Deprecated
- public GenericBooleanPrefDataModel(DataModel dataModel) throws TasteException {
- this(toDataMap(dataModel));
- }
-
- /**
- * Exports the simple user IDs and associated item IDs in the data model.
- *
- * @return a {@link FastByIDMap} mapping user IDs to {@link FastIDSet}s representing
- * that user's associated items
- */
- public static FastByIDMap toDataMap(DataModel dataModel) throws TasteException {
- FastByIDMap data = new FastByIDMap(dataModel.getNumUsers());
- LongPrimitiveIterator it = dataModel.getUserIDs();
- while (it.hasNext()) {
- long userID = it.nextLong();
- data.put(userID, dataModel.getItemIDsFromUser(userID));
- }
- return data;
- }
-
- public static FastByIDMap toDataMap(FastByIDMap data) {
- for (Map.Entry entry : ((FastByIDMap