Multi-GPU Data Parallelism (with Parallel Data Layers) #2903
Merged
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
45d792e
Thread-local Caffe
cypof d94ca3f
Add BlockingQueue for inter-thread communication
cypof 73b3d13
Change the way threads are started and stopped
cypof ddcdc9d
Persistent prefetch thread
cypof bcc8f50
Add DataReader for parallel training with one DB session
cypof d2f0457
Allocate host memory through cudaMallocHost
cypof e5575cf
Multi-GPU
cypof 335bee7
Detect topology corner cases and improve broadcast order
8771d0f
[docs] add multi-gpu usage note to interfaces
shelhamer 0d34d5b
Data Layers Parallel for Multi-GPU
ronghanghu 6b50ed6
Apply mutex only to shared layers and fix NVCC warning
ronghanghu
Jump to file or symbol
Failed to load files and symbols.
| @@ -0,0 +1,82 @@ | ||
| +#ifndef CAFFE_DATA_READER_HPP_ | ||
| +#define CAFFE_DATA_READER_HPP_ | ||
| + | ||
| +#include <map> | ||
| +#include <string> | ||
| +#include <vector> | ||
| + | ||
| +#include "caffe/common.hpp" | ||
| +#include "caffe/internal_thread.hpp" | ||
| +#include "caffe/util/blocking_queue.hpp" | ||
| +#include "caffe/util/db.hpp" | ||
| + | ||
| +namespace caffe { | ||
| + | ||
/**
 * @brief Reads data from a source to queues available to data layers.
 * A single reading thread is created per source, even if multiple solvers
 * are running in parallel, e.g. for multi-GPU training. This makes sure
 * databases are read sequentially, and that each solver accesses a different
 * subset of the database. Data is distributed to solvers in a round-robin
 * way to keep parallel training deterministic.
 */
class DataReader {
 public:
  explicit DataReader(const LayerParameter& param);
  ~DataReader();

  // Queue of recyclable Datum buffers: a consumer returns a processed Datum
  // here so the reading thread can refill it instead of allocating a new one.
  // NOTE(review): returns a mutable queue from a const method — the const
  // only applies to the shared_ptr member, not the pointed-to queues.
  inline BlockingQueue<Datum*>& free() const {
    return queue_pair_->free_;
  }
  // Queue of filled Datum items, ready to be consumed by a data layer.
  inline BlockingQueue<Datum*>& full() const {
    return queue_pair_->full_;
  }

 protected:
  // Queue pairs are shared between a body and its readers
  class QueuePair {
   public:
    // `size` presumably bounds/seeds the free queue with that many Datum
    // buffers — confirm against the constructor in the .cpp.
    explicit QueuePair(int size);
    ~QueuePair();

    BlockingQueue<Datum*> free_;   // empty buffers, owned by this pair
    BlockingQueue<Datum*> full_;   // filled buffers awaiting consumption

    DISABLE_COPY_AND_ASSIGN(QueuePair);
  };

  // A single body is created per source
  class Body : public InternalThread {
   public:
    explicit Body(const LayerParameter& param);
    virtual ~Body();

   protected:
    // Thread main loop (InternalThread override): reads from the DB and
    // dispatches items to the registered queue pairs.
    void InternalThreadEntry();
    // Reads one datum from `cursor` into a free buffer of `qp`.
    void read_one(db::Cursor* cursor, QueuePair* qp);

    const LayerParameter param_;  // copy of the layer config for this source
    // Channel by which newly constructed DataReaders hand their queue pair
    // to the (already running) body thread.
    BlockingQueue<shared_ptr<QueuePair> > new_queue_pairs_;

    friend class DataReader;

    DISABLE_COPY_AND_ASSIGN(Body);
  };

  // A source is uniquely identified by its layer name + path, in case
  // the same database is read from two different locations in the net.
  static inline string source_key(const LayerParameter& param) {
    return param.name() + ":" + param.data_param().source();
  }

  const shared_ptr<QueuePair> queue_pair_;  // this reader's own queue pair
  shared_ptr<Body> body_;  // shared reading thread for this source

  // Registry of reading threads, keyed by source_key(). weak_ptr so a body
  // is destroyed once the last DataReader using it goes away.
  // NOTE(review): `const string` as a map key type is unusual (the key is
  // already const inside the map's value_type) — technically nonconforming
  // with standard allocator requirements, though it compiles on common
  // toolchains; verify it matches the definition in the .cpp.
  static map<const string, boost::weak_ptr<DataReader::Body> > bodies_;

  DISABLE_COPY_AND_ASSIGN(DataReader);
};
| + | ||
| +} // namespace caffe | ||
| + | ||
| +#endif // CAFFE_DATA_READER_HPP_ |
Oops, something went wrong.