diff --git a/unittest/Makefile.am b/unittest/Makefile.am
index 681b9512aa..c24ded9a6c 100644
--- a/unittest/Makefile.am
+++ b/unittest/Makefile.am
@@ -1,3 +1,6 @@
+# Absolute path of directory 'langdata' (sibling of the top source directory).
+LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata
+
 # Absolute path of directory 'tessdata' with traineddata files
 # (must be on same level as top source directory).
 TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
@@ -6,6 +9,7 @@ TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
 # (using submodule test).
 TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
 
+AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
 AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
 AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
 AM_CPPFLAGS += -DPANGO_ENABLE_ENGINE
@@ -83,7 +87,6 @@ GMOCK_LIBS =  libgmock.la libgmock_main.la
 TESS_LIBS = $(top_builddir)/src/api/libtesseract.la
 TRAINING_LIBS = $(top_builddir)/src/training/libtesseract_training.la
 TRAINING_LIBS += $(top_builddir)/src/training/libtesseract_tessopt.la
-TRAINING_LIBS += $(ICU_UC_LIBS)
 AM_CPPFLAGS +=   -isystem $(top_srcdir)/googletest/googletest/include \
                  -isystem $(top_srcdir)/googletest/googlemock/include
 
@@ -101,6 +104,7 @@ check_PROGRAMS = \
   indexmapbidi_test \
   intfeaturemap_test \
   intsimdmatrix_test \
+  lang_model_test \
   linlsq_test \
   loadlang_test \
   matrix_test \
@@ -149,7 +153,7 @@ colpartition_test_SOURCES = colpartition_test.cc
 colpartition_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 
 commandlineflags_test_SOURCES = commandlineflags_test.cc
-commandlineflags_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS)
+commandlineflags_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
 
 denorm_test_SOURCES = denorm_test.cc
 denorm_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
@@ -172,6 +176,9 @@ intfeaturemap_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 intsimdmatrix_test_SOURCES = intsimdmatrix_test.cc
 intsimdmatrix_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 
+lang_model_test_SOURCES = lang_model_test.cc
+lang_model_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
+
 linlsq_test_SOURCES = linlsq_test.cc
 linlsq_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 
@@ -222,7 +229,7 @@ tfile_test_SOURCES = tfile_test.cc
 tfile_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 
 validator_test_SOURCES = validator_test.cc
-validator_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS)
+validator_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
 
 # for windows
 if T_WIN
diff --git a/unittest/include_gunit.h b/unittest/include_gunit.h
index 80c7b9cb1a..9f93271b3b 100644
--- a/unittest/include_gunit.h
+++ b/unittest/include_gunit.h
@@ -28,6 +28,15 @@ class file : public tesseract::File {
   static int Defaults() {
     return 0;
   }
+
+  static std::string JoinPath(const std::string& s1, const std::string& s2) {
+    return tesseract::File::JoinPath(s1, s2);  // forward to base-class helper
+  }
+
+  static std::string JoinPath(const std::string& s1, const std::string& s2,
+                              const std::string& s3) {
+    return JoinPath(JoinPath(s1, s2), s3);  // join s1 and s2, then append s3
+  }
 };
 
 #if !defined(ABSL_ARRAYSIZE)
diff --git a/unittest/lang_model_test.cc b/unittest/lang_model_test.cc
index 7a577c3aaa..77b4b85a11 100644
--- a/unittest/lang_model_test.cc
+++ b/unittest/lang_model_test.cc
@@ -1,13 +1,31 @@
-#include "tesseract/training/lang_model_helpers.h"
+// (C) Copyright 2017, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
-#include "tesseract/lstm/lstmtrainer.h"
-#include "tesseract/training/unicharset_training_utils.h"
+#include <string>                       // for std::string
+
+#include "absl/strings/str_cat.h"
+
+#include "gmock/gmock.h"                // for testing::ElementsAreArray
+
+#include "include_gunit.h"
+#include "lang_model_helpers.h"
+#include "log.h"                        // for LOG
+#include "lstmtrainer.h"
+#include "unicharset_training_utils.h"
 
 namespace tesseract {
 namespace {
 
-string TestDataNameToPath(const string& name) {
-  return file::JoinPath(FLAGS_test_srcdir, "testdata", name);
+std::string TestDataNameToPath(const std::string& name) {
+  return file::JoinPath(TESTING_DIR, name);  // TESTING_DIR: -D flag, Makefile.am
 }
 
 // This is an integration test that verifies that CombineLangModel works to
@@ -18,15 +36,15 @@ TEST(LangModelTest, AddACharacter) {
   constexpr char kTestString[] = "Simple ASCII string to encode !@#$%&";
   constexpr char kTestStringRupees[] = "ASCII string with Rupee symbol ₹";
   // Setup the arguments.
-  string script_dir = file::JoinPath(FLAGS_test_srcdir, "langdata");
-  string eng_dir = file::JoinPath(script_dir, "eng");
-  string unicharset_path = TestDataNameToPath("eng_beam.unicharset");
+  std::string script_dir = LANGDATA_DIR;
+  std::string eng_dir = file::JoinPath(script_dir, "eng");
+  std::string unicharset_path = TestDataNameToPath("eng_beam.unicharset");
   UNICHARSET unicharset;
   EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
-  string version_str = "TestVersion";
-  string output_dir = FLAGS_test_tmpdir;
+  std::string version_str = "TestVersion";
+  std::string output_dir = FLAGS_test_tmpdir;
   LOG(INFO) << "Output dir=" << output_dir;
-  string lang1 = "eng";
+  std::string lang1 = "eng";
   bool pass_through_recoder = false;
   GenericVector<STRING> words, puncs, numbers;
   // If these reads fail, we get a warning message and an empty list of words.
@@ -44,7 +62,7 @@ TEST(LangModelTest, AddACharacter) {
                                 lang1, pass_through_recoder, words, puncs,
                                 numbers, lang_is_rtl, nullptr, nullptr));
   // Init a trainer with it, and encode a string.
-  string traineddata1 =
+  std::string traineddata1 =
       file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
   LSTMTrainer trainer1;
   trainer1.InitCharSet(traineddata1);
@@ -58,13 +76,13 @@ TEST(LangModelTest, AddACharacter) {
                        &unicharset);
   EXPECT_EQ(size_before + 1, unicharset.size());
   // Generate the traineddata file.
-  string lang2 = "extended";
+  std::string lang2 = "extended";
   EXPECT_EQ(EXIT_SUCCESS,
             CombineLangModel(unicharset, script_dir, version_str, output_dir,
                              lang2, pass_through_recoder, words, puncs, numbers,
                              lang_is_rtl, nullptr, nullptr));
   // Init a trainer with it, and encode a string.
-  string traineddata2 =
+  std::string traineddata2 =
       file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
   LSTMTrainer trainer2;
   trainer2.InitCharSet(traineddata2);
@@ -86,7 +104,7 @@ TEST(LangModelTest, AddACharacter) {
   }
   EXPECT_THAT(labels1_v,
               testing::ElementsAreArray(&labels2[0], labels2.size()));
-  // To make sure we weren't cheating somehow, we can now encode the Rupee
+  // To make sure we are not cheating somehow, we can now encode the Rupee
   // symbol, which we could not do before.
   EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
   EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));