Skip to content

Commit

Permalink
Handle dodgy data files with invalid JSON contents and rationalise file result class
Browse files Browse the repository at this point in the history
  • Loading branch information
AdaTheDev committed Feb 11, 2013
1 parent 9007a75 commit ba32148
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 75 deletions.
Expand Up @@ -58,7 +58,7 @@
<ItemGroup>
<Compile Include="Common\TestCases.cs" />
<Compile Include="ElasticConnectionSettingsFixture.cs" />
<Compile Include="ImportedFileFixture.cs" />
<Compile Include="ImportFileResultFixture.cs" />
<Compile Include="ImporterFixture.cs" />
<Compile Include="ImportResultFixture.cs" />
<Compile Include="IO\TweetFileParserFixture.cs" />
Expand Down
70 changes: 70 additions & 0 deletions ElasticTweets.Library.UnitTests/ImportFileResultFixture.cs
@@ -0,0 +1,70 @@
using System;
using NUnit.Framework;

// ReSharper disable InconsistentNaming
namespace ElasticTweets.Library.UnitTests
{
/// <summary>
/// Unit tests for the two <see cref="ImportFileResult"/> constructors:
/// the success form (file name + tweet count) and the failure form
/// (file name + error message).
/// </summary>
[TestFixture]
public class ImportFileResultFixture
{
    private const string TestFileName = "File1.js";
    private const int TestNumberOfTweets = 123;
    private const string TestErrorMessage = "Went pop";

    // Builds a result through the (fileName, numberOfTweets) constructor.
    private static ImportFileResult CreateSuccessResult()
    {
        return new ImportFileResult(TestFileName, TestNumberOfTweets);
    }

    // Builds a result through the (fileName, errorMessage) constructor.
    private static ImportFileResult CreateFailureResult()
    {
        return new ImportFileResult(TestFileName, TestErrorMessage);
    }

    [Test]
    public void Constructor_Success_SetsFileName()
    {
        var successResult = CreateSuccessResult();

        Assert.That(successResult.FileName, Is.EqualTo(TestFileName));
    }

    [Test]
    public void Constructor_Success_SetsNumberOfTweets()
    {
        var successResult = CreateSuccessResult();

        Assert.That(successResult.NumberOfTweets, Is.EqualTo(TestNumberOfTweets));
    }

    [Test]
    public void Constructor_Success_DefaultsErrorMessageToEmpty()
    {
        var successResult = CreateSuccessResult();

        Assert.That(successResult.ErrorMessage, Is.EqualTo(String.Empty));
    }

    [Test]
    public void Constructor_Success_SetsSuccessFlagToTrue()
    {
        var successResult = CreateSuccessResult();

        Assert.That(successResult.Success, Is.True);
    }

    [Test]
    public void Constructor_Failure_SetsFileName()
    {
        var failureResult = CreateFailureResult();

        Assert.That(failureResult.FileName, Is.EqualTo(TestFileName));
    }

    [Test]
    public void Constructor_Failure_SetsErrorMessage()
    {
        var failureResult = CreateFailureResult();

        Assert.That(failureResult.ErrorMessage, Is.EqualTo(TestErrorMessage));
    }

    [Test]
    public void Constructor_Failure_SetsSuccessFlagToFalse()
    {
        var failureResult = CreateFailureResult();

        Assert.That(failureResult.Success, Is.False);
    }
}
}
// ReSharper restore InconsistentNaming
8 changes: 4 additions & 4 deletions ElasticTweets.Library.UnitTests/ImportResultFixture.cs
Expand Up @@ -13,19 +13,19 @@ public void Constructor_DefaultsImportedFilesToEmptyArray()
{
var result = new ImportResult();

Assert.IsEmpty(result.ImportedFiles);
Assert.IsEmpty(result.Files);
}

[Test]
public void AddImportedFile_AddsFileResult()
{
var result = new ImportResult();
var file = new ImportedFile("1.txt", 123);
var file = new ImportFileResult("1.txt", 123);

result.AddImportedFile(file);

Assert.AreEqual(1, result.ImportedFiles.Count(), "Expected 1 file in ImportedFiles" );
Assert.That(result.ImportedFiles.Any(f => f == file), "Expected file not found");
Assert.AreEqual(1, result.Files.Count(), "Expected 1 file in Files" );
Assert.That(result.Files.Any(f => f == file), "Expected file not found");
}

[Test]
Expand Down
29 changes: 0 additions & 29 deletions ElasticTweets.Library.UnitTests/ImportedFileFixture.cs

This file was deleted.

33 changes: 28 additions & 5 deletions ElasticTweets.Library.UnitTests/ImporterFixture.cs
Expand Up @@ -7,6 +7,7 @@
using Moq;
using NUnit.Framework;
using Nest;
using Newtonsoft.Json;

// ReSharper disable InconsistentNaming
namespace ElasticTweets.Library.UnitTests
Expand Down Expand Up @@ -42,7 +43,9 @@ public void InitialiseImporter()
_mockedFileSystem.Setup(fs => fs.DirectoryExists(TestSourceDirectory)).Returns(true);
_mockedClientProvider.Setup(cp => cp.GetClient(_mockedConnectionSettings.Object))
.Returns(_mockedClient.Object);

_mockedFileParser.Setup(fp => fp.GetTweets(It.IsAny<string>())).Returns(_testTweets);

_mockedClient.Setup(c => c.IndexMany(It.IsAny<IEnumerable<dynamic>>())).Returns(_mockedClientResponse.Object);

_importer = new Importer(_mockedFileSystem.Object, _mockedFileParser.Object, _mockedClientProvider.Object, _mockedConnectionSettings.Object, TestSourceDirectory);
Expand Down Expand Up @@ -213,20 +216,40 @@ public void Import_CallsElasticClientToIndexTweets()
}

[Test]
public void Import_ReturnsCorrectImportResult()
public void Import_ReturnsCorrectImportResultForValidFile()
{
InitialiseImporter();
_mockedFileSystem.Setup(fs => fs.GetFiles(TestSourceDirectory, "*.js")).Returns(new[] { "1.js" });
_mockedFileSystem.Setup(fs => fs.ReadAllText(It.IsAny<string>())).Returns("");
SetupFileSystem();
_testTweets.Add(new { id = 1 });

var result = _importer.Import();

Assert.AreEqual(1, result.ImportedFiles.Count(), "1 ImportedFile expected");
var file = result.ImportedFiles.First();
Assert.AreEqual(1, result.Files.Count(), "1 ImportFileResult expected");
var file = result.Files.First();
Assert.AreEqual(1, file.NumberOfTweets, "Incorrect NumberOfTweets");
Assert.AreEqual("1.js", file.FileName, "Incorrect FileName");
Assert.IsTrue(file.Success, "Success should be True");
}

// Stubs the mocked file system so that the test source directory lists a
// single "1.js" file and any file read returns an empty string.
private void SetupFileSystem()
{
    var sourceFiles = new[] { "1.js" };

    _mockedFileSystem
        .Setup(fileSystem => fileSystem.GetFiles(TestSourceDirectory, "*.js"))
        .Returns(sourceFiles);

    _mockedFileSystem
        .Setup(fileSystem => fileSystem.ReadAllText(It.IsAny<string>()))
        .Returns("");
}

/// <summary>
/// When the file parser throws a <see cref="JsonReaderException"/> for a file,
/// Import should still report that file, flagged as a failure with an
/// "invalid JSON" error message, rather than letting the exception escape.
/// </summary>
[Test]
public void Import_ReturnsFailureResultForFileWithInvalidJson()
{
    InitialiseImporter();
    SetupFileSystem();
    // Override the default parser stub so that parsing any file blows up.
    _mockedFileParser.Setup(p => p.GetTweets(It.IsAny<string>())).Throws(new JsonReaderException("Dummy Error"));

    var result = _importer.Import();

    Assert.AreEqual(1, result.Files.Count(), "1 ImportFileResult expected");
    var file = result.Files.First();
    Assert.AreEqual("1.js", file.FileName, "Incorrect FileName");
    Assert.IsFalse(file.Success, "Success should be false");
    // Ordinal comparison: the message prefix is a fixed, non-linguistic string,
    // so the check must not depend on the current thread culture.
    Assert.That(
        file.ErrorMessage.StartsWith("File contains invalid JSON", System.StringComparison.Ordinal),
        "Unexpected error message");
}
#endregion
}
Expand Down
2 changes: 1 addition & 1 deletion ElasticTweets.Library/ElasticTweets.Library.csproj
Expand Up @@ -51,7 +51,7 @@
<ItemGroup>
<Compile Include="ElasticConnectionSettings.cs" />
<Compile Include="IElasticConnectionSettings.cs" />
<Compile Include="ImportedFile.cs" />
<Compile Include="ImportFileResult.cs" />
<Compile Include="Importer.cs" />
<Compile Include="ImportResult.cs" />
<Compile Include="IO\FileSystem.cs" />
Expand Down
47 changes: 47 additions & 0 deletions ElasticTweets.Library/ImportFileResult.cs
@@ -0,0 +1,47 @@
using System;

namespace ElasticTweets.Library
{
/// <summary>
/// Immutable outcome of importing a single file: either a success carrying
/// the number of tweets found, or a failure carrying an error message.
/// </summary>
public sealed class ImportFileResult
{
    private readonly string _fileName;
    private readonly int _numberOfTweets;
    private readonly string _errorMessage;
    private readonly bool _success;

    /// <summary>
    /// Creates a successful result.
    /// </summary>
    /// <param name="fileName">Name of the file that was processed.</param>
    /// <param name="numberOfTweets">Number of tweets recorded for the file.</param>
    public ImportFileResult(string fileName, int numberOfTweets)
    {
        _fileName = fileName;
        _numberOfTweets = numberOfTweets;
        _errorMessage = String.Empty;
        _success = true;
    }

    /// <summary>
    /// Creates a failed result. <see cref="NumberOfTweets"/> is left at 0.
    /// </summary>
    /// <param name="fileName">Name of the file that was processed.</param>
    /// <param name="errorMessage">
    /// Description of what went wrong; a null value is stored as an empty string.
    /// </param>
    public ImportFileResult(string fileName, string errorMessage)
    {
        _fileName = fileName;
        // Normalise null so ErrorMessage is never null on either path; the
        // success constructor already guarantees String.Empty.
        _errorMessage = errorMessage ?? String.Empty;
        _success = false;
    }

    /// <summary>Name of the file this result refers to.</summary>
    public string FileName
    {
        get { return _fileName; }
    }

    /// <summary>Number of tweets recorded for the file; 0 for a failed result.</summary>
    public int NumberOfTweets
    {
        get { return _numberOfTweets; }
    }

    /// <summary>Error description for a failed result; empty for a successful one. Never null.</summary>
    public string ErrorMessage
    {
        get { return _errorMessage; }
    }

    /// <summary>True when created via the success constructor, false otherwise.</summary>
    public bool Success
    {
        get { return _success; }
    }
}
}
8 changes: 4 additions & 4 deletions ElasticTweets.Library/ImportResult.cs
Expand Up @@ -5,19 +5,19 @@ namespace ElasticTweets.Library
{
public class ImportResult
{
private readonly List<ImportedFile> _importedFiles;
private readonly List<ImportFileResult> _importedFiles;

public ImportResult()
{
_importedFiles = new List<ImportedFile>();
_importedFiles = new List<ImportFileResult>();
}

public IEnumerable<ImportedFile> ImportedFiles
public IEnumerable<ImportFileResult> Files
{
get { return _importedFiles.AsReadOnly();}
}

public void AddImportedFile(ImportedFile file)
public void AddImportedFile(ImportFileResult file)
{
if (file == null) throw new ArgumentNullException("file");
_importedFiles.Add(file);
Expand Down
24 changes: 0 additions & 24 deletions ElasticTweets.Library/ImportedFile.cs

This file was deleted.

28 changes: 21 additions & 7 deletions ElasticTweets.Library/Importer.cs
Expand Up @@ -5,6 +5,7 @@
using ElasticTweets.Library.IO;
using ElasticTweets.Library.Providers;
using Nest;
using Newtonsoft.Json;

namespace ElasticTweets.Library
{
Expand Down Expand Up @@ -54,8 +55,9 @@ public string SourceDirectory

/// <summary>
/// Iterates round each .js file in the source directory,
/// deserializes the tweet data in each one and pushes into
/// ElasticSearch
/// deserializes the tweet data in each one and pushes into ElasticSearch.
/// Each file = 1 month's worth of tweets. Currently, entire file is read in
/// one go and all tweets pushed to ES in a batch.
/// </summary>
/// <returns>ImportResult</returns>
public ImportResult Import()
Expand All @@ -71,16 +73,28 @@ public ImportResult Import()
return results;
}

private ImportedFile ProcessFile(string file, IElasticClient client)
private ImportFileResult ProcessFile(string file, IElasticClient client)
{
IEnumerable<dynamic> tweets = _parser.GetTweets(file).ToArray();
ImportFileResult result;

if (tweets.Any())
try
{
client.IndexMany(tweets);
IEnumerable<dynamic> tweets = _parser.GetTweets(file).ToArray();

if (tweets.Any())
{
client.IndexMany(tweets);
}

result = new ImportFileResult(file, tweets.Count());
}
catch (JsonReaderException jsonReaderException)
{
// Dodgy file/invalid json
result = new ImportFileResult(file, "File contains invalid JSON. Exception: " + jsonReaderException.Message);
}

return new ImportedFile(file, tweets.Count());
return result;
}
}
}

0 comments on commit ba32148

Please sign in to comment.