Skip to content

Commit

Permalink
preordering in batches implemented, some finetune fixes, last commit …
Browse files Browse the repository at this point in the history
…before complete rebranding as OPUS
  • Loading branch information
TommiNieminen committed Dec 22, 2020
1 parent 473ca3f commit 02540aa
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 39 deletions.
8 changes: 4 additions & 4 deletions FinetuneTestsetExtractor/FinetuneWpfControl.xaml.cs
Expand Up @@ -72,7 +72,7 @@ public FinetuneWpfControl(FinetuneBatchTaskSettings Settings)
this.Settings = Settings;
//Mode defaults, changeable with radio buttons
this.Settings.Finetune = true;
this.Settings.BatchTranslate = true;
this.Settings.PreOrderMtForNewSegments = true;
//Some settings are initially held in a FiskmoOptions object (the shared properties
//with the translation provider settings).
this.Options = new FiskmoOptions();
Expand All @@ -95,16 +95,16 @@ private void ModeButton_Checked(object sender, RoutedEventArgs e)
switch (radioButton.Name)
{
case "FinetuneAndTranslate":
this.Settings.BatchTranslate = true;
this.Settings.PreOrderMtForNewSegments = true;
this.Settings.Finetune = true;
break;
case "FinetuneOnly":
this.Settings.Finetune = true;
this.Settings.BatchTranslate = false;
this.Settings.PreOrderMtForNewSegments = false;
break;
case "TranslateOnly":
this.Settings.Finetune = false;
this.Settings.BatchTranslate = true;
this.Settings.PreOrderMtForNewSegments = true;
break;
}

Expand Down
12 changes: 6 additions & 6 deletions FiskmoTranslationProvider/FinetuneBatchTask.cs
Expand Up @@ -17,8 +17,8 @@ namespace FiskmoTranslationProvider
{

[AutomaticTask("OPUSCATBatchTask",
"OPUS-CAT finetune and preorder machine translation",
"Task for finetuning OPUS MT models with project data, with optional preordering of MT for new segments in project (makes fetching MT much quicker during translation). IMPORTANT: Segment the files before running this task by opening them in the editor and saving, or by running Pretranslate or Pseudotranslate tasks.",
"OPUS-CAT Finetune and Preorder",
"Task for finetuning OPUS MT models, with optional preordering of MT for new segments. IMPORTANT: This task works only on segmented files. If files are not segmented, segment them by opening them in the editor and saving, or by running Pretranslate or Pseudotranslate tasks.",
GeneratedFileType = AutomaticTaskFileType.None)]
//[TODO] You can change the file type according to your needs
[AutomaticTaskSupportedFileType(AutomaticTaskFileType.BilingualTarget)]
Expand Down Expand Up @@ -150,7 +150,7 @@ private void AddFiskmoProviderToProject()
}
}

private void BatchTranslate()
private void PreOrderMt()
{
var projectInfo = this.Project.GetProjectInfo();
var projectGuid = projectInfo.Id;
Expand All @@ -161,7 +161,7 @@ private void BatchTranslate()
var targetCode = targetLang.CultureInfo.TwoLetterISOLanguageName;
var uniqueNewSegments = this.ProjectNewSegments[targetLang].Distinct().ToList();
//Send the new segments to MT service
var result = FiskmöMTServiceHelper.PreTranslateBatch(fiskmoOptions.mtServiceAddress, fiskmoOptions.mtServicePort, uniqueNewSegments, sourceCode, targetCode, fiskmoOptions.modelTag);
var result = FiskmöMTServiceHelper.PreOrderBatch(fiskmoOptions.mtServiceAddress, fiskmoOptions.mtServicePort, uniqueNewSegments, sourceCode, targetCode, fiskmoOptions.modelTag);

switch (result)
{
Expand Down Expand Up @@ -267,9 +267,9 @@ public override void TaskComplete()
//Send the new segments to the MT engine for preordering.
//If finetuning is selected, the new segments are translated after
//customization has finished, so this is only for the translate-only mode
//(preordering enabled, finetuning disabled).
if (settings.BatchTranslate == true && settings.Finetune == false)
if (settings.PreOrderMtForNewSegments == true && settings.Finetune == false)
{
this.BatchTranslate();
this.PreOrderMt();
}
}

Expand Down
6 changes: 3 additions & 3 deletions FiskmoTranslationProvider/FinetuneBatchTaskSettings.cs
Expand Up @@ -102,15 +102,15 @@ public bool IncludeTagPairs
}
}

public bool BatchTranslate
public bool PreOrderMtForNewSegments
{
get
{
return GetSetting<bool>(nameof(BatchTranslate));
return GetSetting<bool>(nameof(PreOrderMtForNewSegments));
}
set
{
GetSetting<bool>(nameof(BatchTranslate)).Value = value;
GetSetting<bool>(nameof(PreOrderMtForNewSegments)).Value = value;
NotifyPropertyChanged();
}
}
Expand Down
8 changes: 4 additions & 4 deletions FiskmoTranslationProvider/FinetuneWpfControl.xaml.cs
Expand Up @@ -72,7 +72,7 @@ public FinetuneWpfControl(FinetuneBatchTaskSettings Settings)
this.Settings = Settings;
//Mode defaults, changeable with radio buttons
this.Settings.Finetune = true;
this.Settings.BatchTranslate = true;
this.Settings.PreOrderMtForNewSegments = true;
//Some settings are initially held in a FiskmoOptions object (the shared properties
//with the translation provider settings).
this.Options = new FiskmoOptions();
Expand All @@ -95,16 +95,16 @@ private void ModeButton_Checked(object sender, RoutedEventArgs e)
switch (radioButton.Name)
{
case "FinetuneAndTranslate":
this.Settings.BatchTranslate = true;
this.Settings.PreOrderMtForNewSegments = true;
this.Settings.Finetune = true;
break;
case "FinetuneOnly":
this.Settings.Finetune = true;
this.Settings.BatchTranslate = false;
this.Settings.PreOrderMtForNewSegments = false;
break;
case "TranslateOnly":
this.Settings.Finetune = false;
this.Settings.BatchTranslate = true;
this.Settings.PreOrderMtForNewSegments = true;
break;
}

Expand Down
24 changes: 10 additions & 14 deletions FiskmoTranslationProvider/FiskmoProvider.cs
Expand Up @@ -218,19 +218,16 @@ private static void segmentChanged(FiskmoOptions options, LanguageDirection lang
return;
}

//TODO: time this to see if it's a bottleneck during translation.
//If this is too slow, it might be best to go with a doc changed handler that would collect all the source texts
//once as soon as the doc is changed and then you could use that collection to run the
//next segment checks.
//TESTED: doesn't seem slow at all, probably the translation part later that causes delay.
var sourceSegmentTexts = new List<string>();

var nextSegmentPairs = doc.SegmentPairs.SkipWhile(x =>
!(x.Properties.Id == doc.ActiveSegmentPair.Properties.Id &&
x.GetParagraphUnitProperties().ParagraphUnitId == doc.ActiveSegmentPair.GetParagraphUnitProperties().ParagraphUnitId));

var segmentsNeeded = options.pregenerateSegmentCount;
foreach (var segmentPair in nextSegmentPairs)
{
if (segmentsNeeded == 0)
if (sourceSegmentTexts.Count == segmentsNeeded)
{
break;
}
Expand All @@ -243,16 +240,15 @@ private static void segmentChanged(FiskmoOptions options, LanguageDirection lang
visitor.Reset();
segmentPair.Source.AcceptVisitor(visitor);
var sourceText = visitor.PlainText;

var sourceCode = langDir.SourceLanguage.CultureInfo.TwoLetterISOLanguageName;
var targetCode = langDir.TargetLanguage.CultureInfo.TwoLetterISOLanguageName;
var langpair = $"{sourceCode}-{targetCode}";

//The preorder method doesn't wait for the translation, so the requests return quicker
FiskmöMTServiceHelper.PreOrder(options, sourceText, sourceCode, targetCode, options.modelTag);
segmentsNeeded -= 1;
sourceSegmentTexts.Add(sourceText);
}
}

var sourceCode = langDir.SourceLanguage.CultureInfo.TwoLetterISOLanguageName;
var targetCode = langDir.TargetLanguage.CultureInfo.TwoLetterISOLanguageName;

//The preorder method doesn't wait for the translation, so the requests return quicker
FiskmöMTServiceHelper.PreOrderBatch(options, sourceSegmentTexts, sourceCode, targetCode, options.modelTag);
}

//THIS IS DEPRECATED, REPLACED WITH SEGMENT CHANGE HANDLER EVENT
Expand Down
9 changes: 5 additions & 4 deletions FiskmoTranslationProvider/FiskmöMTServiceHelper.cs
Expand Up @@ -163,26 +163,27 @@ public static string Translate(FiskmoOptions options, string input, string srcLa
}
}

public static void PreOrder(FiskmoOptions options, string input, string srcLangCode, string trgLangCode, string modelTag)

public static void PreOrderBatch(FiskmoOptions options, List<string> input, string srcLangCode, string trgLangCode, string modelTag)
{
Task.Run(() =>
{
// Always dispose allocated resources
var proxy = getNewProxy(options.mtServiceAddress, options.mtServicePort);
using (proxy as IDisposable)
{
proxy.Translate(GetTokenCode(options), input, srcLangCode, trgLangCode, modelTag);
proxy.PreOrderBatch(GetTokenCode(options), input, srcLangCode, trgLangCode, modelTag);
}
});
}

internal static string PreTranslateBatch(string host, string mtServicePort, List<string> projectNewSegments, string sourceCode, string targetCode, string modelTag)
internal static string PreOrderBatch(string host, string mtServicePort, List<string> projectNewSegments, string sourceCode, string targetCode, string modelTag)
{
var proxy = getNewProxy(host, mtServicePort);

using (proxy as IDisposable)
{
return proxy.PreTranslateBatch(GetTokenCode(host, mtServicePort), projectNewSegments, sourceCode, targetCode, modelTag);
return proxy.PreOrderBatch(GetTokenCode(host, mtServicePort), projectNewSegments, sourceCode, targetCode, modelTag);
}
}

Expand Down
3 changes: 1 addition & 2 deletions OpusMTInterface/IMTService.cs
Expand Up @@ -60,9 +60,8 @@ public interface IMTService

[OperationContract]
[WebInvoke(Method = "POST", BodyStyle = WebMessageBodyStyle.Wrapped)]
string PreTranslateBatch(string tokenCode, List<string> input, string srcLangCode, string trgLangCode, String modelId);
string PreOrderBatch(string tokenCode, List<string> input, string srcLangCode, string trgLangCode, String modelId);


[OperationContract]
[WebGet]
void StoreTranslation(string tokenCode, string source, string target, string srcLangCode, string trgLangCode);
Expand Down
18 changes: 16 additions & 2 deletions OpusMTService/MTService.cs
Expand Up @@ -174,7 +174,7 @@ public List<string> BatchTranslate(string tokenCode, List<string> input, string
/// <param name="input"></param>
/// <param name="srcLangCode"></param>
/// <param name="trgLangCode"></param>
public string PreTranslateBatch(string tokenCode, List<string> input, string srcLangCode, string trgLangCode, string modelTag)
public string PreOrderBatch(string tokenCode, List<string> input, string srcLangCode, string trgLangCode, string modelTag)
{

if (!TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode))
Expand All @@ -188,6 +188,18 @@ public string PreTranslateBatch(string tokenCode, List<string> input, string src
return "input was empty";
}

foreach (var inputString in input)
{
this.ModelManager.Translate(inputString, sourceLang, targetLang, modelTag);
}

/* Batch preordering was done earlier with batch translation, but it doesn't seem
* to be much quicker than normal translation, and it has the problem of providing all
* the translations at once at the end. Using normal translation means the MT is ready
* as soon as a sentence gets translated (you could do this for batch translation as well
* by adding an output-line handler, but that's not implemented yet). Batch translation should be
* much quicker in principle, but the correct parameters still need to be tested, so stick with
* normal translation for now. Using normal translate is also more robust: one less thing to break.
if (!this.ModelManager.BatchTranslationOngoing && !this.ModelManager.CustomizationOngoing)
{
this.ModelManager.PreTranslateBatch(input, sourceLang, targetLang, modelTag);
Expand All @@ -196,7 +208,9 @@ public string PreTranslateBatch(string tokenCode, List<string> input, string src
else
{
return "batch translation or customization already in process";
}
}*/

return "preorder received";
}


Expand Down
3 changes: 3 additions & 0 deletions OpusMTService/Marian/MarianTrainerConfig.cs
Expand Up @@ -32,6 +32,9 @@ public class MarianTrainerConfig
[YamlMember(Alias = "valid-translation-output", ApplyNamingConventions = false)]
public string validTranslationOutput { get; set; }

[YamlMember(Alias = "valid-max-length", ApplyNamingConventions = false)]
public string validMaxLength { get; set; }

[YamlMember(Alias = "guided-alignment", ApplyNamingConventions = false)]
public string guidedAlignment { get; set; }

Expand Down
1 change: 1 addition & 0 deletions OpusMTService/customize.yml
Expand Up @@ -10,5 +10,6 @@ early-stopping: 20
valid-freq: 100u
valid-metrics:
- translation
valid-max-length: 200
gradient-checkpointing: true
shuffle-in-ram: true

0 comments on commit 02540aa

Please sign in to comment.