diff --git a/FinetuneTestsetExtractor/FinetuneWpfControl.xaml.cs b/FinetuneTestsetExtractor/FinetuneWpfControl.xaml.cs index 9859a42..7683c89 100644 --- a/FinetuneTestsetExtractor/FinetuneWpfControl.xaml.cs +++ b/FinetuneTestsetExtractor/FinetuneWpfControl.xaml.cs @@ -72,7 +72,7 @@ public FinetuneWpfControl(FinetuneBatchTaskSettings Settings) this.Settings = Settings; //Mode defaults, changeable with radio buttons this.Settings.Finetune = true; - this.Settings.BatchTranslate = true; + this.Settings.PreOrderMtForNewSegments = true; //Some settings are initially held in a FiskmoOptions object (the shared properties //with the translation provider settings). this.Options = new FiskmoOptions(); @@ -95,16 +95,16 @@ private void ModeButton_Checked(object sender, RoutedEventArgs e) switch (radioButton.Name) { case "FinetuneAndTranslate": - this.Settings.BatchTranslate = true; + this.Settings.PreOrderMtForNewSegments = true; this.Settings.Finetune = true; break; case "FinetuneOnly": this.Settings.Finetune = true; - this.Settings.BatchTranslate = false; + this.Settings.PreOrderMtForNewSegments = false; break; case "TranslateOnly": this.Settings.Finetune = false; - this.Settings.BatchTranslate = true; + this.Settings.PreOrderMtForNewSegments = true; break; } diff --git a/FiskmoTranslationProvider/FinetuneBatchTask.cs b/FiskmoTranslationProvider/FinetuneBatchTask.cs index ff05db0..48d77d3 100644 --- a/FiskmoTranslationProvider/FinetuneBatchTask.cs +++ b/FiskmoTranslationProvider/FinetuneBatchTask.cs @@ -17,8 +17,8 @@ namespace FiskmoTranslationProvider { [AutomaticTask("OPUSCATBatchTask", - "OPUS-CAT finetune and preorder machine translation", - "Task for finetuning OPUS MT models with project data, with optional preordering of MT for new segments in project (makes fetching MT much quicker during translation). 
IMPORTANT: Segment the files before running this task by opening them in the editor and saving, or by running Pretranslate or Pseudotranslate tasks.", + "OPUS-CAT Finetune and Preorder", + "Task for finetuning OPUS MT models, with optional preordering of MT for new segments. IMPORTANT: This task works only on segmented files. If files are not segmented, segment them by opening them in the editor and saving, or by running Pretranslate or Pseudotranslate tasks.", GeneratedFileType = AutomaticTaskFileType.None)] //[TODO] You can change the file type according to your needs [AutomaticTaskSupportedFileType(AutomaticTaskFileType.BilingualTarget)] @@ -150,7 +150,7 @@ private void AddFiskmoProviderToProject() } } - private void BatchTranslate() + private void PreOrderMt() { var projectInfo = this.Project.GetProjectInfo(); var projectGuid = projectInfo.Id; @@ -161,7 +161,7 @@ private void BatchTranslate() var targetCode = targetLang.CultureInfo.TwoLetterISOLanguageName; var uniqueNewSegments = this.ProjectNewSegments[targetLang].Distinct().ToList(); //Send the new segments to MT service - var result = FiskmöMTServiceHelper.PreTranslateBatch(fiskmoOptions.mtServiceAddress, fiskmoOptions.mtServicePort, uniqueNewSegments, sourceCode, targetCode, fiskmoOptions.modelTag); + var result = FiskmöMTServiceHelper.PreOrderBatch(fiskmoOptions.mtServiceAddress, fiskmoOptions.mtServicePort, uniqueNewSegments, sourceCode, targetCode, fiskmoOptions.modelTag); switch (result) { @@ -267,9 +267,9 @@ public override void TaskComplete() //Send the new segments to MT engine for pretranslation. 
//If finetuning is selected, the new segments are translated after //customization finished, so this is only for BatchTranslateOnly - if (settings.BatchTranslate == true && settings.Finetune == false) + if (settings.PreOrderMtForNewSegments == true && settings.Finetune == false) { - this.BatchTranslate(); + this.PreOrderMt(); } } diff --git a/FiskmoTranslationProvider/FinetuneBatchTaskSettings.cs b/FiskmoTranslationProvider/FinetuneBatchTaskSettings.cs index 3fdea49..46bea75 100644 --- a/FiskmoTranslationProvider/FinetuneBatchTaskSettings.cs +++ b/FiskmoTranslationProvider/FinetuneBatchTaskSettings.cs @@ -102,15 +102,15 @@ public bool IncludeTagPairs } } - public bool BatchTranslate + public bool PreOrderMtForNewSegments { get { - return GetSetting(nameof(BatchTranslate)); + return GetSetting(nameof(PreOrderMtForNewSegments)); } set { - GetSetting(nameof(BatchTranslate)).Value = value; + GetSetting(nameof(PreOrderMtForNewSegments)).Value = value; NotifyPropertyChanged(); } } diff --git a/FiskmoTranslationProvider/FinetuneWpfControl.xaml.cs b/FiskmoTranslationProvider/FinetuneWpfControl.xaml.cs index 9859a42..7683c89 100644 --- a/FiskmoTranslationProvider/FinetuneWpfControl.xaml.cs +++ b/FiskmoTranslationProvider/FinetuneWpfControl.xaml.cs @@ -72,7 +72,7 @@ public FinetuneWpfControl(FinetuneBatchTaskSettings Settings) this.Settings = Settings; //Mode defaults, changeable with radio buttons this.Settings.Finetune = true; - this.Settings.BatchTranslate = true; + this.Settings.PreOrderMtForNewSegments = true; //Some settings are initially held in a FiskmoOptions object (the shared properties //with the translation provider settings). 
this.Options = new FiskmoOptions(); @@ -95,16 +95,16 @@ private void ModeButton_Checked(object sender, RoutedEventArgs e) switch (radioButton.Name) { case "FinetuneAndTranslate": - this.Settings.BatchTranslate = true; + this.Settings.PreOrderMtForNewSegments = true; this.Settings.Finetune = true; break; case "FinetuneOnly": this.Settings.Finetune = true; - this.Settings.BatchTranslate = false; + this.Settings.PreOrderMtForNewSegments = false; break; case "TranslateOnly": this.Settings.Finetune = false; - this.Settings.BatchTranslate = true; + this.Settings.PreOrderMtForNewSegments = true; break; } diff --git a/FiskmoTranslationProvider/FiskmoProvider.cs b/FiskmoTranslationProvider/FiskmoProvider.cs index 76bac90..7754be1 100644 --- a/FiskmoTranslationProvider/FiskmoProvider.cs +++ b/FiskmoTranslationProvider/FiskmoProvider.cs @@ -218,11 +218,8 @@ private static void segmentChanged(FiskmoOptions options, LanguageDirection lang return; } - //TODO: time this to see if it's a bottleneck during translation. - //If this is too slow, it might be best to go with a doc changed handler that would collect all the source texts - //once as soon as the doc is changed and then you could use that collection to run the - //next segment checks. - //TESTED: doesn't seem slow at all, probably the translation part later that causes delay. 
+ var sourceSegmentTexts = new List(); + var nextSegmentPairs = doc.SegmentPairs.SkipWhile(x => !(x.Properties.Id == doc.ActiveSegmentPair.Properties.Id && x.GetParagraphUnitProperties().ParagraphUnitId == doc.ActiveSegmentPair.GetParagraphUnitProperties().ParagraphUnitId)); @@ -230,7 +227,7 @@ private static void segmentChanged(FiskmoOptions options, LanguageDirection lang var segmentsNeeded = options.pregenerateSegmentCount; foreach (var segmentPair in nextSegmentPairs) { - if (segmentsNeeded == 0) + if (sourceSegmentTexts.Count == segmentsNeeded) { break; } @@ -243,16 +240,15 @@ private static void segmentChanged(FiskmoOptions options, LanguageDirection lang visitor.Reset(); segmentPair.Source.AcceptVisitor(visitor); var sourceText = visitor.PlainText; - - var sourceCode = langDir.SourceLanguage.CultureInfo.TwoLetterISOLanguageName; - var targetCode = langDir.TargetLanguage.CultureInfo.TwoLetterISOLanguageName; - var langpair = $"{sourceCode}-{targetCode}"; - - //The preorder method doesn't wait for the translation, so the requests return quicker - FiskmöMTServiceHelper.PreOrder(options, sourceText, sourceCode, targetCode, options.modelTag); - segmentsNeeded -= 1; + sourceSegmentTexts.Add(sourceText); } } + + var sourceCode = langDir.SourceLanguage.CultureInfo.TwoLetterISOLanguageName; + var targetCode = langDir.TargetLanguage.CultureInfo.TwoLetterISOLanguageName; + + //The preorder method doesn't wait for the translation, so the requests return quicker + FiskmöMTServiceHelper.PreOrderBatch(options, sourceSegmentTexts, sourceCode, targetCode, options.modelTag); } //THIS IS DEPRECATED, REPLACED WITH SEGMENT CHANGE HANDLER EVENT diff --git "a/FiskmoTranslationProvider/Fiskm\303\266MTServiceHelper.cs" "b/FiskmoTranslationProvider/Fiskm\303\266MTServiceHelper.cs" index a06b292..a685a9d 100644 --- "a/FiskmoTranslationProvider/Fiskm\303\266MTServiceHelper.cs" +++ "b/FiskmoTranslationProvider/Fiskm\303\266MTServiceHelper.cs" @@ -163,7 +163,8 @@ public static string 
Translate(FiskmoOptions options, string input, string srcLa } } - public static void PreOrder(FiskmoOptions options, string input, string srcLangCode, string trgLangCode, string modelTag) + + public static void PreOrderBatch(FiskmoOptions options, List input, string srcLangCode, string trgLangCode, string modelTag) { Task.Run(() => { @@ -171,18 +172,18 @@ public static void PreOrder(FiskmoOptions options, string input, string srcLangC var proxy = getNewProxy(options.mtServiceAddress, options.mtServicePort); using (proxy as IDisposable) { - proxy.Translate(GetTokenCode(options), input, srcLangCode, trgLangCode, modelTag); + proxy.PreOrderBatch(GetTokenCode(options), input, srcLangCode, trgLangCode, modelTag); } }); } - internal static string PreTranslateBatch(string host, string mtServicePort, List projectNewSegments, string sourceCode, string targetCode, string modelTag) + internal static string PreOrderBatch(string host, string mtServicePort, List projectNewSegments, string sourceCode, string targetCode, string modelTag) { var proxy = getNewProxy(host, mtServicePort); using (proxy as IDisposable) { - return proxy.PreTranslateBatch(GetTokenCode(host, mtServicePort), projectNewSegments, sourceCode, targetCode, modelTag); + return proxy.PreOrderBatch(GetTokenCode(host, mtServicePort), projectNewSegments, sourceCode, targetCode, modelTag); } } diff --git a/OpusMTInterface/IMTService.cs b/OpusMTInterface/IMTService.cs index b1ad2d1..69d255b 100644 --- a/OpusMTInterface/IMTService.cs +++ b/OpusMTInterface/IMTService.cs @@ -60,9 +60,8 @@ public interface IMTService [OperationContract] [WebInvoke(Method = "POST", BodyStyle = WebMessageBodyStyle.Wrapped)] - string PreTranslateBatch(string tokenCode, List input, string srcLangCode, string trgLangCode, String modelId); + string PreOrderBatch(string tokenCode, List input, string srcLangCode, string trgLangCode, String modelId); - [OperationContract] [WebGet] void StoreTranslation(string tokenCode, string source, string 
target, string srcLangCode, string trgLangCode); diff --git a/OpusMTService/MTService.cs b/OpusMTService/MTService.cs index 4ee1e66..b83002a 100644 --- a/OpusMTService/MTService.cs +++ b/OpusMTService/MTService.cs @@ -174,7 +174,7 @@ public List BatchTranslate(string tokenCode, List input, string /// /// /// - public string PreTranslateBatch(string tokenCode, List input, string srcLangCode, string trgLangCode, string modelTag) + public string PreOrderBatch(string tokenCode, List input, string srcLangCode, string trgLangCode, string modelTag) { if (!TokenCodeGenerator.Instance.TokenCodeIsValid(tokenCode)) @@ -188,6 +188,18 @@ public string PreTranslateBatch(string tokenCode, List input, string src return "input was empty"; } + foreach (var inputString in input) + { + this.ModelManager.Translate(inputString, sourceLang, targetLang, modelTag); + } + + /* Batch preordering was done earlier with batch translation, but it doesn't seem + * to be much quicker than normal translation, and it has the problem of providing all + * the translations at once in the end. Using normal translation means the MT is ready + * as soon as a sentence gets translated (you could do this for batch translation as well + * by adding an outputline handler, but it's not implemented yet). Batch translation should be + * much quicker, need to test for correct parameters, so stick with this. Using normal translate + * is also more robust, one less thing to break. 
if (!this.ModelManager.BatchTranslationOngoing && !this.ModelManager.CustomizationOngoing) { this.ModelManager.PreTranslateBatch(input, sourceLang, targetLang, modelTag); @@ -196,7 +208,9 @@ public string PreTranslateBatch(string tokenCode, List input, string src else { return "batch translation or customization already in process"; - } + }*/ + + return "preorder received"; } diff --git a/OpusMTService/Marian/MarianTrainerConfig.cs b/OpusMTService/Marian/MarianTrainerConfig.cs index 9044262..538dce3 100644 --- a/OpusMTService/Marian/MarianTrainerConfig.cs +++ b/OpusMTService/Marian/MarianTrainerConfig.cs @@ -32,6 +32,9 @@ public class MarianTrainerConfig [YamlMember(Alias = "valid-translation-output", ApplyNamingConventions = false)] public string validTranslationOutput { get; set; } + [YamlMember(Alias = "valid-max-length", ApplyNamingConventions = false)] + public string validMaxLength { get; set; } + [YamlMember(Alias = "guided-alignment", ApplyNamingConventions = false)] public string guidedAlignment { get; set; } diff --git a/OpusMTService/customize.yml b/OpusMTService/customize.yml index b8ddedd..f071990 100644 --- a/OpusMTService/customize.yml +++ b/OpusMTService/customize.yml @@ -10,5 +10,6 @@ early-stopping: 20 valid-freq: 100u valid-metrics: - translation +valid-max-length: 200 gradient-checkpointing: true shuffle-in-ram: true \ No newline at end of file