Skip to content

Commit

Permalink
Strip html tags (#28)
Browse files Browse the repository at this point in the history
Add a helper for stripping html tags
  • Loading branch information
jinxiwu authored and RamjotSingh committed May 22, 2019
1 parent 142c5b7 commit e3139b8
Showing 1 changed file with 54 additions and 0 deletions.
@@ -0,0 +1,54 @@
// <copyright file="MessageActionsPayloadBodyExtensions.cs" company="Microsoft">
// Licensed under the MIT License.
// </copyright>

namespace Microsoft.Bot.Schema.Teams
{
using System;
using System.Collections.Generic;
using System.Text;
using HtmlAgilityPack;

/// <summary>
/// Extension methods for <see cref="MessageActionsPayloadBody"/>.
/// </summary>
public static class MessageActionsPayloadBodyExtensions
{
    /// <summary>
    /// Names of the elements that are emitted verbatim (as their outer HTML)
    /// instead of being reduced to their text content.
    /// </summary>
    private static readonly HashSet<string> TextRestrictedHtmlTags = new HashSet<string> { "at", "attachment" };

    /// <summary>
    /// Strip HTML tags from MessageActionsPayloadBody content.
    /// </summary>
    /// <remarks>
    /// Elements whose name is listed in <c>TextRestrictedHtmlTags</c> are kept as raw
    /// markup; every other element is replaced by the text of its descendants.
    /// Text is appended exactly as the parser reports it; this method performs no
    /// additional entity decoding or whitespace normalization.
    /// </remarks>
    /// <param name="body">The MessageActionsPayloadBody.</param>
    /// <returns>
    /// Plain text content, or an empty string when the body has no content.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="body"/> is <c>null</c>.
    /// </exception>
    public static string GetPlainTextContent(this MessageActionsPayloadBody body)
    {
        if (body == null)
        {
            throw new ArgumentNullException(nameof(body));
        }

        // A body without content has nothing to strip; avoid handing null to the parser.
        if (string.IsNullOrEmpty(body.content))
        {
            return string.Empty;
        }

        var doc = new HtmlDocument();
        doc.LoadHtml(body.content);

        var plainText = new StringBuilder();
        AppendPlainText(doc.DocumentNode, TextRestrictedHtmlTags, plainText);
        return plainText.ToString();
    }

    /// <summary>
    /// Recursively appends the plain-text rendering of <paramref name="node"/> to
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="node">The node to render.</param>
    /// <param name="tags">Element names that must be preserved as raw markup.</param>
    /// <param name="output">
    /// Shared buffer that receives the text; one buffer is used for the whole walk
    /// so that no intermediate strings are built per recursion level.
    /// </param>
    private static void AppendPlainText(HtmlNode node, ISet<string> tags, StringBuilder output)
    {
        // Preserved elements are copied wholesale, including their own tags and children.
        if (tags.Contains(node.Name))
        {
            output.Append(node.OuterHtml);
            return;
        }

        foreach (HtmlNode childNode in node.ChildNodes)
        {
            if (childNode.NodeType == HtmlNodeType.Text)
            {
                output.Append(childNode.InnerText);
            }
            else
            {
                // Elements (and any other non-text nodes) contribute only what their
                // descendants contribute; childless non-text nodes contribute nothing.
                AppendPlainText(childNode, tags, output);
            }
        }
    }
}
}

0 comments on commit e3139b8

Please sign in to comment.