Skip to content

Commit

Permalink
Feature/configurable types and keys (#12)
Browse files Browse the repository at this point in the history
* replaced core vocabs with source

* setting metadata based on request

* fix method call

* added optional configuration

* chore: Added release notes

* chore: Update obsolete reference to ToHashSetEx

---------

Co-authored-by: Ziggy <zygis0408@gmail.com>
  • Loading branch information
andrew-hardy and zygis0408 committed Dec 13, 2023
1 parent 5568345 commit fbc388e
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 43 deletions.
2 changes: 2 additions & 0 deletions docs/4.0.0-release-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Features
- Update to support CluedIn 4.0
46 changes: 46 additions & 0 deletions src/Vocabularies/WebsiteVocabulary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,57 @@ public WebsiteVocabulary()
this.Title = this.Add(new VocabularyKey("Title"));
this.Logo = this.Add(new VocabularyKey("Logo", VocabularyKeyVisibility.Hidden));
this.CopyrightEntity = this.Add(new VocabularyKey("CopyrightEntity"));
this.WebsiteDescription = this.Add(new VocabularyKey("WebsiteDescription"));
this.Name = this.Add(new VocabularyKey("Name"));
this.URI = this.Add(new VocabularyKey("URI"));
this.PhoneNumber = this.Add(new VocabularyKey("PhoneNumber"));
this.FaxNumber = this.Add(new VocabularyKey("FaxNumber"));
this.ContactEmail = this.Add(new VocabularyKey("ContactEmail"));
this.Address = this.Add(new VocabularyKey("Address"));
this.Country = this.Add(new VocabularyKey("Country"));
this.TechnologiesListText = this.Add(new VocabularyKey("TechnologiesListText"));
this.AddressCountry = this.Add(new VocabularyKey("AddressCountry"));
this.PostalCode = this.Add(new VocabularyKey("PostalCode"));
this.StreetAddress = this.Add(new VocabularyKey("StreetAddress"));
this.FoundingDate = this.Add(new VocabularyKey("FoundingDate"));
this.Duns = this.Add(new VocabularyKey("Duns"));
this.GlobalLocationNumber = this.Add(new VocabularyKey("GlobalLocationNumber"));
this.IsicV4 = this.Add(new VocabularyKey("IsicV4"));
this.LeiCode = this.Add(new VocabularyKey("LeiCode"));
this.Naics = this.Add(new VocabularyKey("Naics"));
this.TaxId = this.Add(new VocabularyKey("TaxId"));
this.VatId = this.Add(new VocabularyKey("VatId"));
this.TickerSymbol = this.Add(new VocabularyKey("TickerSymbol"));
this.CVR = this.Add(new VocabularyKey("CVR"));
this.GoogleAnalytics = this.Add(new VocabularyKey("GoogleAnalytics"));
}

// Vocabulary keys exposed by this vocabulary. Each one visible in the constructor is assigned from
// this.Add(new VocabularyKey("<property name>")); the registration of Description is outside the visible hunk.
public VocabularyKey Description { get; protected set; }
public VocabularyKey Title { get; protected set; }
// Logo is the only key in view registered with VocabularyKeyVisibility.Hidden.
public VocabularyKey Logo { get; protected set; }
public VocabularyKey CopyrightEntity { get; protected set; }
public VocabularyKey WebsiteDescription { get; protected set; }
public VocabularyKey Name { get; protected set; }
public VocabularyKey URI { get; protected set; }
public VocabularyKey PhoneNumber { get; protected set; }
public VocabularyKey FaxNumber { get; protected set; }
public VocabularyKey ContactEmail { get; protected set; }
public VocabularyKey Address { get; protected set; }
public VocabularyKey Country { get; protected set; }
public VocabularyKey TechnologiesListText { get; protected set; }
// NOTE(review): the keys from AddressCountry through TickerSymbol presumably mirror the schema.org
// organization / postal-address fields of the same names — confirm against the provider.
public VocabularyKey AddressCountry { get; protected set; }
public VocabularyKey PostalCode { get; protected set; }
public VocabularyKey StreetAddress { get; protected set; }
public VocabularyKey FoundingDate { get; protected set; }
public VocabularyKey Duns { get; protected set; }
public VocabularyKey GlobalLocationNumber { get; protected set; }
public VocabularyKey IsicV4 { get; protected set; }
public VocabularyKey LeiCode { get; protected set; }
public VocabularyKey Naics { get; protected set; }
public VocabularyKey TaxId { get; protected set; }
public VocabularyKey VatId { get; protected set; }
public VocabularyKey TickerSymbol { get; protected set; }
// The provider writes these two from the "cvr" and "googleAnalytics" cases of its key switch.
public VocabularyKey CVR { get; protected set; }
public VocabularyKey GoogleAnalytics { get; protected set; }
}
}
19 changes: 18 additions & 1 deletion src/WebExternalSearchConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ public static class WebExternalSearchConstants

/// <summary>Names of the entries looked up in the provider's configuration dictionary.</summary>
public struct KeyName
{
// NOTE(review): no live code in view reads this key — confirm whether it is still needed.
public const string ApiToken = "apiToken";
/// <summary>Key of the optional setting that overrides which entity type the provider accepts.</summary>
public const string AcceptedEntityType = "acceptedEntityType";
/// <summary>Key of the optional setting naming the vocabulary key the website value is read from.</summary>
public const string WebsiteKey = "websiteKey";

}

Expand All @@ -25,6 +26,22 @@ public struct KeyName
/// <summary>
/// Controls declared in the <c>token</c> list: two non-required "input" controls whose
/// names are <see cref="KeyName.AcceptedEntityType"/> and <see cref="KeyName.WebsiteKey"/>.
/// </summary>
public static AuthMethods AuthMethods { get; set; } = new AuthMethods
{
token = new List<Control>()
{
// Optional: entity type to accept instead of the provider's default check.
new Control()
{
displayName = "Accepted Entity Type",
type = "input",
isRequired = false,
name = KeyName.AcceptedEntityType
},
// Optional: vocabulary key to read the website from.
new Control()
{
displayName = "Website vocab key",
type = "input",
isRequired = false,
name = KeyName.WebsiteKey
},
}
};

public static IEnumerable<Control> Properties { get; set; } = new List<Control>()
Expand Down
11 changes: 9 additions & 2 deletions src/WebExternalSearchJobData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,19 @@ public class WebExternalSearchJobData : CrawlJobData
{
/// <summary>Initializes the job data from a configuration dictionary.</summary>
/// <param name="configuration">
/// Dictionary queried with <see cref="WebExternalSearchConstants.KeyName.AcceptedEntityType"/> and
/// <see cref="WebExternalSearchConstants.KeyName.WebsiteKey"/>.
/// </param>
public WebExternalSearchJobData(IDictionary<string, object> configuration)
{
    AcceptedEntityType = GetValue<string>(configuration, WebExternalSearchConstants.KeyName.AcceptedEntityType);
    WebsiteKey = GetValue<string>(configuration, WebExternalSearchConstants.KeyName.WebsiteKey);
}

/// <summary>Writes the settings back into a configuration dictionary.</summary>
/// <returns>
/// A new dictionary holding <see cref="AcceptedEntityType"/> and <see cref="WebsiteKey"/> under their
/// <see cref="WebExternalSearchConstants.KeyName"/> keys. Values are stored as-is and may be null.
/// </returns>
public IDictionary<string, object> ToDictionary()
{
    // The earlier unconditional "return new Dictionary<string, object>();" made this
    // populated dictionary unreachable, so the settings were never round-tripped.
    return new Dictionary<string, object>
    {
        { WebExternalSearchConstants.KeyName.AcceptedEntityType, AcceptedEntityType },
        { WebExternalSearchConstants.KeyName.WebsiteKey, WebsiteKey }
    };
}
/// <summary>Value read from the configuration under <c>KeyName.AcceptedEntityType</c>.</summary>
public string AcceptedEntityType { get; set; }
/// <summary>Value read from the configuration under <c>KeyName.WebsiteKey</c>.</summary>
public string WebsiteKey { get; set; }
}
}
96 changes: 56 additions & 40 deletions src/WebExternalSearchProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,21 @@ public WebExternalSearchProvider()
/// <returns>The search queries.</returns>
public override IEnumerable<IExternalSearchQuery> BuildQueries(ExecutionContext context, IExternalSearchRequest request)
{
    if (!this.Accepts(request.EntityMetaData.EntityType))
    {
        yield break;
    }

    // This overload has no provider configuration. InternalBuildQueries looks its settings up on the
    // dictionary it is given, so hand it an empty one instead of the null default, which would throw
    // a NullReferenceException as soon as the result is enumerated.
    var emptyConfig = new Dictionary<string, object>();

    foreach (var externalSearchQuery in InternalBuildQueries(context, request, emptyConfig))
    {
        yield return externalSearchQuery;
    }
}
private IEnumerable<IExternalSearchQuery> InternalBuildQueries(ExecutionContext context, IExternalSearchRequest request, IDictionary<string, object> config = null)
{
// A non-blank configured entity type replaces the provider's default Accepts check.
// Both the dictionary and the stored value may be null (the setting is optional),
// so neither is dereferenced unconditionally.
if (config != null
    && config.TryGetValue(WebExternalSearchConstants.KeyName.AcceptedEntityType, out var customType)
    && !string.IsNullOrWhiteSpace(customType?.ToString()))
{
    if (!request.EntityMetaData.EntityType.Is(customType.ToString()))
    {
        yield break;
    }
}
else if (!this.Accepts(request.EntityMetaData.EntityType))
{
    yield break;
}

var existingResults = request.GetQueryResults<WebResult>(this).ToList();

Expand All @@ -81,7 +94,15 @@ public override IEnumerable<IExternalSearchQuery> BuildQueries(ExecutionContext

// Query Input
var entityType = request.EntityMetaData.EntityType;
var website = request.QueryParameters.GetValue(CluedIn.Core.Data.Vocabularies.Vocabularies.CluedInOrganization.Website, null);
var website = new HashSet<string>();
if (config.TryGetValue(WebExternalSearchConstants.KeyName.WebsiteKey, out var customVocabKeyWebsite) && !string.IsNullOrWhiteSpace(customVocabKeyWebsite?.ToString()))
{
website = request.QueryParameters.GetValue<string, HashSet<string>>(customVocabKeyWebsite.ToString(), new HashSet<string>());
}
else
{
website = request.QueryParameters.GetValue(CluedIn.Core.Data.Vocabularies.Vocabularies.CluedInOrganization.Website, new HashSet<string>()).ToHashSet();
}

if (website != null)
{
Expand Down Expand Up @@ -146,7 +167,7 @@ public override IEnumerable<Clue> BuildClues(ExecutionContext context, IExternal
{
var resultItem = result.As<WebResult>();

var code = this.GetOriginEntityCode(resultItem);
var code = this.GetOriginEntityCode(resultItem, request);

var clue = new Clue(code, context.Organization);
clue.Data.OriginProviderDefinitionId = this.Id;
Expand Down Expand Up @@ -235,9 +256,9 @@ private IEntityMetadata CreateMetadata(ExecutionContext context, IExternalSearch
/// <summary>Gets the origin entity code.</summary>
/// <param name="resultItem">The result item.</param>
/// <param name="request">
/// The search request. When it carries an origin entity code, that code's value is reused;
/// otherwise the lower-cased response URI of <paramref name="resultItem"/> is used.
/// </param>
/// <returns>The origin entity code.</returns>
private EntityCode GetOriginEntityCode(IExternalSearchQueryResult<WebResult> resultItem, IExternalSearchRequest request = null)
{
    // The request, its metadata and its origin code can each be absent, so fall back to the
    // response URI instead of dereferencing them unconditionally.
    var codeValue = request?.EntityMetaData?.OriginEntityCode?.Value
                    ?? resultItem.Data.RestResponse.ResponseUri.ToString().ToLowerInvariant();

    return new EntityCode(EntityType.Organization, this.GetCodeOrigin(), codeValue);
}

/// <summary>Gets the code origin.</summary>
Expand All @@ -254,25 +275,20 @@ private CodeOrigin GetCodeOrigin()
/// <param name="request"></param>
private void PopulateMetadata(ExecutionContext context, IEntityMetadata metadata, IExternalSearchQueryResult<WebResult> resultItem, IExternalSearchRequest request)
{
var code = this.GetOriginEntityCode(resultItem);
var code = this.GetOriginEntityCode(resultItem, request);

var orgWebSite = resultItem.Data.GetOrganizationWebsiteMetadata(context);

var name = orgWebSite.Name;

metadata.EntityType = EntityType.Organization;
metadata.Name = name;
metadata.DisplayName = orgWebSite.SchemaOrgOrganization != null ? orgWebSite.SchemaOrgOrganization.LegalName ?? orgWebSite.SchemaOrgOrganization.AlternateName : name;
metadata.EntityType = request.EntityMetaData.EntityType;
metadata.Name = request.EntityMetaData.Name;
metadata.OriginEntityCode = code;
metadata.Uri = orgWebSite.RequestUri;
metadata.Description = orgWebSite.WebsiteDescription;

metadata.Codes.Add(code);
metadata.Codes.Add(new EntityCode(EntityType.Organization, this.GetCodeOrigin(), resultItem.Data.RestResponse.ResponseUri.Host.ToLowerInvariant()));
metadata.Codes.Add(new EntityCode(EntityType.Web.Site, CodeOrigin.CluedIn, orgWebSite.ResponseUri.ToString().ToLowerInvariant())); // Force result to match back to original query

if (request.EntityMetaData != null && request.EntityMetaData.OriginEntityCode != null)
metadata.Codes.Add(request.EntityMetaData.OriginEntityCode);
metadata.Codes.Add(request.EntityMetaData.OriginEntityCode);

//// Aliases
if (orgWebSite.SchemaOrgOrganization != null)
Expand All @@ -289,44 +305,44 @@ private void PopulateMetadata(ExecutionContext context, IEntityMetadata metadata

var technologiesListText = string.Join(", ", orgWebSite.Technologies.Select(t => t.Name).OrderBy(t => t));

metadata.Properties[CluedInVocabularies.CluedInOrganization.Website] = resultItem.Data.RequestUri.PrintIfAvailable();
metadata.Properties[WebVocabulary.Website.URI] = resultItem.Data.RequestUri.PrintIfAvailable();

metadata.Properties[WebVocabulary.Website.Description] = orgWebSite.WebsiteDescription;
metadata.Properties[WebVocabulary.Website.WebsiteDescription] = orgWebSite.WebsiteDescription;
metadata.Properties[WebVocabulary.Website.Title] = orgWebSite.WebsiteTitle;
metadata.Properties[WebVocabulary.Website.Logo] = orgWebSite.Logo.PrintIfAvailable();
metadata.Properties[WebVocabulary.Website.CopyrightEntity] = orgWebSite.CopyrightEntity;

metadata.Properties[CluedInVocabularies.CluedInOrganization.OrganizationName] = orgWebSite.Name;
metadata.Properties[CluedInVocabularies.CluedInOrganization.PhoneNumber] = orgWebSite.PhoneNumber;
metadata.Properties[CluedInVocabularies.CluedInOrganization.Fax] = orgWebSite.FaxNumber;
metadata.Properties[CluedInVocabularies.CluedInOrganization.ContactEmail] = orgWebSite.ContactEmail;
metadata.Properties[CluedInVocabularies.CluedInOrganization.Address] = orgWebSite.Address;
metadata.Properties[CluedInVocabularies.CluedInOrganization.AddressCountryCode] = orgWebSite.Country;
metadata.Properties[CluedInVocabularies.CluedInOrganization.UsedTechnologies] = technologiesListText;
metadata.Properties[WebVocabulary.Website.Name] = orgWebSite.Name;
metadata.Properties[WebVocabulary.Website.PhoneNumber] = orgWebSite.PhoneNumber;
metadata.Properties[WebVocabulary.Website.FaxNumber] = orgWebSite.FaxNumber;
metadata.Properties[WebVocabulary.Website.ContactEmail] = orgWebSite.ContactEmail;
metadata.Properties[WebVocabulary.Website.Address] = orgWebSite.Address;
metadata.Properties[WebVocabulary.Website.Country] = orgWebSite.Country;
metadata.Properties[WebVocabulary.Website.TechnologiesListText] = technologiesListText;

// Properties below are only written when SchemaOrgOrganization is present.
if (orgWebSite.SchemaOrgOrganization != null)
{
    // The address is either a structured postal address or plain text.
    var postalAddress = orgWebSite.SchemaOrgOrganization.Address as SchemaOrgPostalAddress;
    if (postalAddress != null)
    {
        metadata.Properties[CluedInVocabularies.CluedInOrganization.AddressCountryCode] = (postalAddress.AddressCountry as SchemaOrgCountry).PrintIfAvailable(v => v.Name) ?? postalAddress.AddressCountry.PrintIfAvailable();
        metadata.Properties[CluedInVocabularies.CluedInOrganization.AddressZipCode] = postalAddress.PostalCode;
        metadata.Properties[CluedInVocabularies.CluedInOrganization.Address] = postalAddress.StreetAddress;
        metadata.Properties[WebVocabulary.Website.AddressCountry] = (postalAddress.AddressCountry as SchemaOrgCountry).PrintIfAvailable(v => v.Name) ?? postalAddress.AddressCountry.PrintIfAvailable();
        metadata.Properties[WebVocabulary.Website.PostalCode] = postalAddress.PostalCode;
        metadata.Properties[WebVocabulary.Website.Address] = postalAddress.StreetAddress;
    }
    else if (orgWebSite.SchemaOrgOrganization.Address is SchemaOrgText)
    {
        metadata.Properties[CluedInVocabularies.CluedInOrganization.Address] = orgWebSite.SchemaOrgOrganization.Address.PrintIfAvailable();
        metadata.Properties[WebVocabulary.Website.Address] = orgWebSite.SchemaOrgOrganization.Address.PrintIfAvailable();
    }

    metadata.Properties[CluedInVocabularies.CluedInOrganization.FoundingDate] = orgWebSite.SchemaOrgOrganization.FoundingDate;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesDunsNumber] = orgWebSite.SchemaOrgOrganization.Duns;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesGlobalLocationNumber] = orgWebSite.SchemaOrgOrganization.GlobalLocationNumber;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesIsicV4] = orgWebSite.SchemaOrgOrganization.IsicV4;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesLeiCode] = orgWebSite.SchemaOrgOrganization.LeiCode;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesNAICS] = orgWebSite.SchemaOrgOrganization.Naics;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.TaxId] = orgWebSite.SchemaOrgOrganization.TaxId;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.VatNumber] = orgWebSite.SchemaOrgOrganization.VatId;
    metadata.Properties[CluedInVocabularies.CluedInOrganization.TickerSymbol] = (orgWebSite.SchemaOrgOrganization as SchemaOrgCorporation).PrintIfAvailable(v => v.TickerSymbol);
    metadata.Properties[WebVocabulary.Website.FoundingDate] = orgWebSite.SchemaOrgOrganization.FoundingDate;
    metadata.Properties[WebVocabulary.Website.Duns] = orgWebSite.SchemaOrgOrganization.Duns;
    metadata.Properties[WebVocabulary.Website.GlobalLocationNumber] = orgWebSite.SchemaOrgOrganization.GlobalLocationNumber;
    metadata.Properties[WebVocabulary.Website.IsicV4] = orgWebSite.SchemaOrgOrganization.IsicV4;
    metadata.Properties[WebVocabulary.Website.LeiCode] = orgWebSite.SchemaOrgOrganization.LeiCode;
    metadata.Properties[WebVocabulary.Website.Naics] = orgWebSite.SchemaOrgOrganization.Naics;
    metadata.Properties[WebVocabulary.Website.TaxId] = orgWebSite.SchemaOrgOrganization.TaxId;
    // Bug fix: the VAT id used to be stored under the TaxId key, which overwrote the tax id
    // written on the previous line and left the VatId key unset.
    metadata.Properties[WebVocabulary.Website.VatId] = orgWebSite.SchemaOrgOrganization.VatId;
    metadata.Properties[WebVocabulary.Website.TickerSymbol] = (orgWebSite.SchemaOrgOrganization as SchemaOrgCorporation).PrintIfAvailable(v => v.TickerSymbol);

    //orgWebSite.SchemaOrgOrganization.Founders
}
Expand All @@ -353,11 +369,11 @@ private void PopulateMetadata(ExecutionContext context, IEntityMetadata metadata
switch (c.Key)
{
case "cvr":
metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesCVR] = c.Value;
metadata.Properties[WebVocabulary.Website.CVR] = c.Value;
break;

case "googleAnalytics":
metadata.Properties[CluedInVocabularies.CluedInOrganization.CodesGoogleAnalytics] = c.Value;
metadata.Properties[WebVocabulary.Website.GoogleAnalytics] = c.Value;
break;

case "swift":
Expand All @@ -379,7 +395,7 @@ public IEnumerable<EntityType> Accepts(IDictionary<string, object> config, IProv

/// <summary>Builds the search queries for a request using the supplied provider configuration.</summary>
/// <param name="context">The execution context.</param>
/// <param name="request">The external search request.</param>
/// <param name="config">Provider configuration, forwarded to <c>InternalBuildQueries</c>.</param>
/// <param name="provider">The provider; not used by this method.</param>
/// <returns>The queries produced by <c>InternalBuildQueries</c>.</returns>
public IEnumerable<IExternalSearchQuery> BuildQueries(ExecutionContext context, IExternalSearchRequest request, IDictionary<string, object> config, IProvider provider)
{
    // The earlier unconditional "return BuildQueries(context, request);" ignored the configuration
    // and made this config-aware call unreachable.
    return InternalBuildQueries(context, request, config);
}

public IEnumerable<IExternalSearchQueryResult> ExecuteSearch(ExecutionContext context, IExternalSearchQuery query, IDictionary<string, object> config, IProvider provider)
Expand Down

0 comments on commit fbc388e

Please sign in to comment.