Skip to content

Commit

Permalink
Update EPARTools after the EMA website update
Browse files Browse the repository at this point in the history
  • Loading branch information
Jongmassey committed Oct 5, 2018
1 parent 555d637 commit d7e27ef
Show file tree
Hide file tree
Showing 3 changed files with 1,715 additions and 31 deletions.
5 changes: 3 additions & 2 deletions VetMedData.NET.Tests/TestEPARTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ public void TestGetSearchResults()
var res = EPARTools.GetSearchResults("metacam").Result;
Assert.IsNotNull(res,"No results returned");
Assert.IsTrue(res.Length>0, "Empty results returned");
Assert.IsTrue(res[0].Equals("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Product_Information/veterinary/000033/WC500065777.pdf")
,$"Wrong url returned: {res[0]}");
Assert.IsTrue(res[0].Equals("https://www.ema.europa.eu/documents/product-information/metacam-epar-product-information_en.pdf")
//"http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Product_Information/veterinary/000033/WC500065777.pdf")
, $"Wrong url returned: {res[0]}");
}
}
}
46 changes: 17 additions & 29 deletions VetMedData.NET/Util/EPARTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;

Expand All @@ -16,11 +17,9 @@ public class EPARTools
{
private static readonly Uri EmaBaseUri = new Uri("http://www.ema.europa.eu/ema/");

//would be nice to use HttpUtility.ParseQueryString and handle as
//NameValueCollection but multiple "status" params get reformatted into
//comma-separated single param that the EMA search endpoint doesn't accept.
private const string EmaSearchUrl =
@"index.jsp?curl=pages%2Fmedicines%2Flanding%2Fvet_epar_search.jsp&mid=WC0b01ac058001fa1c&searchTab=searchByKey&alreadyLoaded=true&isNewQuery=true&status=Authorised&status=Withdrawn&status=Suspended&status=Refused&keyword={prodname}&keywordSearch=Submit&searchType=name&taxonomyPath=&treeNumber=";
@"https://www.ema.europa.eu/medicines/veterinary/EPAR/{prodname}";


// ReSharper disable once InconsistentNaming
public static bool IsEPAR(string url)
Expand Down Expand Up @@ -52,9 +51,8 @@ public static async Task<string[]> GetSearchResults(string productName)
}

/// <summary>
/// Searches for product using EPAR product search. If result found
/// uses an XPath query to get links from document, then filters those to get
/// the link to the product page. Then gets product page, xpath's links and
/// Builds the link to the product page and tries to fetch it; returns an empty array if it is not found.
/// Otherwise loads the product page, selects its links with XPath and
/// filters those to get the English-language PDF of the SPC document.
/// </summary>
/// <param name="productName">ReferenceProduct Name to search for</param>
Expand All @@ -64,37 +62,27 @@ private static async Task<string[]> GetSearchResultsInternal(string productName)
using (var cli = new HttpClient())
{
//build search URL
cli.BaseAddress = EmaBaseUri;
//cli.BaseAddress = EmaBaseUri;
var innerlinks = new List<string>();

var res = await cli.GetAsync(EmaSearchUrl.Replace("{prodname}", productName));
if (res.StatusCode != HttpStatusCode.OK)
{
return innerlinks.ToArray();
}

//load results page
var doc = new HtmlDocument();
doc.Load(await res.Content.ReadAsStreamAsync());

//extract list of links
var outerlinks = doc.DocumentNode.SelectNodes("//a[@href]");

//product result (if any) link will be link with same text as search term
//should only be one but, you never know...
foreach (var outerlink in outerlinks.Where(n =>
n.InnerText.Equals(productName, StringComparison.InvariantCultureIgnoreCase)))
{
var productResponse = await cli.GetAsync(outerlink.Attributes["href"].Value);

var innerdoc = new HtmlDocument();
innerdoc.Load(await productResponse.Content.ReadAsStreamAsync());
//extract en_GB pdf links to "ReferenceProduct Information" - i.e. SPC
var doclinks = innerdoc.DocumentNode.SelectNodes("//a[@href]")
.Where(n =>
n.InnerText.Contains("EPAR - Product Information") &&
n.Attributes["href"].Value.Contains("en_GB") &&
n.Attributes["href"].Value.EndsWith(".pdf"))
//format as absolute URI
.Select(n => $"http://{EmaBaseUri.Host}{n.Attributes["href"].Value}");

innerlinks.AddRange(doclinks);
}
var doclinks = doc.DocumentNode.SelectNodes("//a[@href]")
.Where(n =>
n.InnerText.Contains("EPAR - Product Information") &&
n.Attributes["href"].Value.EndsWith(".pdf"))
.Select(n => n.Attributes["href"].Value);
innerlinks.AddRange(doclinks);

return innerlinks.ToArray();
}
Expand Down
Loading

0 comments on commit d7e27ef

Please sign in to comment.