Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 4487 Get Fundamental for CSharp #4703

Merged
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
52 changes: 52 additions & 0 deletions Common/Util/PythonUtil.cs
Expand Up @@ -228,5 +228,57 @@ private static PyObject GetModule()
"def to_func(pyobject, t1, t2):\n" +
" return Func[t1, t2](pyobject)");
}

/// <summary>
/// Convert Python input to a list of Symbols
/// </summary>
/// <remarks>
/// Handles, in this order: a Python list whose first entry is a string (each entry is
/// turned into a US equity symbol), a Python list convertible to a list of
/// <see cref="Symbol"/>, a single Python string (turned into a US equity symbol) and
/// a single object convertible to <see cref="Symbol"/>.
/// </remarks>
/// <param name="input">Python object holding the ticker(s) or symbol(s) to convert</param>
/// <returns>List of Symbols; empty when <paramref name="input"/> is an empty Python list</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="input"/> is null</exception>
/// <exception cref="ArgumentException">Thrown when the input cannot be converted to symbols</exception>
public static IEnumerable<Symbol> ConvertToSymbols(PyObject input)
{
    if (ReferenceEquals(input, null))
    {
        throw new ArgumentNullException(nameof(input));
    }

    // Python objects are inspected below, so hold the GIL for the whole conversion
    using (Py.GIL())
    {
        List<Symbol> symbolsList;
        Symbol symbol;

        // Handle the possible types of conversions
        if (PyList.IsListType(input))
        {
            // An empty list has no first entry to inspect; indexing it would raise a Python IndexError
            if (input.Length() == 0)
            {
                return new List<Symbol>();
            }

            List<string> symbolsStringList;

            //Check if an entry in the list is a string type, if so then try and convert the whole list
            if (PyString.IsStringType(input[0]) && input.TryConvert(out symbolsStringList))
            {
                symbolsList = new List<Symbol>();
                foreach (var stringSymbol in symbolsStringList)
                {
                    symbol = QuantConnect.Symbol.Create(stringSymbol, SecurityType.Equity, Market.USA);
                    symbolsList.Add(symbol);
                }
            }
            //Try converting it to list of symbols, if it fails throw exception
            else if (!input.TryConvert(out symbolsList))
            {
                throw new ArgumentException($"Cannot convert list {input.Repr()} to symbols");
            }
        }
        else
        {
            //Check if its a single string, and try and convert it
            string symbolString;
            if (PyString.IsStringType(input) && input.TryConvert(out symbolString))
            {
                symbol = QuantConnect.Symbol.Create(symbolString, SecurityType.Equity, Market.USA);
                symbolsList = new List<Symbol> { symbol };
            }
            else if (input.TryConvert(out symbol))
            {
                symbolsList = new List<Symbol> { symbol };
            }
            else
            {
                // ArgumentException keeps this consistent with the list branch above
                throw new ArgumentException($"Cannot convert object {input.Repr()} to symbol");
            }
        }

        return symbolsList;
    }
}
}
}
219 changes: 184 additions & 35 deletions Research/QuantBook.cs
Expand Up @@ -30,11 +30,13 @@
using QuantConnect.Util;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using QuantConnect.Data.UniverseSelection;
using QuantConnect.Logging;
using QuantConnect.Packets;
using QuantConnect.Lean.Engine.DataFeeds.Enumerators.Factories;
using System.Threading.Tasks;

namespace QuantConnect.Research
{
Expand All @@ -45,11 +47,39 @@ public class QuantBook : QCAlgorithm
{
// Dynamic handle used to build pandas Series/DataFrame results
// (presumably the imported pandas module; the assignment is not visible in this view)
private dynamic _pandas;
// Cache provider created in the constructor as a ZipDataCacheProvider over the handlers' data provider
private IDataCacheProvider _dataCacheProvider;
// Data provider taken from the algorithm handlers in the constructor
private IDataProvider _dataProvider;
// Set once by the static constructor; true when an IPython kernel was detected
private static bool _isPythonNotebook;

/// <summary>
/// Static initializer: installs the configured log handler and detects whether
/// QuantBook is hosted by an IPython (Python notebook) kernel.
/// Detection failures are logged and leave the flag at false.
/// </summary>
static QuantBook()
{
    Logging.Log.LogHandler =
        Composer.Instance.GetExportedValueByTypeName<ILogHandler>(Config.Get("log-handler", "CompositeLogHandler"));

    //Determine if we are in a Python Notebook
    try
    {
        using (Py.GIL())
        {
            // The module defines IsPythonNotebook() either way; the fallback must return
            // Python's False (lower-case 'false' is an undefined name and raises NameError)
            var isPython = PythonEngine.ModuleFromString(Guid.NewGuid().ToString(),
                "try:\n" +
                "    import IPython\n" +
                "    def IsPythonNotebook():\n" +
                "        return (IPython.get_ipython() != None)\n" +
                "except:\n" +
                "    print('No IPython installed')\n" +
                "    def IsPythonNotebook():\n" +
                "        return False\n").GetAttr("IsPythonNotebook").Invoke();
            isPython.TryConvert(out _isPythonNotebook);
        }
    }
    catch (Exception exception)
    {
        //Default to false
        _isPythonNotebook = false;
        // Include the cause so a failed detection can be diagnosed from the log
        Logging.Log.Error($"QuantBook failed to determine Notebook kernel language: {exception.Message}");
    }

    Logging.Log.Trace($"QuantBook started; Is Python: {_isPythonNotebook}");
}

/// <summary>
Expand Down Expand Up @@ -98,6 +128,7 @@ public QuantBook() : base()
SetObjectStore(algorithmHandlers.ObjectStore);

_dataCacheProvider = new ZipDataCacheProvider(algorithmHandlers.DataProvider);
_dataProvider = algorithmHandlers.DataProvider;

var symbolPropertiesDataBase = SymbolPropertiesDatabase.FromDataFolder();
var registeredTypes = new RegisteredSecurityDataTypesProvider();
Expand Down Expand Up @@ -144,62 +175,122 @@ public QuantBook() : base()
}

/// <summary>
/// Python implementation of GetFundamental, get fundamental data for input symbols or tickers
/// </summary>
/// <param name="input">The symbols or tickers to retrieve fundamental data for</param>
/// <param name="selector">Selects a value from the Fundamental data to filter the request output</param>
/// <param name="start">The start date of selected data</param>
/// <param name="end">The end date of selected data</param>
/// <returns>pandas DataFrame with one row per date and one column per symbol ID</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="selector"/> is null, empty or white-space</exception>
public PyObject GetFundamental(PyObject input, string selector, DateTime? start = null, DateTime? end = null)
{
    //Null selector is not allowed for Python DataFrame
    if (string.IsNullOrWhiteSpace(selector))
    {
        throw new ArgumentException("Invalid selector. Cannot be None, empty or consist only of white-space characters");
    }

    //Convert to symbols
    var symbols = PythonUtil.ConvertToSymbols(input);

    //Fetch the data
    var fundamentalData = GetAllFundamental(symbols, selector, start, end);

    using (Py.GIL())
    {
        var data = new PyDict();
        foreach (var day in fundamentalData.OrderBy(x => x.Key))
        {
            // Order by symbol ID so every row lists its columns in the same order
            var orderedValues = day.Value.OrderBy(x => x.Key.ID.ToString());
            var columns = orderedValues.Select(x => x.Key.ID.ToString());
            var values = orderedValues.Select(x => x.Value);
            var row = _pandas.Series(values, columns);
            data.SetItem(day.Key.ToPython(), row);
        }

        // orient "index" turns each dictionary key (the date) into a row of the frame
        return _pandas.DataFrame.from_dict(data, orient:"index");
    }
}

/// <summary>
/// Get fundamental data from given symbols
/// </summary>
/// <remarks>
/// This is an iterator method: the data is fetched when the result is first enumerated,
/// not when the method is called.
/// </remarks>
/// <param name="symbols">The symbols to retrieve fundamental data for</param>
/// <param name="selector">Selects a value from the Fundamental data to filter the request output</param>
/// <param name="start">The start date of selected data</param>
/// <param name="end">The end date of selected data</param>
/// <returns>Enumerable collection of DataDictionaries ordered by date, one dictionary for each day there is data</returns>
public IEnumerable<DataDictionary<dynamic>> GetFundamental(IEnumerable<Symbol> symbols, string selector, DateTime? start = null, DateTime? end = null)
{
    var data = GetAllFundamental(symbols, selector, start, end);

    // Emit the per-day dictionaries in chronological order
    foreach (var kvp in data.OrderBy(entry => entry.Key))
    {
        yield return kvp.Value;
    }
}

/// <summary>
/// Get fundamental data for a given symbol
/// </summary>
/// <param name="symbol">The symbol to retrieve fundamental data for</param>
/// <param name="selector">Selects a value from the Fundamental data to filter the request output</param>
/// <param name="start">The start date of selected data</param>
/// <param name="end">The end date of selected data</param>
/// <returns>Enumerable collection of DataDictionaries, one Dictionary for each day there is data.</returns>
public IEnumerable<DataDictionary<dynamic>> GetFundamental(Symbol symbol, string selector, DateTime? start = null, DateTime? end = null)
{
    // Wrap the single symbol so the multi-symbol overload does the work
    var list = new List<Symbol>
    {
        symbol
    };

    return GetFundamental(list, selector, start, end);
}

/// <summary>
/// Get fundamental data for a given set of tickers
/// </summary>
/// <param name="tickers">The tickers to retrieve fundamental data for; each is mapped to a US equity symbol</param>
/// <param name="selector">Selects a value from the Fundamental data to filter the request output</param>
/// <param name="start">The start date of selected data</param>
/// <param name="end">The end date of selected data</param>
/// <returns>Enumerable collection of DataDictionaries, one dictionary for each day there is data.</returns>
public IEnumerable<DataDictionary<dynamic>> GetFundamental(IEnumerable<string> tickers, string selector, DateTime? start = null, DateTime? end = null)
{
    // Materialize the symbols up front, then defer to the symbol-based overload
    var list = tickers
        .Select(ticker => QuantConnect.Symbol.Create(ticker, SecurityType.Equity, Market.USA))
        .ToList();

    return GetFundamental(list, selector, start, end);
}

/// <summary>
/// Get fundamental data for a given ticker
/// </summary>
/// <param name="ticker">The ticker to retrieve fundamental data for; mapped to a US equity symbol</param>
/// <param name="selector">Selects a value from the Fundamental data to filter the request output</param>
/// <param name="start">The start date of selected data</param>
/// <param name="end">The end date of selected data</param>
/// <returns>
/// The result of the Python overload when a Python notebook was detected, otherwise an
/// enumerable collection of DataDictionaries, one Dictionary for each day there is data.
/// </returns>
public dynamic GetFundamental(string ticker, string selector, DateTime? start = null, DateTime? end = null)
{
    if (!_isPythonNotebook)
    {
        // C# caller: resolve the ticker and hand a one-element list to the symbol overload
        var singleSymbolList = new List<Symbol>
        {
            QuantConnect.Symbol.Create(ticker, SecurityType.Equity, Market.USA)
        };

        return GetFundamental(singleSymbolList, selector, start, end);
    }

    // PythonNet converts Python strings to .NET strings, which lands Python callers on this
    // overload; route them to the Python version so they get its return object back.
    var pythonTicker = ticker.ToPython();
    return GetFundamental(pythonTicker, selector, start, end);
}

/// <summary>
Expand Down Expand Up @@ -656,5 +747,63 @@ private object GetPropertyValue(object baseData, string fullName)

return baseData;
}

/// <summary>
/// Get all fundamental data for given symbols
/// </summary>
/// <param name="symbols">The symbols to retrieve fundamental data for</param>
/// <param name="selector">
/// Property to select from each data point via GetPropertyValue; when null, empty or
/// white-space the data point itself is stored
/// </param>
/// <param name="start">The start date of selected data; defaults to Time.BeginningOfTime</param>
/// <param name="end">The end date of selected data; defaults to Time.EndOfTime</param>
/// <returns>Dictionary keyed by data point time; each value maps a symbol to its selected data for that time</returns>
private Dictionary<DateTime, DataDictionary<dynamic>> GetAllFundamental(IEnumerable<Symbol> symbols, string selector, DateTime? start = null, DateTime? end = null)
{
    //SubscriptionRequest does not accept nullable DateTimes, so set a start and end time.
    //The UTC conversion does not depend on the symbol, so do it once up front.
    var startTimeUtc = (start ?? QuantConnect.Time.BeginningOfTime).ConvertToUtc(TimeZones.NewYork);
    var endTimeUtc = (end ?? QuantConnect.Time.EndOfTime).ConvertToUtc(TimeZones.NewYork);
    var hasSelector = !string.IsNullOrWhiteSpace(selector);

    //Collection to store our results
    var data = new Dictionary<DateTime, DataDictionary<dynamic>>();

    //Build factory
    var factory = new FineFundamentalSubscriptionEnumeratorFactory(false);

    //Get all data for each symbol and fill our dictionary
    var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };
    Parallel.ForEach(symbols, options, symbol =>
    {
        var config = new SubscriptionDataConfig(
            typeof(FineFundamental),
            symbol,
            Resolution.Daily,
            TimeZones.NewYork,
            TimeZones.NewYork,
            false,
            false,
            false
        );
        var security = Securities.CreateSecurity(symbol, config);
        var request = new SubscriptionRequest(false, null, security, config, startTimeUtc, endTimeUtc);

        // Read through the data provider supplied by the algorithm handlers rather than a
        // freshly constructed default one, so the configured provider is honored
        using (var enumerator = factory.CreateEnumerator(request, _dataProvider))
        {
            while (enumerator.MoveNext())
            {
                var current = enumerator.Current;
                object dataPoint = hasSelector
                    ? GetPropertyValue(current, selector)
                    : current;

                // The result dictionary is shared by all workers; guard every access to it
                lock (data)
                {
                    DataDictionary<dynamic> dataAtTime;
                    if (!data.TryGetValue(current.Time, out dataAtTime))
                    {
                        dataAtTime = new DataDictionary<dynamic>(current.Time);
                        data[current.Time] = dataAtTime;
                    }
                    dataAtTime.Add(current.Symbol, dataPoint);
                }
            }
        }
    });
    return data;
}
}
}