https://medium.com/mlearning-ai/using-machine-learning-and-company-fundamentals-for-beating-the-stock-market-fa2d4ac438a7

In [3]:
import pandas as pd

# Load the CSV (parsing "date" as datetime) and key every row by
# the (ticker, date) pair in a single chained expression.
df = pd.read_csv("dataset.csv", parse_dates=["date"]).set_index(
    ["ticker", "date"]
)

In [5]:

# Minimum within-month return for a row to be labelled True ("Buy").
threshold = 0.05  # 5%

# Within-month return: adjusted close relative to adjusted open, minus one.
df["return_month"] = df["adjClose"].div(df["adjOpen"]).sub(1)

# Boolean target: True when the monthly return reaches the threshold.
df["target"] = df["return_month"].ge(threshold)

# Show the resulting label column.
df["target"]

ticker  date      
AAPL    2000-01-31    False
        2000-02-29     True
        2000-03-31     True
        2000-04-30    False
        2000-05-31    False
                      ...  
WMT     2020-08-31     True
        2020-09-30    False
        2020-10-31    False
        2020-11-30     True
        2020-12-31    False
Name: target, Length: 7160, dtype: bool

In [6]:
# Columns used as model inputs.
features = [
    "price_rate_of_change_1M",
    "price_rate_of_change_3M",
    "epsDil",
    "return_on_assets",
    "return_on_equity",
    "price_to_earnings_ratio",
    "debt_to_equity_ratio",
]

# Lag every feature by one period. The shift is applied per ticker
# (grouping on the "ticker" index level) so that values never slide
# from one ticker's rows into another's.
df[features] = df[features].groupby(level="ticker").shift(periods=1)

In [7]:
# After the one-period shift, the first row of each ticker has no lagged
# feature values (NaN), so flag those rows and keep everything else.
first_row_of_ticker = df.groupby(level="ticker").cumcount().eq(0)
df = df.loc[~first_row_of_ticker]

In [12]:
from pandas_profiling import ProfileReport

# Build the profiling report for the prepared frame in "minimal" mode.
profile = ProfileReport(
    df,
    title="Pandas Profiling Report",
    minimal=True,
)

# Render the report inline in the notebook.
profile.to_notebook_iframe()

ModuleNotFoundError: No module named 'pandas_profiling'

In [9]:
!pip install pandas_profiling

Collecting pandas_profiling


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\HP\\anaconda3\\Lib\\site-packages\\~arkupsafe\\_speedups.cp39-win_amd64.pyd'
Consider using the `--user` option or check the permissions.



  Downloading pandas_profiling-3.2.0-py2.py3-none-any.whl (262 kB)
Collecting multimethod>=1.4
  Downloading multimethod-1.8-py3-none-any.whl (9.8 kB)
Collecting visions[type_image_path]==0.7.4
  Downloading visions-0.7.4-py3-none-any.whl (102 kB)
Collecting pydantic>=1.8.1
  Downloading pydantic-1.9.1-cp39-cp39-win_amd64.whl (2.0 MB)
Collecting markupsafe~=2.1.1
  Downloading MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl (17 kB)
Collecting phik>=0.11.1
  Downloading phik-0.12.2-cp39-cp39-win_amd64.whl (685 kB)
Collecting tangled-up-in-unicode==0.2.0
  Downloading tangled_up_in_unicode-0.2.0-py3-none-any.whl (4.7 MB)
Collecting htmlmin>=0.1.12
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
Collecting imagehash
  Downloading ImageHash-4.2.1.tar.gz (812 kB)




Building wheels for collected packages: htmlmin, imagehash
  Building wheel for htmlmin (setup.py): started
  Building wheel for htmlmin (setup.py): finished with status 'done'
  Created wheel for htmlmin: filename=htmlmin-0.1.12-py3-none-any.whl size=27098 sha256=64729c1739c7b8df1596cacf0da9697de0c7301a9dc666e9d3e2bb5fcac78840
  Stored in directory: c:\users\hp\appdata\local\pip\cache\wheels\1d\05\04\c6d7d3b66539d9e659ac6dfe81e2d0fd4c1a8316cc5a403300
  Building wheel for imagehash (setup.py): started
  Building wheel for imagehash (setup.py): finished with status 'done'
  Created wheel for imagehash: filename=ImageHash-4.2.1-py2.py3-none-any.whl size=295207 sha256=eef166e4c91503bcdead2c6e07080e752f202666e9a503ed94464b9a70c47405
  Stored in directory: c:\users\hp\appdata\local\pip\cache\wheels\51\f9\a5\740af2fdb0ad1edf79aabdc41531be0b6f0b2e2be684c388cf
Successfully built htmlmin imagehash
Installing collected packages: tangled-up-in-unicode, multimethod, visions, markupsafe, imagehash

In [None]:
# Calendar year used as the boundary between training and test data.
split_date = 2020

# Year of every row, read from the "date" level of the index.
row_year = df.index.get_level_values("date").year

# Train on everything before the split year; test on the split year itself.
df_train = df.loc[row_year < split_date]
df_test = df.loc[row_year == split_date]

In [None]:
from lightgbm import LGBMClassifier

# Hyper-parameters for the classifier, collected in one place.
lgbm_params = dict(
    is_unbalance=True,
    max_depth=4,
    num_leaves=8,
    min_child_samples=400,
    n_estimators=50,
)

# Instantiate the classifier from the parameter set above.
estimator = LGBMClassifier(**lgbm_params)

# Train on the (lagged) feature columns against the boolean target.
estimator.fit(df_train[features], df_train["target"])

In [None]:
# Make predictions for the test period and attach them as the "buy" column.
# df_test was produced by selecting rows from df, so writing a column into it
# in place triggers pandas' SettingWithCopyWarning; assign() builds a new
# frame instead, which also keeps this cell safe to re-run.
df_test = df_test.assign(buy=estimator.predict(df_test[features]))

In [None]:
# Keep only the rows the model flagged as a buy, restricted to the columns
# needed to evaluate those picks.
picked_by_model = df_test["buy"].eq(True)
df_buy = df_test.loc[picked_by_model, ["return_month", "target", "buy"]]

In [None]:
# Per date: how many tickers were picked and their average monthly return.
df_results = df_buy.reset_index().groupby("date").agg(
    ticker=("ticker", "count"),
    return_month=("return_month", "mean"),
)

In [None]:
# Summary statistics of the per-date results (number of picked tickers and
# mean monthly return of the picks).
df_results.describe()

In [None]:
import numpy as np

def sharpe(s_return: pd.Series, annualize: int, rf: float = 0) -> float:
    """
    Compute the annualized Sharpe ratio of a return series.

    The ratio is the mean return in excess of ``rf`` divided by the standard
    deviation of the returns (as given by ``pd.Series.std``), scaled by the
    square root of ``annualize``.

    :param s_return: pd.Series of per-period returns
    :param annualize: int number of periods per year used for scaling
        (252 daily, 12 monthly, 4 quarterly)
    :param rf: float risk-free rate, subtracted from the mean return as-is
    :return: float annualized sharpe ratio
    """
    excess_mean = s_return.mean() - rf
    volatility = s_return.std()

    # Scale the per-period ratio up to a yearly figure.
    return np.sqrt(annualize) * (excess_mean / volatility)