In [1]:
import sys
print(sys.version) # 3.12
import numpy as np # !pip3 install numpy
import pandas as pd # !pip3 install pandas
import matplotlib.pyplot as plt # !pip3 install matplotlib
import seaborn as sns # !ip install seaborn
import yfinance as yf # !pip3 install yfinance

3.11.6 (main, Oct  2 2023, 13:45:54) [Clang 15.0.0 (clang-1500.0.40.1)]


In [3]:
def retrieve_hist_assets(assets: list, window_range="2y") -> list:
    """Retrieve asset history values.

    Ecrire une fonction qui prend en entrée une liste de 5 actifs et qui renvoie les prix journaliers sur
    une fenêtre de 2 à 3 ans (le choix est donné à l’utilisateur) à partir de Yahoo Finance.

    N.B.:
        - package link: https://pypi.org/project/yfinance/
    :param assets: list of assets (max 5)
    :return: asset historical list
    """
    if len(assets) > 5:
        raise ValueError("On ne peut pas avoir plus de 5 assets!")
    if not window_range in ["2y", "3y"]:
        raise ValueError("Tu peux seulement demander 2y -> historique 2 ans ou 3y -> ...")
    df = pd.DataFrame()
    for asset in assets:
        msft = yf.Ticker(asset)
        hist = msft.history(period=window_range)
        hist_df = pd.DataFrame(hist)
        hist_df["actif"] = asset
        hist_df = hist_df.reset_index()
        hist_df["jour"] = hist_df.Date.dt.strftime('%Y-%m-%d')
        hist_df["mois"] = hist_df.Date.dt.strftime('%Y-%m')
        hist_df["annee"] = hist_df.Date.dt.strftime('%Y')
        if df.empty:
            df = hist_df.copy()
        else:
            df = pd.concat([df, hist_df])
    return df.reset_index()

df = retrieve_hist_assets(assets=["AAPL", "MSFT", "AMZN"], window_range="3y")
df.sample(30)

Unnamed: 0,index,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,actif,jour,mois,annee
1233,479,2022-11-28 00:00:00-05:00,243.917217,244.482199,238.683623,239.635178,24778200,0.0,0.0,MSFT,2022-11-28,2022-11,2022
2055,547,2023-03-08 00:00:00-05:00,93.599998,94.169998,92.18,93.919998,44899100,0.0,0.0,AMZN,2023-03-08,2023-03,2023
1349,595,2023-05-16 00:00:00-04:00,307.87612,311.731656,307.87612,309.774078,26730300,0.0,0.0,MSFT,2023-05-16,2023-05,2023
420,420,2022-09-02 00:00:00-04:00,158.598946,159.204551,153.853388,154.687332,76957800,0.0,0.0,AAPL,2022-09-02,2022-09,2022
952,198,2021-10-15 00:00:00-04:00,296.358529,298.426801,294.574529,298.191528,25384800,0.0,0.0,MSFT,2021-10-15,2021-10,2021
264,264,2022-01-20 00:00:00-05:00,165.093679,167.763175,162.325307,162.651581,91420500,0.0,0.0,AAPL,2022-01-20,2022-01,2022
316,316,2022-04-05 00:00:00-04:00,175.718445,176.510419,172.669357,173.302933,73401800,0.0,0.0,AAPL,2022-04-05,2022-04,2022
1728,220,2021-11-16 00:00:00-05:00,176.949997,178.824997,176.257507,177.035004,44342000,0.0,0.0,AMZN,2021-11-16,2021-11,2021
1731,223,2021-11-19 00:00:00-05:00,185.634506,188.107498,183.785995,183.828506,98734000,0.0,0.0,AMZN,2021-11-19,2021-11,2021
2039,531,2023-02-13 00:00:00-05:00,97.849998,99.68,96.910004,99.540001,52841500,0.0,0.0,AMZN,2023-02-13,2023-02,2023


In [15]:
def get_rendement(df: pd.DataFrame) -> pd.DataFrame:
    """
    Écrire une fonction qui calcule le rendement quotidien de chaque actif. Intégrez dans cette fonction
    une variable pour calculer le rendement espéré annualisé de chaque actif. Représentez dans le
    même graphique les rendements quotidiens des 5 actifs avec des légendes claires.
    
    N.B.:
        - I used np.log -> est-ce que c'est le bon log
        - d'ailleurs shift(1) ou shift(-1)
    """
    new_df = df.copy()
    new_df["Rendement_Qtot"] = np.log(
        new_df.Close/new_df.Close.shift(1)
    )
    new_df = new_df[~new_df.index.isin(list(range(100)))]
    rendement_moyen_df = (
        new_df
        .groupby(["mois", "actif"])
        .agg({"Date": "first", "Rendement_Qtot": "mean"})
        .reset_index()
        .rename(
            columns={"Rendement_Qtot": "Rendement_Qtot_mean_month"}
        )
    )
    # new_df = (
    #     new_df
    #     .join(
    #         rendement_moyen_df,
    #         on=["Date", "actif"],
    #         how="left",
    #     )
    # )
    fig = plt.figure(figsize=(19, 11))
    plt.title("Evolution journalière du rendement par actif.")
    plt.ylabel("Rendenment journalier (en USD).")
    plt.xlabel("Date.")
    ticks_range = {
            index: new_df[new_df.index == index].mois.values[0]
        for index in new_df.index.unique()
        if index in [100, 200, 300]
    }
    # plt.xticks(ticks_range.keys(), ticks_range.values(), rotation='vertical')
    # plt.ylim((df['3. low'].min()-1, df['2. high'].max()+1))
    sns.lineplot(x=new_df["index"], y=new_df["Rendement_Qtot"], hue=new_df["actif"], markers=True, dashes=False)
    plt.show()
    return new_df

df_2 = get_rendement(df=df)
df_2.head()

ValueError: len(left_on) must equal the number of levels in the index of "right"

In [29]:
def get_volatility_and_export_to_csv(df: pd.DataFrame, filename="loulou.csv") -> None:
    """Compute volatility and export to .csv"""
    print(f"Creation du fichier: {filename}")
    df.to_csv(filename, index=False, sep=";")

get_volatility_and_export_to_csv(df=df)