# Notebook for first experiments with Yahoo Finance package for python

Importing libraries

In [51]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

from scipy.stats import gmean

One ticker symbol

In [14]:
dat = yf.Ticker("PCO.WA")
dat.get_info()

{'address1': 'Capital House',
 'address2': '14th Floor 25 Chapel Street',
 'city': 'London',
 'zip': 'NW1 5DH',
 'country': 'United Kingdom',
 'phone': '44 20 3735 9210',
 'website': 'https://www.pepcogroup.eu',
 'industry': 'Discount Stores',
 'industryKey': 'discount-stores',
 'industryDisp': 'Discount Stores',
 'sector': 'Consumer Defensive',
 'sectorKey': 'consumer-defensive',
 'sectorDisp': 'Consumer Defensive',
 'longBusinessSummary': "Pepco Group N.V. operates as a discount variety retailer in the United Kingdom, the Republic of Ireland, Poland, and rest of Europe. The company provides clothing's, such as kids and babywear; homeware-led general merchandise; and fast-moving consumer goods. It also offers home décors, chilled and frozen foods, toys, sweets, snacks, beverages, household cleaning products, cosmetics, office supplies, pet products, gardening, health, beauty, and seasonal products. In addition, the company provides direct product sourcing, product development, and tec

In [40]:
relevant_keys = [
    'city', 'country', 'industryKey', 'sectorKey', 'fullTimeEmployees', 'currency', 'tradeable', 'quoteType',
    'financialCurrency', 'region', 'fullExchangeName', 'exchange', 'exchangeTimezoneName', 'market', 'marketCap',
    'shortName', 'ebitda', 'totalDebt', 'debtToEquity', 'totalRevenue'] # market cap

In [53]:
def imput_asset_info(relevant_info, df):
    """
    Enriches the input DataFrame with additional asset information from Yahoo Finance.

    Parameters:
    - relevant_info: List of keys to fetch from yfinance .info dictionary
    - df: DataFrame that must include a 'Ticker' column

    Returns:
    - Merged DataFrame with additional asset information
    """
    columns = ['Ticker'] + relevant_info
    distinct_tickers = df['Ticker'].unique()
    rows = []

    for ticker in distinct_tickers:
        try:
            info_dict = yf.Ticker(ticker).info
            data_row = [ticker] + [info_dict.get(field, None) for field in relevant_info]
            rows.append(data_row)
        except Exception as e:
            print(f"Warning: Could not retrieve info for {ticker}: {e}")
            continue

    df_additional_info = pd.DataFrame(rows, columns=columns)
    return pd.merge(df, df_additional_info, how='left', on='Ticker')

In [None]:
data = {
    "Ticker": ["AAPL", "GOOGL", "MSFT", "TSLA", "AMZN"],
    "Company": ["Apple Inc.", "Alphabet Inc.", "Microsoft Corp.", "Tesla Inc.", "Amazon.com Inc."],
    "Price": [165.3, 142.7, 310.1, 182.4, 127.8],
    "Sector": ["Technology", "Technology", "Technology", "Automotive", "Consumer Discretionary"]
}

df = pd.DataFrame(data)

In [54]:
imput_asset_info(relevant_info=relevant_keys, df=df)

Unnamed: 0,Ticker,Company,Price,Sector,city,country,industryKey,sectorKey,fullTimeEmployees,currency,...,fullExchangeName,exchange,exchangeTimezoneName,market,marketCap,shortName,ebitda,totalDebt,debtToEquity,totalRevenue
0,AAPL,Apple Inc.,165.3,Technology,Cupertino,United States,consumer-electronics,technology,150000,USD,...,NasdaqGS,NMS,America/New_York,us_market,3073521745920,Apple Inc.,137352003584,96798998528,145.0,395760009216
1,GOOGL,Alphabet Inc.,142.7,Technology,Mountain View,United States,internet-content-information,communication-services,183323,USD,...,NasdaqGS,NMS,America/New_York,us_market,1907418464256,Alphabet Inc.,129496997888,28137000960,8.655,350018011136
2,MSFT,Microsoft Corp.,310.1,Technology,Redmond,United States,software-infrastructure,technology,228000,USD,...,NasdaqGS,NMS,America/New_York,us_market,2783207751680,Microsoft Corporation,142041006080,102909001728,33.998,261802000384
3,TSLA,Tesla Inc.,182.4,Automotive,Austin,United States,auto-manufacturers,consumer-cyclical,125665,USD,...,NasdaqGS,NMS,America/New_York,us_market,806510198784,"Tesla, Inc.",12549999616,7528999936,9.983,95724003328
4,AMZN,Amazon.com Inc.,127.8,Consumer Discretionary,Seattle,United States,internet-retail,consumer-cyclical,1556000,USD,...,NasdaqGS,NMS,America/New_York,us_market,1916599533568,"Amazon.com, Inc.",120467996672,155400994816,54.342,637959012352


In [78]:
values = [dat.info[k] for k in relevant_keys]

In [79]:
values

['London',
 'United Kingdom',
 'discount-stores',
 'consumer-defensive',
 29462,
 'PLN',
 False,
 'EQUITY',
 'EUR',
 'US',
 'Warsaw',
 'WSE',
 'Europe/Warsaw',
 'pl_market',
 'PEPCO']

In [3]:
wig20 = [
    "ALR.WA", "ALE.WA", "BDX.WA", "CCC.WA", "CDR.WA",
    "CPS.WA", "DNP.WA", "KTY.WA", "JSW.WA", "KGH.WA",
    "KRU.WA", "LPP.WA", "MBK.WA", "OPL.WA", "PEO.WA",
    "PGE.WA", "PKN.WA", "PKO.WA", "PZU.WA", "PCO.WA"
]

This function gives us all information stored in a security, is a dict type

In [82]:
data = yf.Tickers(wig20)

In [None]:
data.tickers[ticker].fast_info['currency']

KeyError: 1

In [4]:
start = datetime.datetime(2024,11,15)
end = datetime.datetime(2025,4,4)

In [5]:
def get_stocks(tickers, start, end):
    df = yf.download(tickers, start, end)[["Close"]].reset_index()
    df.columns = [col[1] if col[0] == 'Close' else 'Date' for col in df.columns]
    return df

In [75]:
data = yf.download(wig20, period='6y', group_by='column')
    
        
data = data.reset_index()
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

[*********************100%***********************]  20 of 20 completed


In [76]:
data.head()


Price,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,ALE.WA,ALR.WA,BDX.WA,CCC.WA,CDR.WA,CPS.WA,DNP.WA,JSW.WA,KGH.WA,KRU.WA,...,KTY.WA,LPP.WA,MBK.WA,OPL.WA,PCO.WA,PEO.WA,PGE.WA,PKN.WA,PKO.WA,PZU.WA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2019-04-24,,57.345783,101.512238,222.230484,199.159058,22.301405,126.400002,56.448448,100.32296,133.77179,...,620,3621,24977,1565428,,1346956,2426609,707948,2598172,1072220
2019-04-25,,56.43705,102.329788,229.40564,197.933182,22.065054,124.900002,56.874294,97.983345,132.362793,...,3520,1731,12871,963644,,507214,1815669,809071,1507611,889308
2019-04-26,,56.389225,102.19352,230.202881,199.53627,22.55464,127.0,56.164547,98.872398,141.645599,...,503,1287,19670,994793,,427001,1389663,577813,1087240,645464
2019-04-29,,56.484879,100.422165,205.687759,200.384949,22.368935,126.0,55.738701,98.685234,139.822189,...,1365,993,19713,806089,,226199,1592924,435147,1348469,847291
2019-04-30,,56.102257,100.422165,206.684311,202.648117,22.368935,127.0,54.934322,96.252014,143.800522,...,296,1649,14952,1964384,,777053,1703505,817421,3238208,1412784


 średnie do 5 lat, począwszy od dnia: dzień, tydzień, miesiąc, kwartał, pół-rocze, rok, 2, 3, 4, 5

In [29]:
data.sort_values('Date', ascending=False)['ALE.WA'].iloc[:7]

Date
2025-04-24    33.419998
2025-04-23    33.099998
2025-04-22    31.570000
2025-04-17    31.100000
2025-04-16    30.879999
2025-04-15    31.510000
2025-04-14    31.165001
Name: ALE.WA, dtype: float64

In [None]:
def calculate_averages(df, columns, type='arithmetic'):
    # we can adjust a function so we can aggregate any values in any column i.e. Close price or Volume
    df = df[column]
    # we assume our df is sorted by descending order of dates (latest is at the top), if not we can repair it
    df = df.sort_values('Date', ascending=False)
    # gather all the tickers
    tickers = df.columns.to_list()
    # store all required periods (as number of days)
    periods = [1, 5, 63, 90, 126, 252, 504, 756, 1008, 1260] # it can be adjusted to what we consider a 'week' in a trading year
    
    # handling column names
    if column == 'Volume':
        base = f"average{column}Last{{}}"
    base = f"average{column}PriceLast{{}}"

    columns = [
        'Ticker',
        base.format('Day'),
        base.format('Week'),
        base.format('Month'),
        base.format('Quarter'),
        base.format('HalfYear'),
        base.format('1Y'),
        base.format('2Y'),
        base.format('3Y'),
        base.format('4Y'),
        base.format('5Y')
    ]

    data_rows = []
    for ticker in tickers:
        row = [ticker]
        for period in periods:
            sliced = df[ticker].iloc[:period]  # latest `period` rows
            if type == 'arithmetic':
                row.append(sliced.mean(skipna=True)) # make sure we skip NaN
            elif type == 'geometric':
                row.append(gmean(sliced.dropna()))
            else:
                raise ValueError('Invalid type of mean!')
        data_rows.append(row)

    # Create final DataFrame
    aggregated_df = pd.DataFrame(data_rows, columns=columns)

    return aggregated_df


In [71]:
calculate_averages(data, 'Close')

Unnamed: 0,Ticker,AverageClosePriceLastDay,AverageClosePriceLastWeek,AverageClosePriceLastMonth,AverageClosePriceLastQuarter,AverageClosePriceLastHalfYear,AverageClosePriceLast1Y,AverageClosePriceLast2Y,AverageClosePriceLast3Y,AverageClosePriceLast4Y,AverageClosePriceLast5Y
0,ALE.WA,33.419998,32.013999,30.206428,29.431611,30.181627,33.371468,32.683909,30.292148,35.229954,39.430427
1,ALR.WA,111.050003,108.710001,102.131746,98.119778,96.001905,96.148791,79.253722,63.228727,58.217665,49.784147
2,BDX.WA,620.200012,627.6,563.860317,534.728888,527.355554,587.669839,537.547524,434.967158,378.172183,340.139097
3,CCC.WA,226.399994,229.439999,201.380952,196.628888,195.415079,164.746627,108.541349,86.038294,90.052619,85.293087
4,CDR.WA,224.699997,223.6,217.438095,211.040556,197.906746,175.182715,149.64772,137.565917,146.845783,182.71166
5,CPS.WA,17.780001,17.766,15.294683,15.035111,14.484762,13.521401,13.549371,15.207926,18.834347,19.983492
6,DNP.WA,519.200012,511.940002,471.766667,453.566666,429.614285,398.311508,411.032936,389.244312,370.702877,342.589762
7,JSW.WA,24.110001,23.692,24.38873,23.779111,24.451429,26.178492,33.836964,40.20541,41.255446,37.678294
8,KGH.WA,124.449997,118.639999,128.369841,126.511111,130.474206,137.08418,124.718956,121.468724,131.383922,131.369803
9,KRU.WA,388.600006,381.159998,405.919048,411.534445,415.811905,429.741733,418.060288,365.952519,340.16094,295.511541


In [64]:
data = yf.download(wig20, group_by='Ticker', period= '1d')
data.head()

[*********************100%***********************]  20 of 20 completed


Ticker,LPP.WA,LPP.WA,LPP.WA,LPP.WA,LPP.WA,PZU.WA,PZU.WA,PZU.WA,PZU.WA,PZU.WA,...,CCC.WA,CCC.WA,CCC.WA,CCC.WA,CCC.WA,MBK.WA,MBK.WA,MBK.WA,MBK.WA,MBK.WA
Price,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-04-23,15615.0,15845.0,15615.0,15760.0,501,58.119999,58.799999,57.939999,58.639999,205305,...,228.5,231.399994,226.699997,230.0,31403,831.0,839.0,830.799988,832.0,2856


In [None]:

data = data.stack(level=0).rename_axis(['Date', 'Ticker']).reset_index(level=1)

data['Change'] = data['Close'] - data['Open']
data['Daily Return'] = (data['Close'] - data['Open']) / data['Open']
data['Range'] = data['High'] - data['Low']
data['Volatility Ratio'] = data['Range'] / data['Open']
data['Upper Wick'] = data['High'] - np.maximum(data['Open'], data['Close'])
data['Lower Wick'] = np.minimum(data['Open'], data['Close']) - data['Low']
data['Candle type'] = np.where(data['Close'] > data['Open'], 1, 0)  # 1 means bullish, 0 means bearish

data.head(20)

This function gives us important dates of a security

In [24]:
dat.calendar 

{'Dividend Date': datetime.date(2025, 6, 12),
 'Ex-Dividend Date': datetime.date(2025, 5, 15),
 'Earnings Date': [datetime.date(2025, 4, 23), datetime.date(2025, 4, 28)],
 'Earnings High': 3.56,
 'Earnings Low': 3.16,
 'Earnings Average': 3.22562,
 'Revenue High': 69897682310,
 'Revenue Low': 67140000000,
 'Revenue Average': 68536220780}

In [25]:
dat.analyst_price_targets

{'current': 359.84,
 'high': 650.0,
 'low': 415.0,
 'mean': 503.86472,
 'median': 500.0}

In [26]:
dat.quarterly_income_stmt

Unnamed: 0,2024-12-31,2024-09-30,2024-06-30,2024-03-31,2023-12-31
Tax Effect Of Unusual Items,-203220000.0,57190000.0,-16263853.211009,-33660000.0,-29160000.0
Tax Rate For Calcs,0.18,0.19,0.191339,0.18,0.18
Normalized EBITDA,37915000000.0,37933000000.0,34416000000.0,33741000000.0,33556000000.0
Total Unusual Items,-1129000000.0,301000000.0,-85000000.0,-187000000.0,-162000000.0
Total Unusual Items Excluding Goodwill,-1129000000.0,301000000.0,-85000000.0,-187000000.0,-162000000.0
Net Income From Continuing Operation Net Minority Interest,24108000000.0,24667000000.0,22036000000.0,21939000000.0,21870000000.0
Reconciled Depreciation,6827000000.0,7383000000.0,6380000000.0,6027000000.0,5959000000.0
Reconciled Cost Of Revenue,21799000000.0,20099000000.0,19684000000.0,18505000000.0,19623000000.0
EBITDA,36786000000.0,38234000000.0,34331000000.0,33554000000.0,33394000000.0
EBIT,29959000000.0,30851000000.0,27951000000.0,27527000000.0,27435000000.0


Period must be of the format 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max, etc.
This function gives us information about historical values of a security

In [27]:
dat.history(period='5d')

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-03-31 00:00:00-04:00,372.540009,377.070007,367.23999,375.390015,35184700,0.0,0.0
2025-04-01 00:00:00-04:00,374.649994,382.850006,373.230011,382.190002,19689500,0.0,0.0
2025-04-02 00:00:00-04:00,377.970001,385.079987,376.619995,382.140015,16092600,0.0,0.0
2025-04-03 00:00:00-04:00,374.790009,377.480011,369.350006,373.109985,30198000,0.0,0.0
2025-04-04 00:00:00-04:00,364.130005,374.589996,359.480011,359.839996,49138700,0.0,0.0


In [28]:
dat.option_chain(dat.options[0]).calls

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency
0,MSFT250411C00230000,2025-04-04 14:53:22+00:00,230.0,137.15,128.15,132.5,-18.950012,-12.139662,3.0,0,1.586916,True,REGULAR,USD
1,MSFT250411C00260000,2025-03-07 20:54:19+00:00,260.0,133.5,98.15,102.5,0.0,0.0,2.0,1,1.200199,True,REGULAR,USD
2,MSFT250411C00290000,2025-03-28 14:04:20+00:00,290.0,97.7,68.7,73.0,0.0,0.0,1.0,1,0.973633,True,REGULAR,USD
3,MSFT250411C00300000,2025-04-04 17:46:49+00:00,300.0,66.54,58.85,63.0,-9.68,-12.700079,303.0,24,0.86133,True,REGULAR,USD
4,MSFT250411C00305000,2025-03-18 15:48:24+00:00,305.0,78.48,58.7,58.5,0.0,0.0,,1,1.119877,True,REGULAR,USD
5,MSFT250411C00310000,2025-04-04 19:07:18+00:00,310.0,52.0,53.85,53.5,-9.689999,-15.707569,1.0,6003,1.048711,True,REGULAR,USD
6,MSFT250411C00320000,2025-04-04 18:31:40+00:00,320.0,44.05,44.5,44.0,-31.350002,-41.57825,20.0,1,0.93921,True,REGULAR,USD
7,MSFT250411C00325000,2025-04-04 19:32:17+00:00,325.0,41.27,40.3,38.3,-10.48,-20.251207,33.0,15,0.86194,True,REGULAR,USD
8,MSFT250411C00330000,2025-04-04 17:38:26+00:00,330.0,34.1,36.2,33.9,-18.220001,-34.82416,25.0,13,0.833254,True,REGULAR,USD
9,MSFT250411C00335000,2025-04-04 15:37:12+00:00,335.0,34.9,31.95,29.65,-3.979999,-10.236624,8.0,27,0.796389,True,REGULAR,USD


Multiple ticker symbols

In [29]:
tickers = yf.Tickers('MSFT AAPL GOOG')
tickers.tickers['MSFT'].info
yf.download(['MSFT', 'AAPL', 'GOOG'], period='1mo')

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  3 of 3 completed


Price,Close,Close,Close,High,High,High,Low,Low,Low,Open,Open,Open,Volume,Volume,Volume
Ticker,AAPL,GOOG,MSFT,AAPL,GOOG,MSFT,AAPL,GOOG,MSFT,AAPL,GOOG,MSFT,AAPL,GOOG,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2025-03-05,235.740005,174.790878,401.019989,236.550003,175.550008,401.670013,229.229996,170.735486,388.809998,235.419998,172.123918,389.339996,47227600,18848000,23433100
2025-03-06,235.330002,174.011765,396.890015,237.860001,176.528886,402.149994,233.160004,172.31169,392.679993,234.440002,172.35365,394.279999,45170400,19082400,23304600
2025-03-07,239.070007,175.550003,393.309998,241.369995,176.698688,394.799988,234.759995,172.053986,385.540009,235.110001,173.044861,392.320007,46273600,16395300,22034100
2025-03-10,227.479996,167.809998,380.160004,236.160004,170.449997,386.399994,224.220001,165.565002,377.220001,235.539993,170.160004,385.839996,72071200,28990700,32840100
2025-03-11,220.839996,165.979996,380.450012,225.839996,168.654999,386.0,217.449997,163.240005,376.910004,223.809998,166.679993,379.0,76137400,23705900,30380200
2025-03-12,216.979996,169.0,383.269989,221.75,169.529999,385.220001,214.910004,165.479996,378.950012,220.139999,168.470001,382.950012,62547500,19880100,24253600
2025-03-13,209.679993,164.729996,378.769989,216.839996,168.119995,385.320007,208.419998,164.070007,377.450012,215.949997,167.979996,383.160004,61368300,15206200,20473000
2025-03-14,213.490005,167.619995,388.559998,213.949997,168.25,390.230011,209.580002,164.509995,379.51001,211.25,165.315002,379.779999,60107600,18611100,19952800
2025-03-17,214.0,166.570007,388.700012,215.220001,168.460007,392.709991,209.970001,165.809998,385.570007,213.309998,167.324997,386.700012,48073400,17839100,22474300
2025-03-18,212.690002,162.669998,383.519989,215.149994,166.440002,387.369995,211.490005,158.800003,381.100006,214.160004,165.960007,387.070007,42432400,24616800,19486900


In [31]:
spy = yf.Ticker('SPY').funds_data

In [32]:
spy.description

'The trust seeks to achieve its investment objective by holding a portfolio of the common stocks that are included in the index, with the weight of each stock in the portfolio substantially corresponding to the weight of such stock in the index.'

In [33]:
spy.top_holdings

Unnamed: 0_level_0,Name,Holding Percent
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,Apple Inc,0.069954
MSFT,Microsoft Corp,0.058503
NVDA,NVIDIA Corp,0.055643
AMZN,Amazon.com Inc,0.03762
META,Meta Platforms Inc Class A,0.02646
BRK-B,Berkshire Hathaway Inc Class B,0.020465
GOOGL,Alphabet Inc Class A,0.01891
AVGO,Broadcom Inc,0.016453
GOOG,Alphabet Inc Class C,0.015483
TSLA,Tesla Inc,0.015204
