In [54]:
import pandas as pd
import yfinance as yf
import yahoo_fin.stock_info as si
from yahoo_fin.stock_info import get_data
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from ipywidgets import interact, widgets
from datetime import timedelta,datetime

1. Exploration

We can take APPLE as a first example:

In [55]:
nas_aapl= get_data("aapl", start_date="11/30/2019", end_date="11/30/2024", index_as_date = False, interval="1d") #différents interval (1m to 3months)
nas_aapl

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker
0,2019-12-02,66.817497,67.062500,65.862503,66.040001,64.024628,94487200,AAPL
1,2019-12-03,64.577499,64.882500,64.072502,64.862503,62.883076,114430400,AAPL
2,2019-12-04,65.267502,65.827499,65.169998,65.434998,63.438084,67181600,AAPL
3,2019-12-05,65.947502,66.472504,65.682503,66.394997,64.368782,74424400,AAPL
4,2019-12-06,66.870003,67.750000,66.824997,67.677498,65.612160,106075600,AAPL
...,...,...,...,...,...,...,...,...
1253,2024-11-22,228.059998,230.720001,228.059998,229.869995,229.869995,38168300,AAPL
1254,2024-11-25,231.460007,233.250000,229.740005,232.869995,232.869995,90152800,AAPL
1255,2024-11-26,233.330002,235.570007,233.330002,235.059998,235.059998,45986200,AAPL
1256,2024-11-27,234.470001,235.690002,233.809998,234.929993,234.929993,33498400,AAPL


In [56]:
print(nas_aapl.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1258 non-null   datetime64[ns]
 1   open      1258 non-null   float64       
 2   high      1258 non-null   float64       
 3   low       1258 non-null   float64       
 4   close     1258 non-null   float64       
 5   adjclose  1258 non-null   float64       
 6   volume    1258 non-null   int64         
 7   ticker    1258 non-null   object        
dtypes: datetime64[ns](1), float64(5), int64(1), object(1)
memory usage: 78.8+ KB
None


2. Pre-processing

First we can check how much different tickers there are in the NASDAQ stock market as we want to work on this specific market

In [57]:
nas_list=si.tickers_nasdaq()
print("Tickers in Nasdaq:", len(nas_list))
print(nas_list[0:15])
nasdaq_list=nas_list[0:15]

Tickers in Nasdaq: 4811
['AACG', 'AADI', 'AADR', 'AAL', 'AAME', 'AAOI', 'AAON', 'AAPB', 'AAPD', 'AAPL', 'AAPU', 'AAXJ', 'ABAT', 'ABCL', 'ABCS']


We put the tickers we want in a dataframe so we can access it by the name of the ticker:

In [58]:
dfday=pd.DataFrame()
dfmin=pd.DataFrame()
dateToday=datetime.today().strftime("%Y-%m-%d")
date7days=(datetime.today()-timedelta(days=7)).strftime("%Y-%m-%d")
for ticker in nasdaq_list:
    data_tickers_d= get_data(ticker, start_date="11/30/2014", index_as_date = True, interval="1d")
    dfday=pd.concat([dfday,data_tickers_d])
    data_tickers_min=get_data(ticker, start_date=date7days, index_as_date = True, interval="1m")
    dfmin=pd.concat([dfmin,data_tickers_min])

def dataEng(data):
    df=data
    df.reset_index(inplace=True)
    df.rename(columns={"index": "date"}, inplace=True)
    df["date"]=pd.to_datetime(df["date"]) #To put the right date type
    df["ticker"]=df["ticker"].astype("string") #Was an object type and we put it as a String type
    df["variation"]=df["high"]-df["low"] #Variation between the highest value of the day and the lowest
    df["daily_return"]=df.groupby("ticker")["close"].pct_change()*100
    df=df.dropna()
    return df

df_day=dataEng(dfday)
df_min=dataEng(dfmin)


The default fill_method='ffill' in SeriesGroupBy.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



Example of how to access the data about a ticker:

In [59]:
df_min.head()

Unnamed: 0,date,open,high,low,close,volume,ticker,variation,daily_return
9,2024-12-13 15:02:00,0.8702,0.8702,0.8702,0.8702,261.0,AACG,0.0,0.0
15,2024-12-13 15:08:00,0.8941,0.8941,0.8941,0.8941,1248.0,AACG,0.0,2.746499
47,2024-12-13 15:40:00,0.8743,0.8892,0.8743,0.8892,670.0,AACG,0.0149,-0.548041
66,2024-12-13 15:59:00,0.8901,0.8901,0.8901,0.8901,295.0,AACG,0.0,0.101218
109,2024-12-13 16:42:00,0.8899,0.8899,0.8899,0.8899,1226.0,AACG,0.0,-0.022466


In [60]:
print(dfday.isna().sum())

date             0
open             0
high             0
low              0
close            0
adjclose         0
volume           0
ticker           0
variation        0
daily_return    15
dtype: int64


3. Analysis and visualizations

In [113]:
def filter_data_by_period(ticker,periode):
    dateToday=datetime.today()

    if periode=="1 Day":
        yesterday=dateToday-timedelta(days=1)
        start_date=yesterday.replace(hour=0,minute=0,second=0,microsecond=0)
    elif periode=="1 Week":
        start_date=dateToday-timedelta(weeks=1)
    elif periode=="1 Month":
        start_date=dateToday-timedelta(weeks=4)
    elif periode=="6 Months":
        start_date=dateToday-timedelta(weeks=26)
    elif periode=="1 Year":
        start_date=dateToday-timedelta(weeks=52)
    elif periode=="5 Years":
        start_date=dateToday-timedelta(weeks=260)

    if (periode=="1 Day") or (periode=="1 Week"):
        filtered=df_min[(df_min["date"]>=start_date)&(df_min["ticker"]==ticker)]
    else:
        filtered=df_day[(df_day["date"]>=start_date)&(df_day["ticker"]==ticker)]
    
    filtered=filtered.sort_values(by="date")
    return filtered

def plot_ticker_with_period(ticker,periode):
    sub=filter_data_by_period(ticker,periode)
    if(periode=="1 Day"):
        sub.loc[sub["date"].diff()>timedelta(hours=12),"close"]=None
        sub["heure"]=sub["date"].dt.strftime("%d %H:%M")
        sub=sub.sort_values(by="date")
        x_label=sub["heure"]
    elif(periode=="1 Week"):
        sub.loc[sub["date"].diff()>timedelta(hours=12),"close"]=None
        sub=sub.sort_values(by="date")
        sub["day"]=sub["date"].dt.strftime("%d %H:%M")
        x_label=sub["day"]
    else:
        sub=sub.sort_values(by="date")
        x_label=sub["date"]
    
    fig=go.Figure()
    fig.add_trace(go.Scatter(
        x=x_label,
        y=sub["close"],
        mode="lines",
        name=f"Close value ({ticker})",
        line=dict(color="blue",width=2),
        connectgaps=False
    ))
    if(periode=="1 Day"):
        titlex="Hour"
        ntickss=24
    elif((periode=="1 Week")):
        titlex="Date"
        ntickss=7
    else:
        titlex="Date"

    if((periode=="1 Day")or(periode=="1 Week")):
        xaxiss=dict(title=titlex,type="category",nticks=ntickss,showgrid=True)
    else:
        xaxiss=dict(title=titlex,showgrid=True)

    fig.update_layout(
        title=f"Close values for {ticker} ({periode})",
        xaxis=xaxiss,
        yaxis_title="Close value (in $)",
        template="plotly_white"
    )
    
    fig.show()

tickers=nasdaq_list
periode=["1 Day","1 Week","1 Month","6 Months","1 Year","5 Years"]

interact(
    plot_ticker_with_period,
    ticker=widgets.Dropdown(options=tickers,description="Select Ticker: "),
    periode=widgets.Dropdown(options=periode,description="Select Period: ")
)

interactive(children=(Dropdown(description='Select Ticker: ', options=('AACG', 'AADI', 'AADR', 'AAL', 'AAME', …

<function __main__.plot_ticker_with_period(ticker, periode)>