In [32]:
import pandas as pd
import yfinance as yf
import yahoo_fin.stock_info as si
from yahoo_fin.stock_info import get_data
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from ipywidgets import interact, widgets,Output,VBox
from datetime import timedelta,datetime
from IPython.display import display, clear_output

1. Exploration

We take Apple (ticker AAPL) as a first example:

In [2]:
# Daily AAPL bars between the two dates below.
# Other intervals are available (from 1 minute up to 3 months).
nas_aapl=get_data(
    "aapl",
    start_date="11/30/2019",
    end_date="11/30/2024",
    index_as_date=False,
    interval="1d",
)
nas_aapl

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker
0,2019-12-02,66.817497,67.062500,65.862503,66.040001,64.024628,94487200,AAPL
1,2019-12-03,64.577499,64.882500,64.072502,64.862503,62.883064,114430400,AAPL
2,2019-12-04,65.267502,65.827499,65.169998,65.434998,63.438084,67181600,AAPL
3,2019-12-05,65.947502,66.472504,65.682503,66.394997,64.368782,74424400,AAPL
4,2019-12-06,66.870003,67.750000,66.824997,67.677498,65.612152,106075600,AAPL
...,...,...,...,...,...,...,...,...
1253,2024-11-22,228.059998,230.720001,228.059998,229.869995,229.869995,38168300,AAPL
1254,2024-11-25,231.460007,233.250000,229.740005,232.869995,232.869995,90152800,AAPL
1255,2024-11-26,233.330002,235.570007,233.330002,235.059998,235.059998,45986200,AAPL
1256,2024-11-27,234.470001,235.690002,233.809998,234.929993,234.929993,33498400,AAPL


In [3]:
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
nas_aapl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1258 non-null   datetime64[ns]
 1   open      1258 non-null   float64       
 2   high      1258 non-null   float64       
 3   low       1258 non-null   float64       
 4   close     1258 non-null   float64       
 5   adjclose  1258 non-null   float64       
 6   volume    1258 non-null   int64         
 7   ticker    1258 non-null   object        
dtypes: datetime64[ns](1), float64(5), int64(1), object(1)
memory usage: 78.8+ KB
None


2. Pre-processing

First, we check how many tickers are listed on the NASDAQ, since this is the market we want to work on. Only the first 30 tickers are kept for the rest of the notebook.

In [4]:
# Fetch every NASDAQ symbol, then keep the first 30 as the working set.
nas_list=si.tickers_nasdaq()
nasdaq_list=nas_list[0:30]

print("Tickers in Nasdaq:", len(nas_list))
print(nasdaq_list)

Tickers in Nasdaq: 4800
['AACG', 'AADI', 'AADR', 'AAL', 'AAME', 'AAOI', 'AAON', 'AAPB', 'AAPD', 'AAPL', 'AAPU', 'AAXJ', 'ABAT', 'ABCL', 'ABCS', 'ABEO', 'ABL', 'ABLLL', 'ABLLW', 'ABLV', 'ABLVW', 'ABNB', 'ABOS', 'ABP', 'ABPWW', 'ABSI', 'ABTS', 'ABUS', 'ABVC', 'ABVE']


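We download the price history of each selected ticker and stack it into two DataFrames (minute bars and daily bars), so that the data of one company can be retrieved by filtering on its ticker: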

In [5]:
# Date strings for the download window: minute bars are requested from 7 days ago.
dateToday=datetime.today().strftime("%Y-%m-%d")
date7days=(datetime.today()-timedelta(days=7)).strftime("%Y-%m-%d")

valid_nasdaq_list=[]
minute_frames=[]
daily_frames=[]
for ticker in nasdaq_list:
    data_tickers_min=get_data(ticker, start_date=date7days, index_as_date = True, interval="1m")
    data_tickers_d= get_data(ticker, start_date="11/30/2014", index_as_date = True, interval="1d")
    # Threshold: only keep tickers with more than 100 close values at both granularities.
    if((data_tickers_min["close"].count()>100)and(data_tickers_d["close"].count()>100)):
        minute_frames.append(data_tickers_min)
        daily_frames.append(data_tickers_d)
        valid_nasdaq_list.append(ticker)
    else:
        print(f"{ticker} removed")

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously accumulated rows on every iteration.
dfmin=pd.concat(minute_frames) if minute_frames else pd.DataFrame()
dfday=pd.concat(daily_frames) if daily_frames else pd.DataFrame()

def dataEng(data):
    """Return a cleaned copy of a price frame, leaving ``data`` untouched.

    The index is moved into a ``date`` column (converted to datetime), ``ticker``
    is cast to the pandas string dtype, a ``variation`` column (``high - low``)
    is added, and rows containing any missing value are dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``high``, ``low`` and ``ticker`` columns whose index holds
        the timestamps.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified, so the function can be called
        again on the same input and give the same result.
    """
    # reset_index() without inplace returns a new frame, so the caller's data is
    # never mutated (an in-place reset would add a second "date" column on re-run).
    df=data.reset_index()
    df=df.rename(columns={"index": "date"})
    df["date"]=pd.to_datetime(df["date"]) # ensure a proper datetime dtype
    df["ticker"]=df["ticker"].astype("string") # object dtype -> string dtype
    df["variation"]=df["high"]-df["low"] # spread between the highest and lowest value of the bar
    return df.dropna()

# Clean the daily and minute frames with the same pipeline.
df_day=dataEng(dfday)
df_min=dataEng(dfmin)

# Close-to-close percentage change, computed separately for each ticker.
close_by_ticker=df_day.groupby("ticker")["close"]
df_day["return"]=close_by_ticker.pct_change()

AADR removed
AAME removed
ABCS removed
ABLLW removed
ABLV removed
ABLVW removed
ABPWW removed


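We compute the Sharpe ratio of each ticker, i.e. (mean daily return − daily risk-free rate) / standard deviation of daily returns, and attach a label that explains what the value means: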

In [19]:
risk_free=0.02/252 # 2% per year, spread over the 252 open market days of a year

# Build one record per ticker and create the frame once, instead of writing each
# metric through a separate boolean-mask .loc assignment.
ratio_rows=[]
for ticker in valid_nasdaq_list:
    info=yf.Ticker(ticker).info

    daily_returns=df_day.loc[df_day["ticker"]==ticker,"return"]
    returnR=daily_returns.mean()
    vola=daily_returns.std()

    ratio_rows.append({
        "ticker":ticker,
        "sharpReturn":(returnR-risk_free)/vola, # Sharpe ratio on daily returns
        "peRatio":info.get("trailingPE"),
        "betaRatio":info.get("beta"),
        "vola":vola,
        "revenueGrowth":info.get("revenueGrowth"),
    })

ratio_columns=["ticker","sharpReturn","peRatio","betaRatio","vola","revenueGrowth"]
sharpReturnDf=pd.DataFrame(ratio_rows,columns=ratio_columns)

# Missing fundamentals come back as None from info.get(); coerce every metric
# column to a numeric dtype so they become NaN and numeric comparisons stay valid.
metric_columns=ratio_columns[1:]
sharpReturnDf[metric_columns]=sharpReturnDf[metric_columns].apply(pd.to_numeric,errors="coerce")

def sharpRatioLabel(ratio):
    """Translate a Sharpe ratio into a short qualitative label.

    Bins (lower bound inclusive, like the other thresholds):
    ``< 0`` -> "Bad", ``[0, 1)`` -> "Not so bad", ``[1, 2)`` -> "Good",
    ``>= 2`` -> "Amazing".

    Parameters
    ----------
    ratio : float
        Sharpe ratio; may be missing (None / NaN).

    Returns
    -------
    str or None
        The label, or None when the ratio is missing.
    """
    # A missing ratio has no label; this also avoids comparing None with a number.
    if pd.isna(ratio):
        return None
    if ratio<0:
        return "Bad"
    # Exactly 0 previously fell between the "Bad" and "Not so bad" bins and
    # silently produced None; it now belongs to the [0, 1) bin.
    if ratio<1:
        return "Not so bad"
    if ratio<2:
        return "Good"
    return "Amazing"

def longTermScore(line):
    """Score a ticker for long-term investment (0 to 7).

    Points are awarded as follows; a missing metric (None, NaN or pd.NA)
    simply earns no points:

    * ``peRatio`` below 20        -> +3
    * ``revenueGrowth`` above 0.1 -> +2
    * ``betaRatio`` below 1       -> +2

    Parameters
    ----------
    line : mapping or pandas.Series
        Row exposing the keys ``peRatio``, ``revenueGrowth`` and ``betaRatio``.

    Returns
    -------
    int
        The accumulated score.
    """
    score=0
    # pd.notna covers None, NaN and pd.NA, whereas `!= None` only excludes None.
    if pd.notna(line["peRatio"]) and line["peRatio"]<20:
        score+=3 # the P/E ratio weighs 1.5x as much as revenue growth or beta (3 vs 2 points)
    if pd.notna(line["revenueGrowth"]) and line["revenueGrowth"]>0.1:
        score+=2
    if pd.notna(line["betaRatio"]) and line["betaRatio"]<1:
        score+=2
    return score

def shortTermScore(line):
    """Score a ticker for short-term investment (0 to 7).

    Points are awarded as follows; a missing metric (None, NaN or pd.NA)
    simply earns no points:

    * ``sharpReturn`` above 1 -> +3
    * ``betaRatio`` above 1   -> +2
    * ``vola`` above 0.02     -> +2

    Parameters
    ----------
    line : mapping or pandas.Series
        Row exposing the keys ``sharpReturn``, ``betaRatio`` and ``vola``.

    Returns
    -------
    int
        The accumulated score.
    """
    score=0
    # Guard every metric: comparing None with a number raises TypeError, and
    # the long-term score already tolerates missing values.
    if pd.notna(line["sharpReturn"]) and line["sharpReturn"]>1:
        score+=3
    if pd.notna(line["betaRatio"]) and line["betaRatio"]>1:
        score+=2
    if pd.notna(line["vola"]) and line["vola"]>0.02:
        score+=2
    return score

# Label each Sharpe ratio, then add the two row-wise investment scores.
sharpe_labels=sharpReturnDf["sharpReturn"].apply(sharpRatioLabel)
sharpReturnDf["sharpRatioMeaning"]=sharpe_labels
for score_column,score_function in (("longTermScore",longTermScore),("shortTermScore",shortTermScore)):
    sharpReturnDf[score_column]=sharpReturnDf.apply(score_function,axis=1)

# Best long-term candidates first, ties broken by the higher Sharpe ratio.
sharpReturnDf=sharpReturnDf.sort_values(by=["longTermScore","sharpReturn"],ascending=False)
sharpReturnDf.head()

Unnamed: 0,ticker,sharpReturn,peRatio,betaRatio,vola,revenueGrowth,sharpRatioMeaning,longTermScore,shortTermScore
4,AAON,0.047165,54.44298,0.791,0.021758,0.168,Not so bad,4,2
0,AACG,0.020213,,0.782,0.199474,0.131,Not so bad,4,2
21,ABVC,0.019949,,0.816,82.80195,18.175,Not so bad,4,2
13,ABL,0.001755,,0.148,0.026246,0.333,Not so bad,4,2
1,AADI,-0.001468,,0.369,0.055896,0.21,Bad,4,2


In [18]:
# Number of minute-level rows available for each retained ticker.
for ticker in valid_nasdaq_list:
    is_ticker=df_min["ticker"]==ticker
    print(f"{ticker} : {df_min.loc[is_ticker,'ticker'].count()}")

AACG : 112
AADI : 593
AAL : 1944
AAOI : 1904
AAON : 1257
AAPB : 271
AAPD : 1019
AAPL : 1944
AAPU : 1291
AAXJ : 991
ABAT : 1529
ABCL : 1774
ABEO : 781
ABL : 991
ABLLL : 195
ABNB : 1932
ABOS : 840
ABP : 813
ABSI : 1841
ABTS : 136
ABUS : 1251
ABVC : 375
ABVE : 208


Preview of the minute-level data (filter on the `ticker` column to get the rows of a single company):

In [23]:
# First five rows of the cleaned minute-level frame.
df_min.head(n=5)

Unnamed: 0,date,open,high,low,close,volume,ticker,variation
0,2024-12-16 14:30:00,0.89,0.89,0.89,0.89,0.0,AACG,0.0
1,2024-12-16 14:31:00,0.9,0.9,0.9,0.9,374.0,AACG,0.0
31,2024-12-16 15:01:00,0.892,0.892,0.8901,0.8901,400.0,AACG,0.0019
45,2024-12-16 15:15:00,0.891,0.8915,0.89,0.89,1510.0,AACG,0.0015
70,2024-12-16 15:40:00,0.8901,0.8901,0.8901,0.8901,268.0,AACG,0.0


In [22]:
# Count of missing values in each column of the minute-level frame.
missing_per_column=df_min.isna().sum()
print(missing_per_column)

date         0
open         0
high         0
low          0
close        0
volume       0
ticker       0
variation    0
dtype: int64


3. Analysis and visualizations

Interface to help you choose a company depending on the desired investment horizon (long term or short term):

In [56]:
def recommandations(termTime):
    """Print the tickers ranked for the requested investment horizon.

    ``"Long Term"`` ranks by ``longTermScore``; any other value ranks by
    ``shortTermScore``. Ties are broken by the higher ``sharpReturn``.
    The ranking is read from the module-level ``sharpReturnDf``.
    """
    if termTime=="Long Term":
        score_column="longTermScore"
        title="Best companies to invest in for long time term investment: "
    else:
        score_column="shortTermScore"
        title="Best companies to invest in for short time term investment: "

    sortDF=sharpReturnDf.sort_values(by=[score_column,"sharpReturn"],ascending=[False,False])
    print(f"{title}\n")
    print(sortDF[["ticker",score_column]])

termTime=widgets.Dropdown(
    options=["Long Term","Short Term"],
    value="Long Term",
    description="Term Time : "
)

# Dedicated output area: text printed from a widget callback is only reliably
# shown (and replaceable) when it is captured by an Output widget.
recommendations_output=Output()

def click(button):
    """Button callback: show the ranking for the horizon selected in the dropdown."""
    with recommendations_output:
        # Replace the previous ranking instead of stacking one ranking per click.
        clear_output(wait=True)
        recommandations(termTime.value)

button=widgets.Button(description="Display")
button.on_click(click)
display(termTime,button,recommendations_output)

Dropdown(description='Term Time : ', options=('Long Term', 'Short Term'), value='Long Term')

Button(description='Display', style=ButtonStyle())

Best companies to invest in for short time term investment: 

   ticker  shortTermScore
3    AAOI               4
10   ABAT               4
20   ABUS               4
15   ABNB               4
19   ABTS               4
12   ABEO               4
2     AAL               4
18   ABSI               4
7    AAPL               2
4    AAON               2
8    AAPU               2
5    AAPB               2
0    AACG               2
21   ABVC               2
13    ABL               2
1    AADI               2
16   ABOS               2
17    ABP               2
11   ABCL               2
22   ABVE               2
14  ABLLL               0
9    AAXJ               0
6    AAPD               0


Interface to show the variation in stock value of a company:

In [58]:
def filter_data_by_period(ticker,periode):
    """Return the rows of one ticker that fall inside the requested look-back window.

    Rows come from the minute-level frame ``df_min`` for "1 Day" and "1 Week",
    and from the daily frame ``df_day`` for the longer periods.

    Parameters
    ----------
    ticker : str
        Ticker symbol to keep.
    periode : str
        One of "1 Day", "1 Week", "1 Month", "6 Months", "1 Year", "5 Years".

    Returns
    -------
    pandas.DataFrame
        Matching rows sorted by ``date`` (possibly empty).

    Raises
    ------
    ValueError
        If ``periode`` is not one of the supported labels.
    """
    dateToday=datetime.today()

    # Look-back length of every period except "1 Day", which is handled apart.
    lookbacks={
        "1 Week":timedelta(weeks=1),
        "1 Month":timedelta(weeks=4),
        "6 Months":timedelta(weeks=26),
        "1 Year":timedelta(weeks=52),
        "5 Years":timedelta(weeks=260),
    }

    if periode=="1 Day":
        # Window starts at midnight of the previous calendar day.
        # NOTE(review): the previous calendar day may hold no quotes (e.g. around
        # a weekend), in which case the result is empty - confirm this is intended.
        yesterday=dateToday-timedelta(days=1)
        start_date=yesterday.replace(hour=0,minute=0,second=0,microsecond=0)
    elif periode in lookbacks:
        start_date=dateToday-lookbacks[periode]
    else:
        # Fail with a clear message instead of an UnboundLocalError on start_date.
        raise ValueError(f"Unknown period: {periode!r}")

    source=df_min if periode in ("1 Day","1 Week") else df_day
    filtered=source[(source["date"]>=start_date)&(source["ticker"]==ticker)]

    return filtered.sort_values(by="date")

def plot_ticker_with_period(ticker,periode):
    """Plot the close values of ``ticker`` over ``periode`` with Plotly.

    The title shows the percentage change between the first and the last close
    of the window (0 when there is no data). For the intraday periods
    ("1 Day" and "1 Week") the x axis is categorical and the line is broken
    wherever two consecutive rows are more than 12 hours apart.
    """
    sub=filter_data_by_period(ticker,periode)
    intraday=periode in ("1 Day","1 Week")

    # Percentage change over the window; stays at 0 for an empty selection.
    var=0
    if not sub.empty:
        firstClose=sub["close"].iloc[0]
        lastClose=sub["close"].iloc[-1]
        var=((lastClose-firstClose)/firstClose)*100
    varClose=f"+{var:.2f}%" if var>0 else f"{var:.2f}%"

    if intraday:
        # Blank the first point after each gap of more than 12 hours so that the
        # line (drawn with connectgaps=False) is interrupted there.
        after_gap=sub["date"].diff()>timedelta(hours=12)
        sub.loc[after_gap,"close"]=None
    sub=sub.sort_values(by="date")

    # Intraday periods use "day hour:minute" text labels, the others the raw dates.
    x_label=sub["date"].dt.strftime("%d %H:%M") if intraday else sub["date"]

    close_trace=go.Scatter(
        x=x_label,
        y=sub["close"],
        mode="lines",
        name=f"Close value ({ticker})",
        line=dict(color="blue",width=2),
        connectgaps=False
    )
    fig=go.Figure()
    fig.add_trace(close_trace)

    if intraday:
        one_day=periode=="1 Day"
        xaxiss=dict(
            title="Hour" if one_day else "Date",
            type="category",
            nticks=24 if one_day else 7,
            showgrid=True
        )
    else:
        xaxiss=dict(title="Date",showgrid=True)

    fig.update_layout(
        title=f"Close values for {ticker} ({periode}) , {varClose}",
        xaxis=xaxiss,
        yaxis_title="Close value (in $)",
        template="plotly_white"
    )

    fig.show()

# Choices offered by the two dropdowns.
tickers=valid_nasdaq_list
periode=["1 Day","1 Week","1 Month","6 Months","1 Year","5 Years"]

# The trailing semicolon stops the notebook from echoing the call's return value
# (the plot_ticker_with_period function repr) below the widgets.
interact(
    plot_ticker_with_period,
    ticker=widgets.Dropdown(options=tickers,description="Select Ticker: "),
    periode=widgets.Dropdown(options=periode,description="Select Period: ")
);

interactive(children=(Dropdown(description='Select Ticker: ', options=('AACG', 'AADI', 'AAL', 'AAOI', 'AAON', …

<function __main__.plot_ticker_with_period(ticker, periode)>