## Stock Dashboard 

In [56]:
# Packages 
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import re
import pandas as pd
import yfinance as yf
from bs4 import BeautifulSoup
import requests
from datetime import date
from dateutil.relativedelta import relativedelta
import os
import pygwalker as pyg
import warnings
warnings.filterwarnings("ignore")

> **RETRIEVING DATA AND WRANGLING IT**

In [27]:
# RETRIEVING DATA AND WRANGLING IT

# Ticker symbols to analyze; the full company names are looked up from these below
ticker_list = [
    'MSFT',
    'MS',
    'AAPL',
    'TSLA',
    'GOOG',
]

def ticker(ticker_list, timeout=10):
    """Look up a company name for each ticker symbol on Yahoo Finance.

    For every symbol the quote page https://finance.yahoo.com/quote/<symbol>
    is fetched, and the text of its <title> tag up to the first " (" is taken
    as the company name.

    Parameters
    ----------
    ticker_list : iterable of str
        Ticker symbols, e.g. ['MSFT', 'AAPL'].
    timeout : float, optional
        Seconds to wait for each HTTP response (default 10). Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    list of str
        One name per symbol, in input order. When a page has no <title> tag
        the symbol itself is used as the name.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails or times out.
    """
    com_name = []
    # The loop variable is not called `ticker` so it does not shadow this function's name.
    for symbol in ticker_list:
        url = f"https://finance.yahoo.com/quote/{symbol}"
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        title_tag = soup.find('title')
        if title_tag is None:
            # No <title> in the response: fall back to the symbol instead of
            # failing with AttributeError on `None.text`.
            com_name.append(symbol)
            continue
        com_name.append(title_tag.text.split(' (')[0])
    return com_name

# Look up the display name of every ticker and show the result
com_name_list = ticker(ticker_list)
print(com_name_list)

# Analysis window: the five years ending today, as YYYY-MM-DD strings
today = date.today()
end_date = today.isoformat()  # current date
start_date = (today - relativedelta(years=5)).isoformat()  # same day, five years earlier

print("Start date:", start_date)
print("End date:", end_date)

# Downloading data (defining function + calling it)
def download_data(ticker_list, start_date, end_date, com_names=None):
    """Download price history for each ticker and tag it with the company name.

    Parameters
    ----------
    ticker_list : list of str
        Ticker symbols to download.
    start_date, end_date : str
        Passed to yf.download as `start` and `end`.
    com_names : sequence of str, optional
        Company names aligned with `ticker_list`. When omitted, the names
        are looked up with ticker(ticker_list), which fetches one web page
        per symbol. Pass already-retrieved names to avoid repeating that.

    Returns
    -------
    dict
        Maps each ticker symbol to the DataFrame returned by yf.download,
        with an added 'Comp_name' column holding the cleaned company name.

    Raises
    ------
    ValueError
        If `com_names` is given but its length differs from `ticker_list`.
    """
    if com_names is None:
        com_names = ticker(ticker_list)  # Get the company names for each ticker
    elif len(com_names) != len(ticker_list):
        raise ValueError("com_names must have one entry per ticker in ticker_list")

    data = {}
    for symbol, raw_name in zip(ticker_list, com_names):
        prices = yf.download(symbol, start=start_date, end=end_date, progress=False)
        # Remove text within square brackets and trim whitespace
        prices['Comp_name'] = re.sub(r'\s*\[.*?\]\s*', '', raw_name).strip()
        data[symbol] = prices
    return data

# Reuse the names already retrieved above instead of scraping them a second time
data = download_data(ticker_list, start_date, end_date, com_names=com_name_list)

# Concatenate the per-ticker frames in ticker_list order, then turn the date
# index into a regular column with a fresh 0..n-1 index
df = pd.concat([data[symbol] for symbol in ticker_list], axis=0).reset_index()

['Microsoft Corporation', 'Morgan Stanley', 'Apple Inc.', 'Tesla, Inc.', 'Alphabet Inc.']
Start date: 2018-03-14
End date: 2023-03-14


In [28]:
# Summarize the combined frame: column names, dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6295 entries, 0 to 6294
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       6295 non-null   datetime64[ns]
 1   Open       6295 non-null   float64       
 2   High       6295 non-null   float64       
 3   Low        6295 non-null   float64       
 4   Close      6295 non-null   float64       
 5   Adj Close  6295 non-null   float64       
 6   Volume     6295 non-null   int64         
 7   Comp_name  6295 non-null   object        
dtypes: datetime64[ns](1), float64(5), int64(1), object(1)
memory usage: 393.6+ KB


*Finding the dates of the highest and lowest values in the `High` column across all selected companies*

In [47]:
# Show the last 10 rows of the combined frame
df.tail(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Comp_name
6285,2023-02-28,89.540001,91.449997,89.519997,90.300003,90.300003,30546900,Alphabet Inc.
6286,2023-03-01,90.160004,91.199997,89.849998,90.510002,90.510002,26323900,Alphabet Inc.
6287,2023-03-02,89.860001,92.480003,89.769997,92.309998,92.309998,23328600,Alphabet Inc.
6288,2023-03-03,92.739998,94.110001,92.660004,94.019997,94.019997,30220900,Alphabet Inc.
6289,2023-03-06,94.360001,96.300003,94.300003,95.580002,95.580002,28288200,Alphabet Inc.
6290,2023-03-07,95.419998,96.089996,93.844002,94.169998,94.169998,24101500,Alphabet Inc.
6291,2023-03-08,94.404999,96.239998,94.404999,94.650002,94.650002,25395200,Alphabet Inc.
6292,2023-03-09,94.489998,95.919998,92.355003,92.660004,92.660004,24438900,Alphabet Inc.
6293,2023-03-10,92.5,93.18,90.800003,91.010002,91.010002,32831700,Alphabet Inc.
6294,2023-03-13,90.565002,93.080002,89.940002,91.660004,91.660004,31486400,Alphabet Inc.


In [43]:
# Date of the row holding the largest value in the High column (all companies together)
max_high_date = df.at[df['High'].idxmax(), 'Date']
print(max_high_date)

2021-11-04 00:00:00


In [42]:
# Date of the row holding the smallest value in the High column (all companies together)
min_high_date = df.at[df['High'].idxmin(), 'Date']
print(min_high_date)

2019-06-03 00:00:00


> **DATA VISUALIZATION**

In [55]:
# Create a line plot of the daily High, one colored line per company
fig = px.line(df, x="Date", y="High", color="Comp_name", title="Stock Highs over Time")

# Thin lines with small markers, plus hover text showing the date and the High value
fig.update_traces(
    mode="markers+lines",
    line=dict(width=2),
    marker=dict(size=2),
    hovertemplate="<br>".join([
        "Date: %{x}",
        "High: %{y:.2f}"
    ]),
)

# Add range slider for date selection
fig.update_layout(xaxis_rangeslider_visible=True)

# Add annotations at the largest and smallest High values.
# The coordinates are read from the data rather than hard-coded, because the
# download window moves with today's date and fixed values would go stale.
peak_row = df.loc[df['High'].idxmax()]
trough_row = df.loc[df['High'].idxmin()]
fig.add_annotation(
    x=peak_row['Date'].strftime("%Y-%m-%d"),
    y=float(peak_row['High']),
    text="All-time high for all selected companies",
    showarrow=True,
    arrowhead=1,
)
fig.add_annotation(
    x=trough_row['Date'].strftime("%Y-%m-%d"),
    y=float(trough_row['High']),
    text="All-time low for all selected companies",
    showarrow=True,
    arrowhead=1,
)

# Add shape to highlight period of interest (full-height band between the two dates)
fig.add_shape(type="rect", xref="x", yref="paper", x0="2020-03-23", x1="2020-06-08", y0=0, y1=1, fillcolor="lightgreen", opacity=0.3, layer="below")

# Update layout: centered title, axis and legend titles, font
fig.update_layout(
    title={
        'text': "Stock Highs over Time",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    xaxis_title="Date",
    yaxis_title="High",
    legend_title="Company",
    font=dict(
        family="Arial",
        size=12,
        color="black"
    ),
)

# Show plot
fig.show()



> **NEXT STEPS: PyGWalker**

In [30]:
# Hand the combined frame to PyGWalker and keep the returned object in `gwalker`
gwalker = pyg.walk(df)