In [44]:
# Required Libraries

import yfinance as yf
import pandas as pd
import plotly.express as px

The main focus of this practice exercise is to review and analyse the stock price evolution of a company over the last 5 years.

I chose Pfizer because I want to review how its value has changed over these years: at the time of writing (12 Sept 2025) we are still close to the consequences of the pandemic.

In [45]:
# Ticker handle for Pfizer (symbol "PFE")
pfizer = yf.Ticker("PFE")

# Download the last five years of price history, then move the date index
# into a regular "Date" column
price_history = pfizer.history(period="5y")
data = price_history.reset_index()


In [46]:
# Everything we need now lives in `data`;
# preview the first rows to check the columns and values
data.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2020-09-15 00:00:00-04:00,27.848159,27.967968,27.563613,27.675934,22003726,0.0,0.0
1,2020-09-16 00:00:00-04:00,27.915553,27.938019,27.496222,27.541151,22208412,0.0,0.0
2,2020-09-17 00:00:00-04:00,27.541153,27.683425,27.368925,27.571104,17894390,0.0,0.0
3,2020-09-18 00:00:00-04:00,27.54115,27.758302,27.3839,27.428829,31694623,0.0,0.0
4,2020-09-21 00:00:00-04:00,27.234141,27.316511,26.492822,26.972057,26608968,0.0,0.0


As an extra step, we can do some data exploration to spot basic patterns and behaviours in the data.

`info()` gives us the data type and non-null count of each column (data consistency), while `describe()` shows descriptive statistics about the distribution of the numeric columns.

In [47]:
# Reviewing data info and description
print('Info data:')
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() adds a stray "None" line after the report.
data.info()

print('Data description:')
# describe() returns a DataFrame of summary statistics; print() keeps it in the
# same text stream as the headings above.
print(data.describe())

Info data:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1256 entries, 0 to 1255
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype                           
---  ------        --------------  -----                           
 0   Date          1256 non-null   datetime64[ns, America/New_York]
 1   Open          1256 non-null   float64                         
 2   High          1256 non-null   float64                         
 3   Low           1256 non-null   float64                         
 4   Close         1256 non-null   float64                         
 5   Volume        1256 non-null   int64                           
 6   Dividends     1256 non-null   float64                         
 7   Stock Splits  1256 non-null   float64                         
dtypes: datetime64[ns, America/New_York](1), float64(6), int64(1)
memory usage: 78.6 KB
None
Data description:
              Open         High          Low        Close        Volume  \
count  1256.00000

In [48]:
# Drop the timezone from "Date" (the local wall-clock time is kept) so the
# column shows plain dates instead of "-04:00"-style offsets
data = data.assign(Date=lambda frame: frame["Date"].dt.tz_localize(None))
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2020-09-15,27.848159,27.967968,27.563613,27.675934,22003726,0.0,0.0
1,2020-09-16,27.915553,27.938019,27.496222,27.541151,22208412,0.0,0.0
2,2020-09-17,27.541153,27.683425,27.368925,27.571104,17894390,0.0,0.0
3,2020-09-18,27.54115,27.758302,27.3839,27.428829,31694623,0.0,0.0
4,2020-09-21,27.234141,27.316511,26.492822,26.972057,26608968,0.0,0.0


Now that the data is loaded, we can work with it. In this case, we'll just make a simple graph of the daily price evolution.

In [49]:
# Reshape to long format: one row per (Date, Type) pair, where Type is "Open" or
# "Close", so each price series can be drawn in its own subplot row
df_long = pd.melt(
    data,
    id_vars=["Date"],
    value_vars=["Open", "Close"],
    var_name="Type",
    value_name="Price",
)

# One line per price type, coloured by type and stacked in separate facet rows
fig = px.line(
    df_long,
    x="Date",
    y="Price",
    color="Type",
    facet_row="Type",
    title="PFE stock prices in the last 5 years",
)

# Render the image
fig.show()


As an extra element, we want to add reference lines marking the State of Emergency caused by COVID-19.

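For this case, we assume a start date of 2020-03-15 and an end date of 2021-07-01. Note that this start date falls before the first trading day in the data sample shown above (2020-09-15).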

In [50]:
# Define reference dates
# NOTE(review): start_date is earlier than the first row shown for the 5-year
# download (2020-09-15) — confirm the start marker lands where intended.
start_date = pd.to_datetime("2020-03-15")
end_date   = pd.to_datetime("2021-07-01")

# Tag stored in the `name` attribute of every shape/annotation this cell adds,
# so a re-run can remove exactly those and nothing else.
REFERENCE_TAG = "covid-emergency-reference"


def _add_reference_line(figure, when, label, color, tag=REFERENCE_TAG):
    """Draw a dashed vertical reference line at `when` with a caption above it.

    Parameters
    ----------
    figure : plotly figure the line and caption are added to (modified in place).
    when : x position of the line, on the figure's "x" axis.
    label : caption text placed at the top of the line.
    color : colour used for both the line and the caption.
    tag : value stored as `name` on the shape and the annotation.
    """
    # yref="paper" with y0=0 / y1=1 makes the line span the full figure height
    figure.add_shape(
        type="line",
        x0=when,
        x1=when,
        y0=0,
        y1=1,
        xref="x",
        yref="paper",
        line=dict(color=color, width=2, dash="dash"),
        name=tag,
    )
    # Caption sits at the top edge of the plotting area, anchored by its bottom
    figure.add_annotation(
        x=when,
        y=1,
        xref="x",
        yref="paper",
        text=label,
        showarrow=False,
        yanchor="bottom",
        font=dict(color=color),
        name=tag,
    )


# Make the cell re-runnable: drop only the shapes/annotations added by a previous
# run of this cell. Clearing *all* annotations would also delete the facet-row
# labels, which px.line stores as layout annotations.
fig.layout.shapes = [s for s in fig.layout.shapes if s.name != REFERENCE_TAG]
fig.layout.annotations = [a for a in fig.layout.annotations if a.name != REFERENCE_TAG]

# Adding vertical line references over the layout
reference_lines = [
    (start_date, "Emergency Started", "orange"),
    (end_date, "Emergency Ended", "green"),
]
for when, label, color in reference_lines:
    _add_reference_line(fig, when, label, color)

fig.show()