In [1]:
import pandas as pd
import numpy as np 
import scipy.stats as stats

In [2]:
FILE_PATH = '../data/MSFT.csv'

In [3]:
msft = pd.read_csv(FILE_PATH)
msft

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-10,162.820007,163.220001,161.179993,161.339996,159.648727,20725900
1,2020-01-13,161.759995,163.309998,161.259995,163.279999,161.568405,21626500
2,2020-01-14,163.389999,163.600006,161.720001,162.130005,160.430450,23477400
3,2020-01-15,162.619995,163.940002,162.570007,163.179993,161.469452,21417900
4,2020-01-16,164.350006,166.240005,164.029999,166.169998,164.428116,23865400
...,...,...,...,...,...,...,...
247,2021-01-04,222.529999,223.000000,214.809998,217.690002,217.690002,37130100
248,2021-01-05,217.259995,218.520004,215.699997,217.899994,217.899994,23823000
249,2021-01-06,212.169998,216.490005,211.940002,212.250000,212.250000,35930700
250,2021-01-07,214.039993,219.339996,213.710007,218.289993,218.289993,27694500


In [32]:
msft['Date'] = pd.to_datetime(msft['Date'])

In [34]:
msft['Year'] = msft['Date'].dt.year

In [35]:
msft['Month'] = msft['Date'].dt.month

In [36]:
msft['Day'] = msft['Date'].dt.day

In [94]:
msft['Weekday'] = msft['Date'].dt.weekday

In [95]:
msft['Weekday'] = msft['Weekday'].map({
    0:"Monday",
    1:"Tuesday",
    2:"Wednesday",
    3:"Thursday",
    4:"Friday",
    5:"Saturday",
    6:"Sunday"
})

In [96]:
msft['Week_Range'] = msft['Date'].dt.to_period('W')

In [97]:
msft.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,PriceIncrease,Year,Month,Day,high_more_than_5pct_low,pct_change,Weekday,Week_Range
0,2020-01-10,162.820007,163.220001,161.179993,161.339996,159.648727,20725900,False,2020,1,10,False,False,Friday,2020-01-06/2020-01-12
1,2020-01-13,161.759995,163.309998,161.259995,163.279999,161.568405,21626500,True,2020,1,13,False,False,Monday,2020-01-13/2020-01-19
2,2020-01-14,163.389999,163.600006,161.720001,162.130005,160.43045,23477400,False,2020,1,14,False,False,Tuesday,2020-01-13/2020-01-19
3,2020-01-15,162.619995,163.940002,162.570007,163.179993,161.469452,21417900,True,2020,1,15,False,False,Wednesday,2020-01-13/2020-01-19
4,2020-01-16,164.350006,166.240005,164.029999,166.169998,164.428116,23865400,True,2020,1,16,False,False,Thursday,2020-01-13/2020-01-19


1. Probability of Increase: What is the probability that MSFT’s stock closes higher than its opening price on any given day?
---

In [22]:
print(f"Probability of closing higher than opening: {np.round(len(msft[msft['Close']>msft['Open']])/len(msft)*100, 3)}%")

Probability of closing higher than opening: 53.571%


2. Volume Analysis: Calculate the probability of the trading volume exceeding its average value over the past year.
---

In [41]:
avg_vol = msft[msft['Year']==msft['Year'].max()]['Volume'].mean()
print(f"Probability of trading volume exceeding its average value: {len(msft[(msft['Volume'] > avg_vol) & (msft['Year']==msft['Year'].max())]) / len(msft)*100}%")

Probability of trading volume exceeding its average value: 0.7936507936507936%


3. High-Low Range: What is the probability that the stock’s daily high is more than 5% above its daily low?
---

In [54]:
msft['high_more_than_5pct_low'] = msft['High'] > msft['Low']*1.05
print(f"Probability of the stocks daily high being more than 5% daily low: {msft['high_more_than_5pct_low'].value_counts(normalize=True).loc[True]}%")

Probability of the stocks daily high being more than 5% daily low: 0.10714285714285714%


4. Significant Drops: Determine the probability of MSFT’s stock experiencing a drop of more than 10% in its closing price compared to the previous day’s close.
---

In [69]:
msft['pct_change'] = msft['Close'].pct_change() <= -0.1
print(f"Probability of MSFT's stock experiencing a drop of more than 10% in its closing price: {len(msft[msft['pct_change'] == True]) / len(msft)*100}%")

Probability of MSFT's stock experiencing a drop of more than 10% in its closing price: 0.3968253968253968%


5. Consecutive Gains: What is the probability of the stock closing higher for three consecutive days?
---

In [72]:
msft['PriceIncrease'] = msft['Close'] > msft['Open']
consecutive_days = 3
probability_consecutive_increase = 0

for i in range(len(msft) - consecutive_days+1):
    if all(msft['PriceIncrease'][i:i+consecutive_days]):
        probability_consecutive_increase += 1

probability_consecutive_increase /= len(msft) - consecutive_days + 1

print(f"Probability of closing higher for {consecutive_days} consecutive days: {probability_consecutive_increase:.2%}")

Probability of closing higher for 3 consecutive days: 11.20%


6. Weekly Performance: Calculate the probability of MSFT stock closing higher at the end of the week compared to its opening price on Monday.
---

In [108]:
# for i in msft['Weekday']:
#     if i == 'Friday':

print(f"Probability of stock closing higher at end of week than opening price: {np.round(len(msft[(msft['Weekday'] == 'Monday') & (msft['PriceIncrease'] == False)]) / len(msft)*100, 4)}%")


Probability of stock closing higher at end of week than opening price: 7.5397%


In [101]:
msft

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,PriceIncrease,Year,Month,Day,high_more_than_5pct_low,pct_change,Weekday,Week_Range
0,2020-01-10,162.820007,163.220001,161.179993,161.339996,159.648727,20725900,False,2020,1,10,False,False,Friday,2020-01-06/2020-01-12
1,2020-01-13,161.759995,163.309998,161.259995,163.279999,161.568405,21626500,True,2020,1,13,False,False,Monday,2020-01-13/2020-01-19
2,2020-01-14,163.389999,163.600006,161.720001,162.130005,160.430450,23477400,False,2020,1,14,False,False,Tuesday,2020-01-13/2020-01-19
3,2020-01-15,162.619995,163.940002,162.570007,163.179993,161.469452,21417900,True,2020,1,15,False,False,Wednesday,2020-01-13/2020-01-19
4,2020-01-16,164.350006,166.240005,164.029999,166.169998,164.428116,23865400,True,2020,1,16,False,False,Thursday,2020-01-13/2020-01-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,2021-01-04,222.529999,223.000000,214.809998,217.690002,217.690002,37130100,False,2021,1,4,False,False,Monday,2021-01-04/2021-01-10
248,2021-01-05,217.259995,218.520004,215.699997,217.899994,217.899994,23823000,True,2021,1,5,False,False,Tuesday,2021-01-04/2021-01-10
249,2021-01-06,212.169998,216.490005,211.940002,212.250000,212.250000,35930700,True,2021,1,6,False,False,Wednesday,2021-01-04/2021-01-10
250,2021-01-07,214.039993,219.339996,213.710007,218.289993,218.289993,27694500,True,2021,1,7,False,False,Thursday,2021-01-04/2021-01-10
