In [113]:
! pip install numpy pandas seaborn matplotlib yfinance plotly mplfinance --quiet

In [114]:
import numpy as np
import pandas as pd
import yfinance as yf
import seaborn as sns
import datetime as dt
import mplfinance as mpf
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

In [115]:
# Set some display options
plt.style.use('seaborn-v0_8-darkgrid')
pd.set_option('display.max_columns', None)

# 1: Data Exploration & Visualization

In [116]:
# 1. Obtaining Data
start = dt.datetime(2023, 1, 1)
end = dt.datetime(2023, 12, 31)

df_single=yf.download('AAPL',start,end, auto_adjust=True,progress=False )
df_single = df_single.xs('AAPL', axis=1, level=1)
df_single.head().dropna() #dropped Rows with Missing Values
# df_single.info()




Price,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-03,123.470619,129.22606,122.582127,128.613993,112117500
2023-01-04,124.744125,127.014716,123.480495,125.267347,89113600
2023-01-05,123.421257,126.13609,123.164587,125.504275,80962700
2023-01-06,127.962418,128.623848,123.292909,124.398589,87754700
2023-01-09,128.485641,131.703962,128.228972,128.801557,70790800


In [117]:
# 2. Visual Insight

fig = go.Figure(data=[
    go.Candlestick(x=df_single.index,
                   open=df_single['Open'],
                   high=df_single['High'],
                   low=df_single['Low'],
                   close=df_single['Close'])
              ])
fig.update_layout(title=f"Candlestick Chart - {'AAPL'}", xaxis_title='Date', yaxis_title='Price')
fig.show()



In [118]:
# Let's Filter 5 months data for a better visualization of patterns, trends
start_date = '2023-06-01'
end_date   = '2023-11-30'
df_filtered = df_single.loc[start_date:end_date]

fig = go.Figure(data=[
        go.Candlestick(
            x=df_filtered.index,
            open=df_filtered['Open'],
            high=df_filtered['High'],
            low=df_filtered['Low'],
            close=df_filtered['Close']
        )
    ])
fig.update_layout(
        title=f"Candlestick Chart - AAPL ({start_date} to {end_date})",
        xaxis_title='Date',
        yaxis_title='Price'
    )
fig.show()




In [119]:
fig.layout.update({
    'title': 'Candlestick Chart - AAPL',
    'xaxis_title': 'Date',
    'yaxis_title': 'Price',

    'shapes': [
        {'x0': '2023-08-01', 'x1': '2023-08-01', 'y0': 0, 'y1': 1, 'xref': 'x', 'yref': 'paper'},
        {'x0': '2023-10-12', 'x1': '2023-10-12', 'y0': 0, 'y1': 1, 'xref': 'x', 'yref': 'paper'},
        {'x0': '2023-10-30', 'x1': '2023-10-30', 'y0': 0, 'y1': 1, 'xref': 'x', 'yref': 'paper'}
    ],

    'annotations': [
        {'x': '2023-08-01', 'y': 0.05, 'xref': 'x', 'yref': 'paper', 'text': 'Sudden Drop'},
        {'x': '2023-10-12', 'y': 0.05, 'xref': 'x', 'yref': 'paper', 'text': 'Trend Reversal'},
        {'x': '2023-10-30', 'y': 0.05, 'xref': 'x', 'yref': 'paper', 'text': 'Trend Reversal'}
    ]
})

fig.show()


#2: Returns & Volatility


- **Simple Return**: (P_t - P_{t-1}) / P_{t-1}
- **Log Return**: log(P_t / P_{t-1}) → additive over time

In [120]:
# 1. Return Computation
df_single['Daily Return'] = df_single['Close'].pct_change()
df_single['Log Return'] = np.log(df_single['Close'] / df_single['Close'].shift(1))
df_single[['Daily Return', 'Log Return']].dropna().head()

Price,Daily Return,Log Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-04,0.010314,0.010261
2023-01-05,-0.010605,-0.010661
2023-01-06,0.036794,0.036133
2023-01-09,0.004089,0.004081
2023-01-10,0.004456,0.004446


In [121]:
from plotly.subplots import make_subplots

fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Line(x=df_single['Daily Return'].index, y=df_single['Daily Return']),
    row=1, col=1
)

fig.add_trace(
    go.Line(x=df_single['Log Return'].index, y=df_single['Log Return']),
    row=1, col=2
)

fig.update_layout(title_text="Daily (blue) vs. Log (red) Returns")
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




Let’s say the daily return on a stock is x. The “log return” is just the log of (1 + x). Now, in math, there’s a rule that says when x is very small (close to zero), log(1 + x) is almost the same as x. This is useful because stock prices usually don’t change much in a single day — they move up or down just a little. That’s why the log return and the regular return (also called simple return) usually end up being nearly the same, and their graphs often look very similar.

Also, since log(1 + x) increases as x increases, both types of return go up or down together — when one rises, so does the other.

However, when there’s a big change in price — a large jump or drop — the difference between the two becomes noticeable. On such days, the log return and the simple return can give quite different values.

In [122]:
# 2. Volatility Estimation
df_single['Volatility']=df_single['Daily Return'].rolling(14).std()
df_single[['Volatility']].dropna().head()

Price,Volatility
Date,Unnamed: 1_level_1
2023-01-24,0.012515
2023-01-25,0.013067
2023-01-26,0.011944
2023-01-27,0.009286
2023-01-30,0.012021


In [123]:
fig = make_subplots(rows=2, cols=1)
fig.add_trace(
    go.Scatter(
        x=df_single['Close'].index,
        y=df_single['Close'],
        mode='lines',
        name='Close'
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=df_single['Volatility'].index,
        y=df_single['Volatility'],
        mode='lines',
        name='Volatility',
        line=dict(color='red')
    ),
    row=2, col=1
)
fig.update_layout(
    title_text="Closing Price (blue) vs. Volatility (red)",
    showlegend=True
)
fig.show()


#3: The Probability Paradox

In [124]:
from scipy.stats import binom
df_single['Positive']=df_single['Daily Return']>0
df_single.head()

Price,Close,High,Low,Open,Volume,Daily Return,Log Return,Volatility,Positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-03,123.470619,129.22606,122.582127,128.613993,112117500,,,,False
2023-01-04,124.744125,127.014716,123.480495,125.267347,89113600,0.010314,0.010261,,True
2023-01-05,123.421257,126.13609,123.164587,125.504275,80962700,-0.010605,-0.010661,,False
2023-01-06,127.962418,128.623848,123.292909,124.398589,87754700,0.036794,0.036133,,True
2023-01-09,128.485641,131.703962,128.228972,128.801557,70790800,0.004089,0.004081,,True


In [125]:
P_up=df_single['Positive'].sum()/(len(df_single['Positive'])-1)
print("Value of P(UP) :",P_up*100,"%")

Value of P(UP) : 56.22489959839358 %


In [126]:
# 1: Theory vs Reality
from scipy.stats import binom
n=10
p=P_up
rv=binom(n,p)
print("Theoretical probability of exactly 6 heads: ",rv.pmf(6))
print("Theoretical probability of at least 8 heads: ",1-rv.cdf(7))
rvs=rv.rvs(size=1000)
print("Emperical probability of exactly 6 heads: ",(rvs==6).sum()/1000)
print("Emperical probability of at least 8 heads: ",(rvs>7).sum()/1000)



Theoretical probability of exactly 6 heads:  0.24361275858915113
Theoretical probability of at least 8 heads:  0.11385575951357996
Emperical probability of exactly 6 heads:  0.255
Emperical probability of at least 8 heads:  0.106


In [128]:
# 2: The Paradox
avg_Positive=df_single['Log Return'][df_single['Positive']].mean()
avg_loss=df_single['Log Return'][df_single['Positive']==False].mean()
print("Average Positive %: ",avg_Positive*100,"%")
print("Average loss %: ",avg_loss*100,"%")
print("Expected earning from 6 Positives and 4 losses = ",(6*avg_Positive+4*avg_loss)*100,"%")

Average Positive %:  1.0204264538612051 %
Average loss %:  -0.9097658120189531 %
Expected earning from 6 Positives and 4 losses =  2.483495475091418 %


In [130]:
df_single['RecentPositives']=df_single['Positive'].rolling(10).sum()
df_single['RecentReturns']=df_single['Log Return'].rolling(10).sum()
df_single.head()

Price,Close,High,Low,Open,Volume,Daily Return,Log Return,Volatility,Positive,RecentPositives,RecentReturns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-03,123.470619,129.22606,122.582127,128.613993,112117500,,,,False,,
2023-01-04,124.744125,127.014716,123.480495,125.267347,89113600,0.010314,0.010261,,True,,
2023-01-05,123.421257,126.13609,123.164587,125.504275,80962700,-0.010605,-0.010661,,False,,
2023-01-06,127.962418,128.623848,123.292909,124.398589,87754700,0.036794,0.036133,,True,,
2023-01-09,128.485641,131.703962,128.228972,128.801557,70790800,0.004089,0.004081,,True,,


In [132]:
Positivedows_with_six_Positives = df_single[df_single['RecentPositives'] == 6]
total_Positivedows = Positivedows_with_six_Positives.shape[0]

positive_Positivedows = Positivedows_with_six_Positives['RecentReturns'].gt(0).sum()
profitable_pct = (positive_Positivedows / total_Positivedows) * 100
print(f"In the 10-day periods throughout the year with 6 Positives and 4 losses, only {profitable_pct:.2f}% were profitable.")


In the 10-day periods throughout the year with 6 Positives and 4 losses, only 89.06% were profitable.


Therefore, even when a 10-day window has 6 up days (more gains than losses), roughly 11% of those periods still end up with an overall loss, in spite of the positive expected return—thereby proving the claim.

In [133]:
# The Ultimate Question
print("Expected Return = ",P_up*100-(1-P_up)*150)

Expected Return =  -9.437751004016064


Define \(h\) on \(text{up}, text{down}) by:
- \(text{up} = 100\)
- \(text{down} = -150\)

Then the expected value is:
\[
E(h(x))
= ∑h(x)ℙ(x)

= h(up)\*ℙ(up) + h(down)\*ℙ(down) = P_up\*100 - (1-P_up)\*150 =
\]

Thus, the expected loss is Rs. 9.45, so this bet is unfavorable.
