In [324]:
!pip install yfinance plotly mplfinance --quiet

import yfinance as yf
import mplfinance as mpf
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from plotly.subplots import make_subplots
from scipy.stats import binom

In [325]:
plt.style.use('seaborn-v0_8-darkgrid')
pd.set_option('display.max_columns', None)

In [326]:
# Downloading 1 year of daily OHLCV data
ticker = "RELIANCE.NS"
end_date = datetime.today()
start_date = end_date - timedelta(days=365)

data = yf.download(ticker, start=start_date, end=end_date, interval='1d')

# Cleaning the data
data.dropna(inplace=True)
data.index = pd.to_datetime(data.index)
data.describe()
print("Data shape:", data.shape)
print(data.head())

# print("\nData shape:", data.shape)
# print("\nFirst 5 rows:")
# print(data.head())
# print("\nLast 5 rows:")
# print(data.tail())
# print("\nMissing values:")
# print(data.isnull().sum())
# print("\nDate range:")
# print(f"From: {data.index.min()} To: {data.index.max()}")

[*********************100%***********************]  1 of 1 completed

Data shape: (248, 5)
Price             Close         High          Low         Open      Volume
Ticker      RELIANCE.NS  RELIANCE.NS  RELIANCE.NS  RELIANCE.NS RELIANCE.NS
Date                                                                      
2024-05-31  1425.561646  1437.371538  1417.439189  1426.458626    31069832
2024-06-03  1505.216309  1509.377235  1454.064963  1477.983783    21527942
2024-06-04  1392.548706  1492.982856  1354.702182  1492.982856    36709098
2024-06-05  1415.944336  1426.309182  1379.418280  1417.190108    17464890
2024-06-06  1426.757568  1438.816616  1416.542239  1430.146092    17855722





In [327]:
import plotly.graph_objects as go
import pandas as pd

# Creating the candlestick chart
fig = go.Figure(data=[go.Candlestick(
    x=data.index,
    open=data[('Open', 'RELIANCE.NS')],
    high=data[('High', 'RELIANCE.NS')],
    low=data[('Low', 'RELIANCE.NS')],
    close=data[('Close', 'RELIANCE.NS')],
    name='RELIANCE'
)])

# Calculating daily returns
data[('Daily_Return', 'RELIANCE.NS')] = data[('Close', 'RELIANCE.NS')].pct_change()
daily_returns = data[('Daily_Return', 'RELIANCE.NS')]

# Finding actual patterns from the data
max_drop_idx = daily_returns.idxmin()
max_gain_idx = daily_returns.idxmax()
max_drop_price = data.loc[max_drop_idx, ('Close', 'RELIANCE.NS')]
max_gain_price = data.loc[max_gain_idx, ('Close', 'RELIANCE.NS')]

drop_value = daily_returns.loc[max_drop_idx]
gain_value = daily_returns.loc[max_gain_idx]

# Adding annotations
annotations = [
    dict(
        x=max_drop_idx,
        y=max_drop_price,
        text=f"Biggest Drop<br>{drop_value:.2%}",
        showarrow=True,
        arrowhead=2,
        arrowcolor="red",
        ax=20,
        ay=-30,
        bgcolor="rgba(255,255,255,0.8)",
        bordercolor="red",
        borderwidth=1
    ),
    dict(
        x=max_gain_idx,
        y=max_gain_price,
        text=f"Biggest Gain<br>{gain_value:.2%}",
        showarrow=True,
        arrowhead=2,
        arrowcolor="green",
        ax=-20,
        ay=-30,
        bgcolor="rgba(255,255,255,0.8)",
        bordercolor="green",
        borderwidth=1
    )
]

# Adding vertical lines
fig.add_shape(
    type="line",
    x0=max_drop_idx, x1=max_drop_idx,
    y0=0, y1=1,
    yref="paper",
    line=dict(color="red", width=2, dash="dash")
)

fig.add_shape(
    type="line",
    x0=max_gain_idx, x1=max_gain_idx,
    y0=0, y1=1,
    yref="paper",
    line=dict(color="green", width=2, dash="dash")
)

fig.update_layout(
    title={
        'text': 'Candlestick Chart - RELIANCE NSE',
        'x': 0.5,
        'xanchor': 'center'
    },
    yaxis_title='Price (INR)',
    xaxis_title='Date',
    annotations=annotations,
    xaxis_rangeslider_visible=False,
    height=600,
    width=1000,
    hovermode='x unified'
)

fig.show()

print(f"Biggest Drop: {drop_value:.2%} on {max_drop_idx.date()}")
print(f"Biggest Gain: {gain_value:.2%} on {max_gain_idx.date()}")


Biggest Drop: -7.49% on 2024-06-04
Biggest Gain: 5.59% on 2024-06-03


In [328]:
# Calculating returns
data['Simple_Return'] = data['Close'].pct_change()
data['Log_Return'] = np.log(data['Close'] / data['Close'].shift(1))

# Plotting simple vs. log returns
fig_returns = make_subplots(rows=1, cols=2, subplot_titles=('Daily Simple Returns', 'Daily Log Returns'))
fig_returns.add_trace(go.Scatter(x=data.index, y=data['Simple_Return'], mode='lines', name='Simple Return'), row=1, col=1)
fig_returns.add_trace(go.Scatter(x=data.index, y=data['Log_Return'], mode='lines', name='Log Return'), row=1, col=2)
fig_returns.update_layout(title_text='Simple vs Log Returns', height=400, width=900)
fig_returns.show()

Daily simple and log returns often look similar because for small daily price changes, the mathematical difference between them is negligible.
Simple returns are calculated as the percentage change in price, while log returns are the natural logarithm of the price ratio. When price changes are small (typically less than a few percent), both methods yield nearly identical results.
However, for larger price swings (such as those seen in highly volatile markets or after major news events), log returns and simple returns can diverge. Log returns are commonly used in financial modeling because they are additive over time and more suitable for statistical analysis.

In [329]:
# Calculating volatility
data[('Simple_Return', 'RELIANCE.NS')] = data[('Close', 'RELIANCE.NS')].pct_change()
data[('Volatility_14d', 'RELIANCE.NS')] = data[('Simple_Return', 'RELIANCE.NS')].rolling(window=14).std()

fig_volatility = make_subplots(specs=[[{"secondary_y": True}]])

# Adding closing price trace (primary y-axis)
fig_volatility.add_trace(
    go.Scatter(
        x=data.index,
        y=data[('Close', 'RELIANCE.NS')],
        name='Closing Price',
        line=dict(color='blue', width=2)
    ),
    secondary_y=False
)

# Adding volatility trace (secondary y-axis)
fig_volatility.add_trace(
    go.Scatter(
        x=data.index,
        y=data[('Volatility_14d', 'RELIANCE.NS')],
        name='14-Day Volatility',
        line=dict(color='red', width=2)
    ),
    secondary_y=True
)

fig_volatility.update_layout(
    title='Closing Price and 14-Day Rolling Volatility',
    xaxis_title='Date',
    height=500,
    width=1000
)
fig_volatility.update_yaxes(title_text="Closing Price (INR)", secondary_y=False)
fig_volatility.update_yaxes(title_text="14-Day Volatility", secondary_y=True)
fig_volatility.show()


In [330]:
if ('Simple_Return', '') not in data.columns:
    data['Simple_Return'] = data['Close'].pct_change()

valid_days = data.dropna(subset=[('Simple_Return', '')])
valid_days['Day_Class'] = np.where(valid_days[('Simple_Return', '')] > 0, 'UP', 'DOWN')

# Calculating actual P(UP) from the data
p_up_actual = (valid_days['Day_Class'] == 'UP').mean()
print(f"Actual Probability of an 'UP' day from data: {p_up_actual:.4f}")

Actual Probability of an 'UP' day from data: 0.5263




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [331]:
p_up = 0.6
n = 10

# Theoretical probabilities
prob_exactly_6 = binom.pmf(6, n, p_up)
prob_atleast_8 = binom.sf(7, n, p_up)

# Simulation
np.random.seed(42)
simulations = binom.rvs(n, p_up, size=1000)
simulated_6 = np.mean(simulations == 6)
simulated_atleast_8 = np.mean(simulations >= 8)

print(f"Theoretical P(6 up): {prob_exactly_6:.2f} | Simulated: {simulated_6:.2f}")
print(f"Theoretical P(atleast 8 up): {prob_atleast_8:.2f} | Simulated: {simulated_atleast_8:.2f}")

Theoretical P(6 up): 0.25 | Simulated: 0.25
Theoretical P(atleast 8 up): 0.17 | Simulated: 0.19


In [332]:
# Calculating average returns for up and down days from actual data
up_days = valid_days[valid_days['Day_Class'] == 'UP'][('Simple_Return', '')]
down_days = valid_days[valid_days['Day_Class'] == 'DOWN'][('Simple_Return', '')]

avg_return_up = up_days.mean()
avg_return_down = down_days.mean()

print(f"Average return on UP days: {avg_return_up:.4f} ({avg_return_up*100:.2f}%)")
print(f"Average return on DOWN days: {avg_return_down:.4f} ({avg_return_down*100:.2f}%)")

# Scenario: 6 up days, 4 down days
up_days_scenario = 6
down_days_scenario = 4

total_return_scenario = (up_days_scenario * avg_return_up) + (down_days_scenario * avg_return_down)
print(f"\nScenario: {up_days_scenario} up days, {down_days_scenario} down days")
print(f"Total return: ({up_days_scenario} × {avg_return_up:.4f}) + ({down_days_scenario} × {avg_return_down:.4f}) = {total_return_scenario:.4f}")
print(f"Total return percentage: {total_return_scenario*100:.2f}%")

Average return on UP days: 0.0104 (1.04%)
Average return on DOWN days: -0.0113 (-1.13%)

Scenario: 6 up days, 4 down days
Total return: (6 × 0.0104) + (4 × -0.0113) = 0.0170
Total return percentage: 1.70%


**Problem Statement 3: The Probability Paradox**

*Part 2: The Paradox*

Average up day return = +0.8%

Average down day return = -1.2%

Total return = (1.008^6)*(0.988^4) - 1
             = 1.049*0.953 - 1
             = 0.999 - 1 = -0.1% loss

In [333]:
win_amount = 100
lose_amount = -150

# Using actual probability from our data
ev_actual = (p_up_actual * win_amount) + ((1 - p_up_actual) * lose_amount)
print(f"Using actual P(UP) = {p_up_actual:.4f}:")
print(f"Expected Value = ({p_up_actual:.4f} × {win_amount}) + ({1-p_up_actual:.4f} × {lose_amount})")
print(f"Expected Value = {ev_actual:.2f} INR per bet")

# Using the assignment's example P(UP) = 0.6
ev_example = (0.6 * win_amount) + (0.4 * lose_amount)
print(f"\nUsing example P(UP) = 0.6:")
print(f"Expected Value = (0.6 × {win_amount}) + (0.4 × {lose_amount})")
print(f"Expected Value = {ev_example:.2f} INR per bet")

Using actual P(UP) = 0.5263:
Expected Value = (0.5263 × 100) + (0.4737 × -150)
Expected Value = -18.42 INR per bet

Using example P(UP) = 0.6:
Expected Value = (0.6 × 100) + (0.4 × -150)
Expected Value = 0.00 INR per bet


With only a 52.63% win probability, this bet becomes an even worse proposition, losing ₹18.43 on average per bet.
The math is brutally clear: you're essentially flipping a slightly biased coin (52.6% vs 47.4%) but getting punished disproportionately when you lose. For every ₹100 you might win, you risk losing ₹150 - a 1.5:1 risk-reward ratio working against you.
To break even with a 52.63% win rate, you'd need to win at least ₹142.50 per successful bet (not ₹100) to offset the ₹150 losses. The current structure creates a negative expectancy that compounds over time.
This perfectly mirrors why 90% of day traders lose money - they chase win rates without considering the mathematics of risk-reward ratios.