In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from datetime import datetime
import warnings
import os

# Suppress every warning category for the rest of the session so that
# library notices do not clutter the printed report.
warnings.simplefilter('ignore')

# Seaborn defaults shared by all figures below: white grid background
# and the "husl" colour cycle.
sns.set_style(style='whitegrid')
sns.set_palette(palette="husl")

# Define function to fetch data
def fetch_stock_data(ticker, start_date, end_date):
    """Download price history for one symbol via ``yf.download``.

    Args:
        ticker: Symbol understood by Yahoo Finance, e.g. ``'^NSEI'``.
        start_date: Value forwarded to ``yf.download`` as ``start``.
        end_date: Value forwarded to ``yf.download`` as ``end``.

    Returns:
        The DataFrame produced by ``yf.download``, unmodified.

    Raises:
        ValueError: If the download yields no rows, so that an empty frame
            does not flow silently into the CSV dumps and plots.
    """
    # auto_adjust is pinned explicitly: its default changed between yfinance
    # releases, and relying on the default makes the returned columns
    # depend on the installed version.
    stock_data = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        auto_adjust=True,
    )
    if stock_data is None or stock_data.empty:
        raise ValueError(
            f"No data returned for {ticker!r} between {start_date} and {end_date}"
        )
    return stock_data

# Date window: fixed start on 2020-01-01, end is today's date at run time
start_date = '2020-01-01'
end_date = datetime.now().date().isoformat()

# Download both indices over the same window
# ('^NSEI' is the Nifty50 index, '^NSEBANK' the BankNifty index)
nifty50, banknifty = (
    fetch_stock_data(symbol, start_date, end_date)
    for symbol in ('^NSEI', '^NSEBANK')
)

# Keep a copy of the downloads on disk before any columns are added
nifty50.to_csv('nifty50_data.csv')
banknifty.to_csv('banknifty_data.csv')

# 1. Basic Data Exploration
def explore_data(df, name):
    """Print a short overview of ``df`` under a heading built from ``name``.

    The report lists, in order: the frame's shape, its column labels,
    the per-column count of missing values, and ``df.describe()``.
    Nothing is returned; all output goes to stdout.
    """
    print(f"\n{name} Data Overview:")
    print(f"Shape: {df.shape}")
    print(f"\nColumns: {df.columns.tolist()}")
    # The space after each "\n" mirrors print()'s default separator between
    # the heading and the table that follows it.
    print(f"\nMissing Values:\n {df.isnull().sum()}")
    print(f"\nBasic Statistics:\n {df.describe()}")

# Print the overview for each index in turn
for _frame, _label in ((nifty50, "Nifty50"), (banknifty, "BankNifty")):
    explore_data(_frame, _label)

# 2. Data Preprocessing
# Fill missing values with forward fill.
# DataFrame.ffill is used instead of fillna(method='ffill'): the `method`
# keyword of fillna is deprecated in recent pandas releases.
nifty50.ffill(inplace=True)
banknifty.ffill(inplace=True)

# Calculate daily returns, expressed in percent
nifty50['Daily_Return'] = nifty50['Close'].pct_change() * 100
banknifty['Daily_Return'] = banknifty['Close'].pct_change() * 100

# Calculate rolling volatility: standard deviation of the daily returns
# over a 30-row window (the first 29 windows are incomplete and yield NaN)
nifty50['Volatility'] = nifty50['Daily_Return'].rolling(window=30).std()
banknifty['Volatility'] = banknifty['Daily_Return'].rolling(window=30).std()

# 3. Visualizations
# Create directory for plots; exist_ok avoids the check-then-create race
# and makes re-running the cell a no-op when the directory already exists.
os.makedirs('plots', exist_ok=True)

# Plot 1: Closing Price Trends
# Both indices are drawn on one shared axis and written to a PNG.
fig, ax = plt.subplots(figsize=(12, 6))
for _label, _frame in (('Nifty50', nifty50), ('BankNifty', banknifty)):
    ax.plot(_frame.index, _frame['Close'], label=_label, linewidth=2)
ax.set_title('Nifty50 vs BankNifty Closing Prices', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price')
ax.legend()
ax.grid(True)
fig.savefig('plots/closing_price_trends.png')
plt.close(fig)

# Plot 2: Daily Returns Distribution
# Overlaid 50-bin histograms of each index's daily returns (NaNs dropped).
fig, ax = plt.subplots(figsize=(12, 6))
for _label, _frame in (('Nifty50', nifty50), ('BankNifty', banknifty)):
    sns.histplot(
        _frame['Daily_Return'].dropna(),
        bins=50,
        label=_label,
        alpha=0.5,
        ax=ax,
    )
ax.set_title('Distribution of Daily Returns', fontsize=14)
ax.set_xlabel('Daily Returns (%)')
ax.set_ylabel('Frequency')
ax.legend()
fig.savefig('plots/daily_returns_distribution.png')
plt.close(fig)

# Plot 3: Rolling Volatility
# Line chart of the precomputed 'Volatility' column for each index.
fig, ax = plt.subplots(figsize=(12, 6))
for _label, _frame in (('Nifty50', nifty50), ('BankNifty', banknifty)):
    ax.plot(_frame.index, _frame['Volatility'], label=_label, linewidth=2)
ax.set_title('30-Day Rolling Volatility', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Volatility (Standard Deviation)')
ax.legend()
ax.grid(True)
fig.savefig('plots/rolling_volatility.png')
plt.close(fig)

# Plot 4: Monthly Seasonality (Average Returns by Month)
# Tag each row with its calendar month (1-12), taken from the index
nifty50['Month'] = nifty50.index.month
banknifty['Month'] = banknifty.index.month

# Mean daily return per calendar month, pooled across all years
monthly_returns_nifty = nifty50.groupby('Month')['Daily_Return'].mean()
monthly_returns_bank = banknifty.groupby('Month')['Daily_Return'].mean()

# position=0 / position=1 offset the two bar series so they sit side by side
fig, ax = plt.subplots(figsize=(12, 6))
monthly_returns_nifty.plot.bar(
    ax=ax, alpha=0.4, label='Nifty50', position=0, width=0.4
)
monthly_returns_bank.plot.bar(
    ax=ax, alpha=0.4, label='BankNifty', position=1, width=0.4, color='orange'
)
ax.set_title('Average Monthly Returns', fontsize=14)
ax.set_xlabel('Month')
ax.set_ylabel('Average Daily Return (%)')
ax.legend()
fig.savefig('plots/monthly_seasonality.png')
plt.close(fig)

# 4. Performance Metrics
def calculate_metrics(df, name, trading_days=252, risk_free_rate=0.0):
    """Print annualized return, volatility and Sharpe ratio for ``df``.

    Args:
        df: Frame with a ``'Daily_Return'`` column holding per-period
            returns in percent.
        name: Label used in the printed heading.
        trading_days: Number of return periods per year used for
            annualization. Defaults to 252.
        risk_free_rate: Annual risk-free rate, in the same percent units as
            the returns, subtracted from the annualized return before
            dividing by volatility. The default of 0.0 reproduces a plain
            return-over-volatility ratio.

    Returns:
        None. The three metrics are written to stdout.
    """
    daily_returns = df['Daily_Return']

    # Arithmetic annualization: the mean scales linearly with the number of
    # periods, the standard deviation with its square root.
    annualized_return = daily_returns.mean() * trading_days
    annualized_volatility = daily_returns.std() * np.sqrt(trading_days)

    # A zero volatility would divide by zero; report NaN instead.
    if annualized_volatility != 0:
        sharpe_ratio = (annualized_return - risk_free_rate) / annualized_volatility
    else:
        sharpe_ratio = np.nan

    print(f"\n{name} Performance Metrics:")
    print(f"Annualized Return: {annualized_return:.2f}%")
    print(f"Annualized Volatility: {annualized_volatility:.2f}%")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

# Report the performance metrics for each index in turn
for _frame, _label in ((nifty50, "Nifty50"), (banknifty, "BankNifty")):
    calculate_metrics(_frame, _label)

# 5. Correlation Analysis
# Correlation between the two daily-return series (Series.corr default method)
_nifty_returns = nifty50['Daily_Return']
_bank_returns = banknifty['Daily_Return']
correlation = _nifty_returns.corr(_bank_returns)
print(f"\nCorrelation between Nifty50 and BankNifty Daily Returns: {correlation:.4f}")

# Plot 5: Correlation Heatmap
fig, ax = plt.subplots(figsize=(8, 6))

# Put both return series side by side, then take the pairwise correlations
_paired_returns = pd.DataFrame({
    'Nifty50': nifty50['Daily_Return'],
    'BankNifty': banknifty['Daily_Return'],
})
correlation_matrix = _paired_returns.corr()

# The colour scale is fixed to [-1, 1], the full range of a correlation
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title('Correlation Matrix of Daily Returns', fontsize=14)
fig.savefig('plots/correlation_heatmap.png')
plt.close(fig)

print("\nEDA completed. Visualizations saved in 'plots' directory.")

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed



Nifty50 Data Overview:
Shape: (1311, 5)

Columns: [('Close', '^NSEI'), ('High', '^NSEI'), ('Low', '^NSEI'), ('Open', '^NSEI'), ('Volume', '^NSEI')]

Missing Values:
 Price   Ticker
Close   ^NSEI     0
High    ^NSEI     0
Low     ^NSEI     0
Open    ^NSEI     0
Volume  ^NSEI     0
dtype: int64

Basic Statistics:
 Price          Close          High           Low          Open        Volume
Ticker         ^NSEI         ^NSEI         ^NSEI         ^NSEI         ^NSEI
count    1311.000000   1311.000000   1311.000000   1311.000000  1.311000e+03
mean    17666.442416  17762.060212  17563.956171  17676.479329  3.819792e+05
std      4284.415104   4288.401809   4280.088277   4282.995092  2.015469e+05
min      7610.250000   8036.950195   7511.100098   7735.149902  0.000000e+00
25%     15055.850098  15140.649902  14965.399902  15054.250000  2.508500e+05
50%     17629.500000  17748.750000  17540.349609  17670.849609  3.085000e+05
75%     21246.925781  21424.750000  21112.274414  21236.349609  4.865