In [1]:
# yfinance → to download stock market data
import yfinance as yf
import pandas as pd


In [3]:
# Dictionary of stocks
# Key = Label we want in our dataset
# Value = Actual Yahoo Finance ticker symbol

stocks = {
    "RELIANCE": "RELIANCE.NS",   # Indian large-cap
    "HDFCBANK": "HDFCBANK.NS",   # Indian banking stock
    "NVIDIA": "NVDA",            # US tech stock
    "TESLA": "TSLA"              # US EV / high volatility stock
}


In [7]:
frames = []

for stock_name, ticker_symbol in stocks.items():

    # Create ticker object (ensures single-stock structure)
    ticker = yf.Ticker(ticker_symbol)

    # Download historical data
    df = ticker.history(start="2019-01-01", auto_adjust=False)

    if df.empty:
        print(f"⚠️ No data for {stock_name}")
        continue

    # Reset index so Date becomes column
    df = df.reset_index()

    # Keep only Date and Adjusted Close
    df = df[["Date", "Adj Close"]]

    # Rename column
    df.rename(columns={"Adj Close": "Adj_Close"}, inplace=True)

    # Add stock name
    df["Stock"] = stock_name

    frames.append(df)

# Combine vertically (LONG format)
data = pd.concat(frames, ignore_index=True)

# Convert Date safely (handle timezone properly)
data["Date"] = pd.to_datetime(data["Date"], utc=True).dt.tz_convert(None)

# Ensure Adj_Close is numeric
data["Adj_Close"] = pd.to_numeric(data["Adj_Close"], errors="coerce")

# Drop any bad rows
data.dropna(subset=["Date", "Adj_Close"], inplace=True)

# Sort properly for time-series calculations
data.sort_values(["Stock", "Date"], inplace=True)

# Reset index
data.reset_index(drop=True, inplace=True)

# Verify structure
print("Columns:", data.columns)
print("Unique Stocks:", data["Stock"].unique())
print("Rows per Stock:")
print(data["Stock"].value_counts())

data.info()


Columns: Index(['Date', 'Adj_Close', 'Stock'], dtype='object')
Unique Stocks: ['HDFCBANK' 'NVIDIA' 'RELIANCE' 'TESLA']
Rows per Stock:
Stock
TESLA       1788
NVIDIA      1787
HDFCBANK    1759
RELIANCE    1759
Name: count, dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7093 entries, 0 to 7092
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       7093 non-null   datetime64[ns]
 1   Adj_Close  7093 non-null   float64       
 2   Stock      7093 non-null   object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 166.4+ KB


In [8]:
data.to_csv("clean_stock_data.csv", index=False)


In [9]:
#daily returns
data["Daily_Return"] = (
    data.groupby("Stock")["Adj_Close"]
        .pct_change(fill_method=None)
)


In [10]:
# rolling 30-Day Volatility
data["Rolling_Volatility_30D"] = (
    data.groupby("Stock")["Daily_Return"]
        .transform(lambda x: x.rolling(30).std())
)


In [11]:
data["Cumulative_Return"] = (
    data.groupby("Stock")["Daily_Return"]
        .transform(lambda x: (1 + x).cumprod())
)


In [12]:
data["Cumulative_Max"] = data.groupby("Stock")["Adj_Close"].cummax()

data["Drawdown"] = (
    (data["Adj_Close"] - data["Cumulative_Max"])
    / data["Cumulative_Max"]
)


In [13]:
#Maximum Drawdown per Stock
max_drawdown = (
    data.groupby("Stock")["Drawdown"]
        .min()
        .reset_index(name="Max_Drawdown")
)

print(max_drawdown)


      Stock  Max_Drawdown
0  HDFCBANK     -0.410550
1    NVIDIA     -0.663351
2  RELIANCE     -0.450884
3     TESLA     -0.736322


In [14]:
#— Correlation Matrix
returns_pivot = data.pivot(
    index="Date",
    columns="Stock",
    values="Daily_Return"
)

correlation_matrix = returns_pivot.corr()

print(correlation_matrix)



Stock     HDFCBANK    NVIDIA  RELIANCE     TESLA
Stock                                           
HDFCBANK  1.000000       NaN  0.417186       NaN
NVIDIA         NaN  1.000000       NaN  0.463959
RELIANCE  0.417186       NaN  1.000000       NaN
TESLA          NaN  0.463959       NaN  1.000000


In [15]:
data.to_csv("stock_analytics_data.csv", index=False)
max_drawdown.to_csv("max_drawdown.csv", index=False)
correlation_matrix.to_csv("stock_correlation_matrix.csv")
