In [16]:
import pandas as pd

# Load the candlestick CSV and index it by a combined timestamp.
# The "Date" and "Time" text columns are joined with a space and parsed with an
# explicit day-first format, then the helper and unused columns are dropped.
candle_stick_data = (
    pd.read_csv("dataset/nifty50_candlestick_data.csv")
    .assign(
        datetime=lambda frame: pd.to_datetime(
            frame["Date"] + " " + frame["Time"],
            format="%d-%m-%Y %H:%M:%S",
        )
    )
    .set_index("datetime")
    # errors="ignore" tolerates any listed column that is absent from the CSV.
    .drop(
        columns=["Date", "Time", "High", "Low", "Close", "Instrument"],
        errors="ignore",
    )
)

# Second name for the same object (an alias, not a copy).
n50_minute_level_opens = candle_stick_data
n50_minute_level_opens.head()

Unnamed: 0_level_0,Open
datetime,Unnamed: 1_level_1
2015-01-09 09:15:00,8285.45
2015-01-09 09:16:00,8292.6
2015-01-09 09:17:00,8287.4
2015-01-09 09:18:00,8294.25
2015-01-09 09:19:00,8300.6


In [None]:
# Check the time range and structure of the data.
# `.date` / `.time` on the index each build a per-row array of Python objects,
# so materialise them once here and reuse them below instead of recomputing.
minute_index = n50_minute_level_opens.index
all_dates = minute_index.date
all_times = minute_index.time

print("Data shape:", n50_minute_level_opens.shape)
print("Date range:", minute_index.min(), "to", minute_index.max())
print("\nUnique times in a day:")
# The sample day is the calendar date of the first row (not necessarily the minimum).
sample_day_times = minute_index[all_dates == all_dates[0]]
print(f"Times available: {sample_day_times.time[0]} to {sample_day_times.time[-1]}")
print(f"Total minutes per day: {len(sample_day_times)}")

# Filter data for standard market hours (9:15 AM to 3:30 PM); both bounds are inclusive.
MARKET_OPEN = pd.Timestamp('09:15:00').time()
MARKET_CLOSE = pd.Timestamp('15:30:00').time()
market_hours_filter = (all_times >= MARKET_OPEN) & (all_times <= MARKET_CLOSE)

# .copy() so the helper columns added below are written to an independent frame.
n50_market_hours = n50_minute_level_opens[market_hours_filter].copy()

print("\nAfter filtering to market hours (9:15 AM - 3:30 PM):")
print(f"Filtered data shape: {n50_market_hours.shape}")

# Extract date and time components (removing seconds from time).
# The dates are the already-computed per-row dates restricted to the kept rows.
n50_market_hours['date'] = all_dates[market_hours_filter]
n50_market_hours['time'] = n50_market_hours.index.strftime('%H:%M')

# Create pivot table with date as index and time as columns
n50_daily_opens = n50_market_hours.pivot_table(
    index='date',
    columns='time',
    values='Open',
    aggfunc='first'  # In case there are duplicates, take the first value
)

print(f"\nn50_daily_opens shape: {n50_daily_opens.shape}")
print(f"Index (dates): {n50_daily_opens.index[0]} to {n50_daily_opens.index[-1]}")
print(f"Columns (times): {len(n50_daily_opens.columns)} time points")
print(f"Time range in columns: {n50_daily_opens.columns[0]} to {n50_daily_opens.columns[-1]}")

n50_daily_opens.head()

Data shape: (852087, 3)
Date range: 2015-01-09 09:15:00 to 2024-03-27 12:59:00

Unique times in a day:


Times available: 09:15:00 to 15:29:00
Total minutes per day: 375

After filtering to market hours (9:15 AM - 3:30 PM):
Filtered data shape: (851460, 3)

After filtering to market hours (9:15 AM - 3:30 PM):
Filtered data shape: (851460, 3)

n50_daily_opens shape: (2273, 375)
Index (dates): 2015-01-09 to 2024-03-27
Columns (times): 375 time points
Time range in columns: 09:15 to 15:29

n50_daily_opens shape: (2273, 375)
Index (dates): 2015-01-09 to 2024-03-27
Columns (times): 375 time points
Time range in columns: 09:15 to 15:29


time,09:15,09:16,09:17,09:18,09:19,09:20,09:21,09:22,09:23,09:24,...,15:20,15:21,15:22,15:23,15:24,15:25,15:26,15:27,15:28,15:29
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-09,8285.45,8292.6,8287.4,8294.25,8300.6,8300.5,8300.65,8302.45,8294.85,8295.2,...,8280.8,8282.35,8283.4,8284.35,8286.9,8286.65,8283.45,8282.35,8283.25,8280.5
2015-01-12,8291.35,8254.2,8255.25,8258.15,8263.2,8267.45,8266.05,8268.8,8273.85,8266.75,...,8329.5,8326.55,8328.05,8328.05,8327.2,8330.2,8330.9,8329.95,8329.95,8328.85
2015-01-13,8346.15,8355.15,8348.7,8344.5,8342.5,8340.35,8339.75,8340.45,8333.3,8326.05,...,8304.9,8305.75,8306.5,8307.15,8308.0,8308.2,8308.25,8307.25,8305.85,8308.2
2015-01-14,8307.25,8300.85,8307.0,8309.05,8305.4,8304.7,8302.2,8293.1,8296.7,8306.85,...,8280.1,8278.9,8280.9,8283.6,8284.3,8285.35,8285.5,8286.95,8288.3,8288.9
2015-01-15,8425.2,8440.45,8394.35,8386.05,8401.1,8428.0,8408.25,8398.0,8416.7,8421.95,...,8497.6,8491.8,8482.05,8477.25,8468.0,8463.8,8469.05,8464.8,8467.25,8467.45
