In [1]:
import pandas as pd
import openpyxl
from collections import defaultdict
import os

In [2]:
# Show the kernel's working directory; the relative 'BacktestingData' path used
# further down is resolved against it.
!pwd

/home/aayush_ad/BacktestFCC


In [3]:
# Maps each Bloomberg code to the list of per-day price records collected for it.
# organize_stock_data appends to it; the CSV export loop reads it back.
stock_data_dict = defaultdict(list)

In [4]:
def load_excel_sheet(file_path):
    """Read one Excel workbook into a DataFrame.

    Row index 3 (zero-based) of the sheet is used as the header row, so the
    rows above it are ignored.

    Parameters
    ----------
    file_path : str or path-like
        Location of the workbook to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed sheet, or None when reading fails for any reason. The
        failure is reported on stdout rather than raised, so a caller looping
        over many files can keep going.
    """
    try:
        return pd.read_excel(file_path, header=3)
    except Exception as err:
        print(f"Error loading Excel sheet: {str(err)}")
    return None


In [5]:
from datetime import datetime

start_date = datetime(2012, 7, 2)

def organize_stock_data(df, store=None, origin=start_date):
    """Collect per-day close records from one loaded sheet, grouped by stock.

    The sheet is read top to bottom. A non-null 'CD_Bloomberg Code' cell
    starts a new stock; the rows that follow (whose code cell is empty) belong
    to that stock until the next code appears. Rows missing either 'NDP_Date'
    or 'NDP_Close' are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'CD_Bloomberg Code', 'NDP_Date' and
        'NDP_Close'.
    store : dict, optional
        Mapping that receives the records (code -> list of dicts). Defaults to
        the notebook-level ``stock_data_dict``.
    origin : datetime-like, optional
        Day zero for the 'Days Since Start' field. The default is captured
        when the function is defined, so rebinding ``start_date`` in a later
        cell cannot silently change the result.

    Returns
    -------
    None
        Records are appended to ``store`` in place. Each record is
        ``{'Date': 'YYYY-MM-DD', 'Days Since Start': int, 'Close': value}``.
    """
    if store is None:
        store = stock_data_dict

    current_stock = None
    # Iterating the three columns directly avoids the per-row Series that
    # DataFrame.iterrows() would build.
    for code, raw_date, close in zip(df['CD_Bloomberg Code'], df['NDP_Date'], df['NDP_Close']):
        if pd.notna(code):
            current_stock = code
            # Register the stock even if none of its rows turn out to be
            # usable, so the export step can report it as having no data.
            store.setdefault(current_stock, [])

        if current_stock is None or pd.isna(raw_date) or pd.isna(close):
            continue

        # Accept datetimes as well as date strings from the sheet.
        date = pd.Timestamp(raw_date)
        store[current_stock].append({
            'Date': date.strftime('%Y-%m-%d'),
            'Days Since Start': (date - origin).days,
            'Close': close,
        })


In [6]:
def clean_and_concatenate_data(store=None):
    """Replace each stock's list of records with a de-duplicated DataFrame.

    Parameters
    ----------
    store : dict, optional
        Mapping of stock code -> list of record dicts (or an already built
        DataFrame). Defaults to the notebook-level ``stock_data_dict``.

    Returns
    -------
    None
        ``store`` is updated in place; every value becomes a DataFrame with
        exact duplicate rows removed and a fresh 0..n-1 index.

    Notes
    -----
    The values are plain dicts, which ``pd.concat`` rejects, so the frame is
    built with ``pd.DataFrame``. That also handles an empty list and makes a
    second call a harmless no-op.
    """
    if store is None:
        store = stock_data_dict

    # Snapshot the keys so values can be reassigned while looping.
    for stock in list(store):
        store[stock] = (
            pd.DataFrame(store[stock])
            .drop_duplicates()
            .reset_index(drop=True)
        )

In [7]:
# Directory holding the raw Excel exports (relative to the working directory).
file_directory = 'BacktestingData'

# Collect the workbooks to load:
#  * match on the real '.xlsx' extension instead of a substring anywhere in the name;
#  * skip the '~$...' lock files Excel leaves behind while a workbook is open;
#  * sort, because os.listdir returns entries in arbitrary order and the load
#    order decides which record wins when two files contain the same date.
file_names = sorted(
    entry
    for entry in os.listdir(file_directory)
    if entry.lower().endswith('.xlsx') and not entry.startswith('~$')
)

print(len(file_names))

22


In [8]:
import exchange_calendars as ecals
import pandas as pd

# Look up the exchange calendar registered under the code "XBOM".
xbom_cal = ecals.get_calendar("XBOM")

# Date window for the backtest data.
# NOTE: this rebinds the module-level start_date that was first set next to
# organize_stock_data (there as a datetime, here as a pd.Timestamp of the same day).
start_date = pd.Timestamp("2012-07-02")
end_date = pd.Timestamp("2023-08-10")

# Sessions of the calendar inside the window, rendered as strings.
trading_sessions_str = xbom_cal.sessions_in_range(start_date, end_date).astype(str)

# Parse the strings back into a DatetimeIndex.
# NOTE(review): the recorded output of this cell is dtype datetime64[ns], i.e.
# tz-naive, so no timezone information is attached here.
trading_sessions = pd.to_datetime(trading_sessions_str)

# Show the resulting session index.
print(trading_sessions)


DatetimeIndex(['2012-07-02', '2012-07-03', '2012-07-04', '2012-07-05',
               '2012-07-06', '2012-07-09', '2012-07-10', '2012-07-11',
               '2012-07-12', '2012-07-13',
               ...
               '2023-07-28', '2023-07-31', '2023-08-01', '2023-08-02',
               '2023-08-03', '2023-08-04', '2023-08-07', '2023-08-08',
               '2023-08-09', '2023-08-10'],
              dtype='datetime64[ns]', length=2736, freq=None)


In [9]:
# Spot-check a single date against the calendar-derived session index.
print(
    "2013-07-04 is a valid trading session"
    if pd.Timestamp("2013-07-04") in trading_sessions
    else "2013-07-04 is not a valid trading session"
)

2013-07-04 is a valid trading session


In [10]:
# Load every discovered workbook and fold its rows into the per-stock records.
for file_name in file_names:
    file_path = os.path.join(file_directory, file_name)
    data = load_excel_sheet(file_path)
    if data is None:
        # The loader returned nothing for this file; move on to the next one.
        continue
    organize_stock_data(data)

In [11]:
# Remember where the notebook started the first time this cell runs. Re-running
# the cell then returns to the same Data/daily folder instead of creating and
# entering a nested Data/daily/Data/daily.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.getcwd()

# Ensure <project root>/Data/daily exists and make it the working directory,
# because the CSV files are written with bare file names.
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'Data', 'daily')
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.chdir(OUTPUT_DIR)

In [12]:
# First and last calendar day of the data window.
global_min_date, global_max_date = pd.Timestamp('2012-07-02'), pd.Timestamp('2023-08-10')

# One known Saturday and Sunday, used only to look up their weekday numbers.
given_saturday, given_sunday = pd.Timestamp('2012-07-07'), pd.Timestamp('2012-07-08')

# Timestamp.weekday() counts from Monday = 0.
saturday_weekday, sunday_weekday = given_saturday.weekday(), given_sunday.weekday()

In [13]:
# Keep ideal_valid_days as a DatetimeIndex: the export loop reindexes
# datetime-indexed frames against it, and an Index of strings would match none
# of their labels. Only the printed view is formatted as YYYY-MM-DD text.
ideal_valid_days = pd.to_datetime(trading_sessions_str)
print(ideal_valid_days.strftime('%Y-%m-%d'))

Index(['2012-07-02', '2012-07-03', '2012-07-04', '2012-07-05', '2012-07-06',
       '2012-07-09', '2012-07-10', '2012-07-11', '2012-07-12', '2012-07-13',
       ...
       '2023-07-28', '2023-07-31', '2023-08-01', '2023-08-02', '2023-08-03',
       '2023-08-04', '2023-08-07', '2023-08-08', '2023-08-09', '2023-08-10'],
      dtype='object', length=2736)


In [14]:
# Build the session index used for reindexing, then spot-check one date in it.
ideal_valid_days = pd.to_datetime(trading_sessions_str)
print(ideal_valid_days)
if pd.Timestamp("2013-07-04") not in ideal_valid_days:
    print("2013-07-04 is not a valid trading session")
else:
    print("2013-07-04 is a valid trading session")

DatetimeIndex(['2012-07-02', '2012-07-03', '2012-07-04', '2012-07-05',
               '2012-07-06', '2012-07-09', '2012-07-10', '2012-07-11',
               '2012-07-12', '2012-07-13',
               ...
               '2023-07-28', '2023-07-31', '2023-08-01', '2023-08-02',
               '2023-08-03', '2023-08-04', '2023-08-07', '2023-08-08',
               '2023-08-09', '2023-08-10'],
              dtype='datetime64[ns]', length=2736, freq=None)
2013-07-04 is a valid trading session


In [15]:
def _to_daily_bars(records, sessions, origin):
    """Turn one stock's close records into a gap-free table of daily bars.

    Parameters
    ----------
    records : list of dict or pandas.DataFrame
        Must provide 'Date' and 'Close' entries.
    sessions : index-like of dates
        The trading days the output must cover, in ascending order.
    origin : datetime-like
        Day zero for the 'Days Since Start' column.

    Returns
    -------
    pandas.DataFrame
        One row per session with the columns Date, Days Since Start, close,
        open, high, low, volume, dividend (in that order). Only a close price
        is available, so open/high/low repeat it and volume/dividend are
        constant placeholders.
    """
    sessions = pd.DatetimeIndex(pd.to_datetime(sessions))

    raw = pd.DataFrame(records)
    closes = pd.Series(
        raw['Close'].to_numpy(),
        index=pd.DatetimeIndex(pd.to_datetime(raw['Date'])),
    )
    # A stable sort keeps records for the same date in load order, so
    # keep='last' reliably selects the most recently loaded one.
    closes = closes.sort_index(kind='mergesort')
    closes = closes[~closes.index.duplicated(keep='last')]

    # Carry the last known close forward over missing sessions, then fill any
    # sessions before the first observation from the first known close.
    closes = closes.reindex(sessions, method='ffill').bfill()
    close_values = closes.to_numpy()

    # Derived columns are built after reindexing so they are never filled from
    # a neighbouring row: the day count always matches the row's own date, and
    # the integer placeholders stay integers.
    return pd.DataFrame({
        'Date': sessions,
        'Days Since Start': (sessions - pd.Timestamp(origin)).days,
        'close': close_values,
        'open': close_values,
        'high': close_values,
        'low': close_values,
        'volume': 1000000,
        'dividend': 0,
    })


# Write one CSV per stock into the current working directory.
for stock, data in stock_data_dict.items():
    formatted_data = pd.DataFrame(data)
    if formatted_data.empty:
        print(f"No data available for {stock}")
        continue

    # Drop the trailing " IN" suffix for the file name; codes without it are
    # used unchanged instead of losing their last three characters.
    ticker = stock[:-3] if stock.endswith(' IN') else stock
    csv_file_name = f"{ticker}.csv"

    formatted_data = _to_daily_bars(formatted_data, ideal_valid_days, start_date)

    formatted_data.to_csv(csv_file_name, index=False)
    print(f"Data for {stock} saved to {csv_file_name}")

Data for 3M IN saved to 3M.csv
Data for ARTO IN saved to ARTO.csv
Data for ABAN IN saved to ABAN.csv
Data for ABB IN saved to ABB.csv
Data for ACC IN saved to ACC.csv
Data for ADE IN saved to ADE.csv
Data for ADSEZ IN saved to ADSEZ.csv
Data for ADANI IN saved to ADANI.csv
Data for ABML IN saved to ABML.csv
Data for RMDA IN saved to RMDA.csv
Data for ATFL IN saved to ATFL.csv
Data for AIAE IN saved to AIAE.csv
Data for AKZO IN saved to AKZO.csv
Data for AACL IN saved to AACL.csv
Data for AGLL IN saved to AGLL.csv
Data for ALOK IN saved to ALOK.csv
Data for AMRJ IN saved to AMRJ.csv
Data for ACEM IN saved to ACEM.csv
Data for ARCP IN saved to ARCP.csv
Data for APIL IN saved to APIL.csv
Data for APHS IN saved to APHS.csv
Data for APTY IN saved to APTY.csv
Data for APTR IN saved to APTR.csv
Data for ARVND IN saved to ARVND.csv
Data for AISG IN saved to AISG.csv
Data for AL IN saved to AL.csv
Data for ASBL IN saved to ASBL.csv
Data for APNT IN saved to APNT.csv
Data for ASTRA IN saved to A

In [16]:
# Inspect the frame left in formatted_data by the final iteration of the export loop.
formatted_data

Unnamed: 0,Date,Days Since Start,close,open,high,low,volume,dividend
0,2012-07-02,0,185.95,185.95,185.95,185.95,1000000,0
1,2012-07-03,1,189.05,189.05,189.05,189.05,1000000,0
2,2012-07-04,2,189.65,189.65,189.65,189.65,1000000,0
3,2012-07-05,3,197.35,197.35,197.35,197.35,1000000,0
4,2012-07-06,4,195.80,195.80,195.80,195.80,1000000,0
...,...,...,...,...,...,...,...,...
2731,2023-08-04,4050,117.70,117.70,117.70,117.70,1000000,0
2732,2023-08-07,4053,121.25,121.25,121.25,121.25,1000000,0
2733,2023-08-08,4054,118.70,118.70,118.70,118.70,1000000,0
2734,2023-08-09,4055,117.65,117.65,117.65,117.65,1000000,0
