In [1]:
import pandas as pd
import openpyxl
from collections import defaultdict
import os

In [2]:
# Show the kernel's working directory; the relative paths used by later cells
# ('BacktestingData', 'Data') are resolved against it.
!pwd

/home/aayush_ad/BacktestFCC


In [3]:
# Global accumulator: maps a stock code to the list of record dicts collected
# for it. organize_stock_data() appends to it and the export cell reads it back.
# Re-running this cell resets the collection to empty.
stock_data_dict = defaultdict(list)

In [4]:
def load_excel_sheet(file_path):
    """Read one Excel workbook into a DataFrame on a best-effort basis.

    The row at zero-based index 3 of the sheet is taken as the column header
    (``header=3``), so the rows above it are ignored.

    Args:
        file_path: Path of the workbook to read.

    Returns:
        The loaded DataFrame, or ``None`` when reading fails for any reason.
        The failure is reported on stdout rather than raised, so callers must
        check for ``None``.
    """
    try:
        sheet = pd.read_excel(file_path, header=3)
    except Exception as e:
        # Deliberately non-fatal: one unreadable file must not stop a batch load.
        print(f"Error loading Excel sheet: {str(e)}")
        sheet = None
    return sheet


In [5]:
from datetime import datetime

start_date = datetime(2012, 7, 2)  # day 0 for the 'Days Since Start' column


def organize_stock_data(df, stock_data_by_code=None):
    """Append the per-stock price records found in one loaded sheet.

    A non-null 'CD_Bloomberg Code' cell starts a new stock; the rows that
    follow it with a null code are attributed to that same stock until the
    next code appears. Every code seen is registered (with an empty list if
    necessary) even when none of its rows carries usable data. A row only
    produces a record when a stock is active and both 'NDP_Date' and
    'NDP_Close' are non-null.

    Each record is a dict with the keys 'Date' (``YYYY-MM-DD`` string),
    'Days Since Start' (whole days between the row date and ``start_date``)
    and 'Close'.

    Args:
        df: DataFrame with the columns 'CD_Bloomberg Code', 'NDP_Date' and
            'NDP_Close'. 'NDP_Date' values must be datetime-like (they are
            formatted with ``strftime`` and subtracted from ``start_date``).
        stock_data_by_code: Mapping of stock code -> list of records to
            append to. Defaults to the notebook-level ``stock_data_dict``.

    Returns:
        None. Records are appended to the target mapping in place.
    """
    if stock_data_by_code is None:
        stock_data_by_code = stock_data_dict
    if len(df) == 0:
        return

    current_stock = None
    # Walk only the three columns that are used. iterrows() would build a full
    # Series for every row and does not preserve column dtypes.
    rows = zip(df['CD_Bloomberg Code'], df['NDP_Date'], df['NDP_Close'])
    for code, date, close in rows:
        if pd.notna(code):
            current_stock = code
            # Register the stock even if it ends up with no usable rows.
            stock_data_by_code.setdefault(current_stock, [])

        if current_stock is None or pd.isna(date) or pd.isna(close):
            continue

        stock_data_by_code[current_stock].append({
            'Date': date.strftime('%Y-%m-%d'),
            'Days Since Start': (date - start_date).days,
            'Close': close,
        })


In [6]:
def clean_and_concatenate_data(data_by_stock=None):
    """Replace each stock's record list with a de-duplicated DataFrame.

    The collected values are lists of record dicts, which ``pd.concat`` cannot
    combine (it raises ``TypeError`` for non-pandas objects), so the frame is
    built directly from the records. Values that are already DataFrames are
    de-duplicated again, which makes repeated calls safe.

    Args:
        data_by_stock: Mapping of stock code -> list of record dicts (or an
            already-built DataFrame). Defaults to the notebook-level
            ``stock_data_dict``.

    Returns:
        None. The mapping is updated in place; every value becomes a DataFrame
        with exact duplicate rows removed and a fresh 0..n-1 index.
    """
    if data_by_stock is None:
        data_by_stock = stock_data_dict

    # Snapshot the items so values can be reassigned while looping.
    for stock, records in list(data_by_stock.items()):
        if isinstance(records, pd.DataFrame):
            frame = records
        else:
            frame = pd.DataFrame(records)
        data_by_stock[stock] = frame.drop_duplicates().reset_index(drop=True)

In [7]:
# Workbooks to load, in processing order (one semi-annual file per entry).
# NOTE(review): there is no Jan16, Jan21 or Jul22 entry - confirm whether
# those periods are intentionally left out.
file_names = [
    'Jan13.xlsx',
    'Jul14.xlsx',
    'Jan14.xlsx',
    'Jul15.xlsx',
    'Jan15.xlsx',
    'Jul16.xlsx',
    'Jul17.xlsx',
    'Jan17.xlsx',
    'Jul18.xlsx',
    'Jan18.xlsx',
    'Jul19.xlsx',
    'Jan19.xlsx',
    'Jul20.xlsx',
    'Jan20.xlsx',
    'Jul21.xlsx',
    'Jan22.xlsx',
    'Jul23.xlsx',
    'Jan23.xlsx',
]

# Folder holding the workbooks, relative to the working directory.
file_directory = 'BacktestingData'

In [8]:
# Fail fast when the input folder cannot be reached from the current working
# directory (for example when this cell is re-run after a later cell has
# changed directory). Otherwise every file would fail to load one by one.
if not os.path.isdir(file_directory):
    raise FileNotFoundError(
        f"Input directory not found: {file_directory!r} (cwd: {os.getcwd()})"
    )

# Start from an empty collection so that re-running this cell does not append
# every record a second time.
stock_data_dict.clear()

for file_name in file_names:
    file_path = os.path.join(file_directory, file_name)
    data = load_excel_sheet(file_path)
    # load_excel_sheet reports unreadable files itself and returns None for them.
    if data is not None:
        organize_stock_data(data)

Error loading Excel sheet: [Errno 2] No such file or directory: 'BacktestingData/Jan19.xlsx'


In [9]:
# Move into the 'Data' output folder, creating it on first use. The guard keeps
# the cell idempotent: without it, a re-run would create and descend into a
# nested 'Data/Data' folder.
if os.path.basename(os.getcwd()) != 'Data':
    os.makedirs('Data', exist_ok=True)  # Ensure the 'Data' directory exists
    os.chdir('Data')

In [10]:
# Write one workbook per stock into the current working directory.
# NOTE(review): the stock code is used verbatim as the file name - confirm that
# no code contains characters that are invalid in a file name.
for stock, data in stock_data_dict.items():
    formatted_data = pd.DataFrame(data)
    if not formatted_data.empty:
        excel_file_name = f"{stock}.xlsx"

        # Drop rows that are exact repeats (the same date and close collected
        # more than once), then order chronologically. The stable sort keeps
        # the load order for rows that share a date.
        formatted_data = (
            formatted_data
            .drop_duplicates()
            .sort_values(by='Days Since Start', kind='stable')
        )

        formatted_data.to_excel(excel_file_name, index=False)
        print(f"Data for {stock} saved to {excel_file_name}")
    else:
        print(f"No data available for {stock}")

Data for 360ONE IN saved to 360ONE IN.xlsx
Data for 3M IN saved to 3M IN.xlsx
Data for 63MOONS IN saved to 63MOONS IN.xlsx
Data for ARTD IN saved to ARTD IN.xlsx
Data for ARTO IN saved to ARTO IN.xlsx
Data for AAVAS IN saved to AAVAS IN.xlsx
Data for ABAN IN saved to ABAN IN.xlsx
Data for BOOT IN saved to BOOT IN.xlsx
Data for ABGS IN saved to ABGS IN.xlsx
Data for ACC IN saved to ACC IN.xlsx
Data for ADANIT IN saved to ADANIT IN.xlsx
Data for ADE IN saved to ADE IN.xlsx
Data for ADANIGR IN saved to ADANIGR IN.xlsx
Data for ADSEZ IN saved to ADSEZ IN.xlsx
Data for ADANI IN saved to ADANI IN.xlsx
Data for ATGL IN saved to ATGL IN.xlsx
Data for AWLTD IN saved to AWLTD IN.xlsx
Data for ABCAP IN saved to ABCAP IN.xlsx
Data for ABFRL IN saved to ABFRL IN.xlsx
Data for ABML IN saved to ABML IN.xlsx
Data for ABSLAMC IN saved to ABSLAMC IN.xlsx
Data for ADVENZY IN saved to ADVENZY IN.xlsx
Data for RMDA IN saved to RMDA IN.xlsx
Data for AGIS IN saved to AGIS IN.xlsx
Data for AETHER IN saved to 