In [1]:
import os
import pandas as pd

def convert_file_to_csv(file_path, csv_file_path):
    """Convert a single spreadsheet-like file to CSV.

    The readers to try are chosen from the file extension (case-insensitive):

    * ``.xls`` / ``.xlsx`` -- ``pd.read_excel`` first, then ``pd.read_html`` as
      a fallback, because a file with a spreadsheet extension can really be an
      HTML table.
    * ``.html`` / ``.htm`` -- ``pd.read_html`` only. These files are never read
      as tab-separated text: that reader can "succeed" on HTML markup and
      produce a meaningless one-column CSV.
    * anything else -- tab-separated text first, then ``pd.read_html``.

    For HTML only the first table in the document is converted.

    Failures are reported with ``print`` and never raised. When every reader
    fails, no CSV file is written.

    Args:
        file_path: Path of the file to convert.
        csv_file_path: Path of the CSV file to write.
    """
    extension = os.path.splitext(file_path)[1].lower()

    def read_excel():
        # openpyxl handles the XML-based .xlsx format, xlrd the legacy .xls one.
        engine = 'openpyxl' if extension == '.xlsx' else 'xlrd'
        return pd.read_excel(file_path, engine=engine)

    def read_tab_separated():
        return pd.read_csv(file_path, sep='\t')

    def read_first_html_table():
        return pd.read_html(file_path)[0]

    # Ordered (label, reader) pairs; the first reader that works wins.
    if extension in ('.html', '.htm'):
        attempts = [('HTML', read_first_html_table)]
    elif extension in ('.xls', '.xlsx'):
        attempts = [('Excel', read_excel), ('HTML', read_first_html_table)]
    else:
        attempts = [('tab-separated text', read_tab_separated),
                    ('HTML', read_first_html_table)]

    for label, reader in attempts:
        try:
            df = reader()
            df.to_csv(csv_file_path, index=False)
        except Exception as e:
            # Best effort: report the failure and move on to the next reader.
            print(f"Failed to convert {file_path} as {label}: {e}")
        else:
            print(f"Converted {file_path} to {csv_file_path} as {label}")
            return

def convert_excel_to_csv(folder_path):
    """Convert every .xls, .xlsx and .html file below ``folder_path`` to CSV.

    The folder is searched recursively. Each matching file is handed to
    ``convert_file_to_csv`` together with a target path that sits in the same
    directory and has the same base name with a ``.csv`` extension.

    Args:
        folder_path: Root directory to search.
    """
    wanted_suffixes = ('.xls', '.xlsx', '.html')

    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            # Only spreadsheet/HTML inputs are of interest (case-sensitive match).
            if not filename.endswith(wanted_suffixes):
                continue

            base_name = os.path.splitext(filename)[0]
            source_path = os.path.join(current_dir, filename)
            target_path = os.path.join(current_dir, f"{base_name}.csv")

            convert_file_to_csv(source_path, target_path)

# Folder that is searched recursively for .xls / .xlsx / .html files.
# A raw string keeps the Windows backslashes from being read as escape sequences.
folder_path = r'E:\Back-Up Files\Data Team\2024-2025\GREEK\Curent Date File'  # Change this to your folder path
# Each matching file gets a .csv with the same base name written next to it.
convert_excel_to_csv(folder_path)


Failed to convert E:\Back-Up Files\Data Team\2024-2025\GREEK\Curent Date File\19062025.xls as Excel: Pandas requires version '2.0.1' or newer of 'xlrd' (version '1.2.0' currently installed).
Converted E:\Back-Up Files\Data Team\2024-2025\GREEK\Curent Date File\19062025.xls to E:\Back-Up Files\Data Team\2024-2025\GREEK\Curent Date File\19062025.csv as HTML


In [2]:
import os
import pandas as pd

def process_csv_files(folder_path, output_name='19.csv'):
    """Combine the dated CSV files under ``folder_path`` into one CSV file.

    The folder is searched recursively. Every ``.csv`` file whose base name is
    a date written as ``DDMMYYYY`` is read with its first two rows skipped and
    gets a ``TradeDate`` column holding that date as ``DD-MM-YYYY`` text. All
    such frames are concatenated and written to ``output_name`` inside
    ``folder_path``.

    Files that are not inputs are skipped instead of aborting the run:

    * the output file itself (otherwise a second run would try to parse its
      name as a date and fail), and
    * any CSV whose base name is not a valid ``DDMMYYYY`` date.

    When no dated CSV file is found, a message is printed and nothing is
    written.

    Args:
        folder_path: Root directory to search; also where the result is saved.
        output_name: File name of the combined CSV. Defaults to ``'19.csv'``.
    """
    output_path = os.path.join(folder_path, output_name)
    all_data = []  # One DataFrame per input file

    for root, dirs, files in os.walk(folder_path):
        for file in files:
            stem, extension = os.path.splitext(file)
            if extension.lower() != '.csv':
                continue

            file_path = os.path.join(root, file)

            # Never feed a previously written result back into itself.
            if os.path.abspath(file_path) == os.path.abspath(output_path):
                continue

            # The file name carries the trade date, e.g. '01042024' (DDMMYYYY).
            try:
                trade_date = pd.to_datetime(stem, format='%d%m%Y')
            except ValueError:
                print(f"Skipped {file_path}: file name is not a DDMMYYYY date")
                continue

            # Read the CSV file, skipping the first two rows
            df = pd.read_csv(file_path, skiprows=2)
            df['TradeDate'] = trade_date.strftime('%d-%m-%Y')
            all_data.append(df)

    if not all_data:
        # pd.concat raises on an empty list, so stop here with a clear message.
        print(f"No dated CSV files found under {folder_path}; nothing was written")
        return

    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.to_csv(output_path, index=False)

# Folder holding the per-day CSV files (named DDMMYYYY.csv); it is searched recursively.
main_folder = r'E:\Back-Up Files\Data Team\2024-2025\GREEK\Curent Date File'  # or use forward slashes
# Writes the combined data to '19.csv' inside main_folder.
process_csv_files(main_folder)


In [3]:
import pandas as pd

# Load the combined CSV file; the same path is written back to further down.
trade_file_path = r'E:\Back-Up Files\Data Team\2024-2025\GREEK\Curent Date File\19.csv'
df = pd.read_csv(trade_file_path)

# Check the data type of 'TradeDate'
print("Data type of 'TradeDate':", df['TradeDate'].dtype)

# If 'TradeDate' is not a string, convert it to string first
if not pd.api.types.is_string_dtype(df['TradeDate']):
    df['TradeDate'] = df['TradeDate'].astype(str)

# Parse DD-MM-YYYY (or DD/MM/YYYY) text into datetimes. Remember whether the
# parse worked so the file is only rewritten, and success only reported, when
# the column really was converted. Running this cell again on a file it has
# already rewritten will therefore leave that file alone instead of claiming
# another update.
try:
    df['TradeDate'] = pd.to_datetime(df['TradeDate'].str.replace('/', '-'), format='%d-%m-%Y')
except Exception as e:
    print(f"Error: {e}")
    date_converted = False
else:
    date_converted = True

if date_converted:
    # Display the updated DataFrame (optional, you can remove this if not needed)
    print("Updated DataFrame:\n", df.head())

    # Save the updated DataFrame back to the same CSV file
    df.to_csv(trade_file_path, index=False)

    print("The date format has been updated and saved back to the same file.")
else:
    print("TradeDate could not be converted; the file was left unchanged.")



Data type of 'TradeDate': object
Updated DataFrame:
   CLIENT_ID COMPANY_CODE                SCRIP_SYMBOL  NET_QUANTITY  NET_RATE  \
0  FUTOPT01  DERIVATIVES  AUBANK 26Jun25 CE 700.0000             0      0.00   
1  FUTOPT01  DERIVATIVES  AUBANK 26Jun25 CE 770.0000         -1000     29.00   
2  FUTOPT01  DERIVATIVES  AUBANK 26Jun25 CE 800.0000         -1000     14.45   
3  FUTOPT01  DERIVATIVES  AUBANK 26Jun25 PE 700.0000             0      0.00   
4  FUTOPT01  DERIVATIVES  AUBANK 26Jun25 PE 770.0000         -1000      5.65   

   NET_AMOUNT  CLOSING_PRICE  NOT_PROFIT  TRADING_QUANTITY  TRADING_AMOUNT  \
0    -10050.0            0.0    -10050.0             -1000           94.65   
1     29000.0           27.0      2000.0             -1000           29.00   
2     14450.0            8.7      5750.0                 0            0.00   
3       100.0            0.0       100.0             -1000            0.50   
4      5650.0            4.8       850.0             -1000            5.65  

In [4]:
import os
import pandas as pd 

# Folder whose CSV files (including those in sub-folders) will be merged
main_folder_path = r'E:\Back-Up Files\Data Team\2024-2025\GREEK\CLUB'  # Use raw string literal to handle backslashes

# Start from an empty DataFrame. merge_csv_files() appends to this module-level
# variable, so resetting it here keeps a re-run of the cell from piling new
# rows on top of the rows left over from a previous run.
merged_df = pd.DataFrame() 

# Function to recursively search for CSV files and merge them
def merge_csv_files(directory):
    """Merge every CSV file found under ``directory`` into ``merged_df``.

    The directory tree is searched recursively for files ending in ``.csv``
    (case-insensitive). Directories and files are visited in sorted name
    order so the row order of the result does not depend on the filesystem.
    Files that are empty, or that cannot be read, are reported with ``print``
    and skipped.

    The rows are appended to whatever the module-level ``merged_df`` already
    holds, and the global is rebound to the combined frame.

    Args:
        directory: Root directory to search.

    Returns:
        The updated ``merged_df`` DataFrame.
    """
    global merged_df

    # Collect the frames and concatenate once at the end: calling pd.concat
    # inside the loop copies all rows gathered so far for every single file.
    frames = [] if merged_df.empty else [merged_df]

    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's descent order
        for file in sorted(files):
            # Check if the file is a CSV file (case insensitive)
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            frames.append(df)
            print(f'Merged: {file_path}')  # Optional: print each file being merged

    if frames:
        merged_df = pd.concat(frames, ignore_index=True)
    return merged_df

# Merge every CSV under main_folder_path into the module-level merged_df
merge_csv_files(main_folder_path) 

# Save the merged DataFrame to a new CSV file.
# NOTE: the output is written into the same folder that was just scanned, so
# running this cell again will read this file back in as one more input.
output_file_path = os.path.join(main_folder_path, '25_ClubG_19.csv')
merged_df.to_csv(output_file_path, index=False) 

print(f"All CSV files have been merged into '{output_file_path}'")


Merged: E:\Back-Up Files\Data Team\2024-2025\GREEK\CLUB\19.csv
Merged: E:\Back-Up Files\Data Team\2024-2025\GREEK\CLUB\25_ClubG18.csv
All CSV files have been merged into 'E:\Back-Up Files\Data Team\2024-2025\GREEK\CLUB\25_ClubG_19.csv'


In [2]:
import os
import pandas as pd 

# Folder whose CSV files (including those in sub-folders) will be merged
main_folder_path = r'E:\Back-Up Files\Data Team\2024-2025\GREEK\2425'  # Use raw string literal to handle backslashes

# Start from an empty DataFrame. merge_csv_files() appends to this module-level
# variable, so resetting it here keeps a re-run of the cell from piling new
# rows on top of the rows left over from a previous run.
merged_df = pd.DataFrame() 

# Function to recursively search for CSV files and merge them
def merge_csv_files(directory):
    """Merge every CSV file found under ``directory`` into ``merged_df``.

    The directory tree is searched recursively for files ending in ``.csv``
    (case-insensitive). Directories and files are visited in sorted name
    order so the row order of the result does not depend on the filesystem.
    Files that are empty, or that cannot be read, are reported with ``print``
    and skipped.

    The rows are appended to whatever the module-level ``merged_df`` already
    holds, and the global is rebound to the combined frame.

    Args:
        directory: Root directory to search.

    Returns:
        The updated ``merged_df`` DataFrame.
    """
    global merged_df

    # Collect the frames and concatenate once at the end: calling pd.concat
    # inside the loop copies all rows gathered so far for every single file.
    frames = [] if merged_df.empty else [merged_df]

    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's descent order
        for file in sorted(files):
            # Check if the file is a CSV file (case insensitive)
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            frames.append(df)
            print(f'Merged: {file_path}')  # Optional: print each file being merged

    if frames:
        merged_df = pd.concat(frames, ignore_index=True)
    return merged_df

# Merge every CSV under main_folder_path into the module-level merged_df
merge_csv_files(main_folder_path) 

# Save the merged DataFrame to a new CSV file.
# NOTE: the output is written into the same folder that was just scanned, so
# running this cell again will read this file back in as one more input.
output_file_path = os.path.join(main_folder_path, '25_ClubG_start to till date.csv')
merged_df.to_csv(output_file_path, index=False) 

print(f"All CSV files have been merged into '{output_file_path}'")


Merged: E:\Back-Up Files\Data Team\2024-2025\GREEK\2425\25_ClubG_22.csv
Merged: E:\Back-Up Files\Data Team\2024-2025\GREEK\2425\ClubG28.csv
All CSV files have been merged into 'E:\Back-Up Files\Data Team\2024-2025\GREEK\2425\25_ClubG_start to till date.csv'
