In [1]:
import os
import pandas as pd

def convert_file_to_csv(file_path, csv_file_path):
    """Convert a single Excel, HTML or tab-separated file to CSV.

    The reader is picked from the file extension (compared case-insensitively):

    * ``.xlsx`` / ``.xls`` -> ``pd.read_excel`` (openpyxl / xlrd engine), then
      an HTML fallback in case the file actually holds web-page data.
    * ``.html`` / ``.htm`` -> ``pd.read_html`` only. These files are no longer
      pushed through the tab-separated reader first, which could "succeed" on
      markup and write a meaningless CSV.
    * anything else -> ``pd.read_csv(sep='\\t')``, then the HTML fallback.

    Only the first table of an HTML document is exported.

    Args:
        file_path: Path of the file to read.
        csv_file_path: Path of the CSV file to write (overwritten if present).

    Returns:
        None. Every failure is printed instead of raised so that a folder walk
        calling this function keeps going after a bad file.
    """
    def read_first_html_table():
        return pd.read_html(file_path)[0]  # Read the first table

    html_attempt = ('HTML', read_first_html_table)
    lowered = file_path.lower()

    if lowered.endswith(('.xls', '.xlsx')):
        engine = 'openpyxl' if lowered.endswith('.xlsx') else 'xlrd'
        attempts = [
            ('Excel', lambda: pd.read_excel(file_path, engine=engine)),
            html_attempt,
        ]
    elif lowered.endswith(('.html', '.htm')):
        attempts = [html_attempt]
    else:
        attempts = [
            ('tab-separated text', lambda: pd.read_csv(file_path, sep='\t')),
            html_attempt,
        ]

    # Try each reader in order; the first one that reads AND saves wins.
    for label, read in attempts:
        try:
            df = read()
            df.to_csv(csv_file_path, index=False)
        except Exception as e:
            print(f"Failed to convert {file_path} as {label}: {e}")
        else:
            print(f"Converted {file_path} to {csv_file_path} as {label}")
            return

def convert_excel_to_csv(folder_path):
    """Convert every ``.xls``, ``.xlsx`` and ``.html`` file under a folder to CSV.

    The folder is walked recursively. Each matching file is handed to
    ``convert_file_to_csv`` together with a target path that sits next to the
    source and has the same base name with a ``.csv`` extension.

    Args:
        folder_path: Root directory to scan.
    """
    source_suffixes = ('.xls', '.xlsx', '.html')  # matched case-sensitively

    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            if not name.endswith(source_suffixes):
                continue

            stem = os.path.splitext(name)[0]
            source = os.path.join(current_dir, name)
            target = os.path.join(current_dir, stem + '.csv')

            convert_file_to_csv(source, target)

# Root folder to scan. A raw string is used so the backslashes in the Windows
# path are not interpreted as escape sequences.
# NOTE(review): the saved output of this cell lists files under a different
# folder than the path below, so it appears to come from an earlier run with
# another value - re-run the cell to refresh it.
folder_path = r'F:\DATA TEAM\Margin'  # Change this to your folder path
convert_excel_to_csv(folder_path)


Converted F:\VANSHITA\Margin - Copy\01-04-2024.xlsx to F:\VANSHITA\Margin - Copy\01-04-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\01-07-2024.xlsx to F:\VANSHITA\Margin - Copy\01-07-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\01-08-2024.xlsx to F:\VANSHITA\Margin - Copy\01-08-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\01-10-2024.xlsx to F:\VANSHITA\Margin - Copy\01-10-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\01012025.xlsx to F:\VANSHITA\Margin - Copy\01012025.csv as Excel
Converted F:\VANSHITA\Margin - Copy\02-04-2024.xlsx to F:\VANSHITA\Margin - Copy\02-04-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\02-05-2024.xlsx to F:\VANSHITA\Margin - Copy\02-05-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\02-07-2024.xlsx to F:\VANSHITA\Margin - Copy\02-07-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\02-08-2024.xlsx to F:\VANSHITA\Margin - Copy\02-08-2024.csv as Excel
Converted F:\VANSHITA\Margin - Copy\02-09-2024.xlsx to F:\V

In [3]:
import os
import pandas as pd

def remove_first_two_rows(folder_path, skiprows=3):
    """Strip leading lines from every ``.csv`` file under a folder, in place.

    Each file is re-read with its first ``skiprows`` lines dropped, so the
    line that follows them becomes the header, and the result is written back
    over the original file.

    Notes:
        * Despite the function name, the default drops THREE lines. That is
          the value the original code used, so it is kept as the default.
        * The rewrite is not idempotent: calling this again on the same
          folder removes another ``skiprows`` lines from every file.
        * A file with nothing left after the skipped lines is reported and
          left untouched, so one short file no longer aborts the walk halfway
          through the folder.

    Args:
        folder_path: Root directory to scan recursively.
        skiprows: Number of leading lines to drop from each file.
    """
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if not file.endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            try:
                # Read the CSV file, skipping the leading lines
                df = pd.read_csv(file_path, skiprows=skiprows)
            except pd.errors.EmptyDataError:
                print(f"Skipped {file_path}: nothing left after skipping {skiprows} lines")
                continue

            # Save the updated DataFrame back to the CSV
            df.to_csv(file_path, index=False)

# Specify the path to your main folder (raw string keeps the backslashes literal).
# The call below rewrites every CSV under this folder in place, so running this
# cell a second time strips leading rows from the same files again.
main_folder = r'F:\DATA TEAM\Margin'
remove_first_two_rows(main_folder)


In [5]:
import os
import pandas as pd 

# Path to the main folder whose CSV files are merged (searched recursively).
main_folder_path = r'F:\DATA TEAM\Margin'  # Use raw string literal to handle backslashes

# Initialize an empty DataFrame to hold the merged data.
# This is a module-level accumulator: merge_csv_files rebinds it via `global`,
# so re-running this cell resets the merge result to empty first.
merged_df = pd.DataFrame() 

# Function to recursively search for CSV files and merge them
def merge_csv_files(directory):
    """Merge every non-empty CSV file found under ``directory``.

    Files are matched on a case-insensitive ``.csv`` extension. Directories
    and files are visited in sorted order so the row order of the result does
    not depend on the filesystem. Frames are collected in a list and
    concatenated once at the end instead of growing a DataFrame inside the
    loop, which copied all previously merged rows for every file.

    The module-level ``merged_df`` is still updated for existing callers: rows
    already held there are kept in front of the newly read ones. If it does
    not exist yet it is created.

    Note: any ``.csv`` under ``directory`` is merged, including a merge output
    that was previously saved inside the same folder.

    Args:
        directory: Root directory to scan recursively.

    Returns:
        The merged DataFrame (the same object stored in ``merged_df``).
    """
    global merged_df

    new_frames = []
    # Walk through the directory tree
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's descent order
        for file in sorted(files):
            # Check if the file is a CSV file (case insensitive)
            if not file.lower().endswith('.csv'):
                continue

            # Construct the full path to the CSV file
            file_path = os.path.join(root, file)
            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            new_frames.append(df)
            print(f'Merged: {file_path}')  # Optional: print each file being merged

    # Look the accumulator up defensively: it may not have been created yet.
    previous = globals().get('merged_df')
    if new_frames:
        # Leave an empty seed frame out of the concat; it contributes no rows.
        has_previous_rows = previous is not None and not previous.empty
        parts = ([previous] if has_previous_rows else []) + new_frames
        merged_df = pd.concat(parts, ignore_index=True)
    elif previous is None:
        merged_df = pd.DataFrame()

    return merged_df

# The merged output is written INSIDE the folder that gets walked, so decide
# its path before merging.
output_file_path = os.path.join(main_folder_path, 'ClubMargin_24.csv')

# Drop the output of a previous run first. Otherwise the walk would pick it up
# as just another CSV and merge last run's result into this one, duplicating
# every row. The file is regenerated a few lines below anyway.
if os.path.exists(output_file_path):
    os.remove(output_file_path)

# Call the function to start merging CSV files
merge_csv_files(main_folder_path)

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"All CSV files have been merged into '{output_file_path}'")


Merged: F:\VANSHITA\Margin - Copy\01-04-2024.csv
Merged: F:\VANSHITA\Margin - Copy\01-07-2024.csv
Merged: F:\VANSHITA\Margin - Copy\01-08-2024.csv
Merged: F:\VANSHITA\Margin - Copy\01-10-2024.csv
Merged: F:\VANSHITA\Margin - Copy\01012025.csv
Merged: F:\VANSHITA\Margin - Copy\02-04-2024.csv
Merged: F:\VANSHITA\Margin - Copy\02-05-2024.csv
Merged: F:\VANSHITA\Margin - Copy\02-07-2024.csv
Merged: F:\VANSHITA\Margin - Copy\02-08-2024.csv
Merged: F:\VANSHITA\Margin - Copy\02-09-2024.csv
Merged: F:\VANSHITA\Margin - Copy\02-12-2024.csv
Merged: F:\VANSHITA\Margin - Copy\02012025.csv
Merged: F:\VANSHITA\Margin - Copy\03-04-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03-05-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03-06-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03-07-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03-09-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03-10-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03-12-2024.csv
Merged: F:\VANSHITA\Margin - Copy\03012025.csv
Merged: F:\VANSHITA\Margin