# READ CSV & CALCULATION


In [1]:
import os
import pandas as pd

def process_csv(file_path, date_formatted):
    """Summarise one daily CSV into one row per client.

    The first two lines of the file are skipped; the third line is read as the
    header. ``NOT_PROFIT`` is summed per ``CLIENT_ID`` / ``COMPANY_CODE`` and
    the company codes are pivoted into columns.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.
    date_formatted : str
        Value stored in the ``DATE`` column of every output row.

    Returns
    -------
    pandas.DataFrame
        Columns ``CLIENT_ID``, ``DATE``, ``DERIVATIVES``, ``EXPENSES`` followed
        by one column per additional company code found in the file. Missing
        client/code combinations are 0. An empty DataFrame is returned when the
        required columns are absent or when reading/processing fails (the
        reason is printed).
    """
    required_columns = ('CLIENT_ID', 'COMPANY_CODE', 'NOT_PROFIT')
    value_columns = ['DERIVATIVES', 'EXPENSES']

    try:
        df = pd.read_csv(file_path, skiprows=2)

        if not all(column in df.columns for column in required_columns):
            print(f"Required columns not found in {file_path}")
            return pd.DataFrame()  # Return an empty DataFrame

        # Add the date as a new column
        df['DATE'] = date_formatted

        # Group by CLIENT_ID, COMPANY_CODE, and DATE, summing NOT_PROFIT
        cumulative_df = df.groupby(['CLIENT_ID', 'COMPANY_CODE', 'DATE'], as_index=False)['NOT_PROFIT'].sum()

        # One row per client/date, one column per company code
        pivoted = cumulative_df.pivot(index=['CLIENT_ID', 'DATE'], columns='COMPANY_CODE', values='NOT_PROFIT')

        # Guarantee both value columns exist, in a fixed order, and keep any
        # other company codes after them. Forcing a fixed list of names onto
        # the pivot instead would raise (and lose the whole file) as soon as a
        # third company code appeared.
        extra_columns = [column for column in pivoted.columns if column not in value_columns]
        output_df = (
            pivoted
            .reindex(columns=value_columns + extra_columns)
            .fillna(0)  # Missing client/code combinations count as 0
            .reset_index()
        )
        output_df.columns.name = None  # Remove the columns' name

        return output_df

    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the batch.
        print(f"Error processing {file_path}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on failure

def convert_excel_to_csv(folder_path, output_filename='PR17.csv'):
    """Aggregate every date-named CSV under ``folder_path`` into one profit summary.

    The folder is walked recursively. Each ``*.csv`` file whose name (without
    extension) is a ``DDMMYYYY`` date is passed to ``process_csv``; the results
    are summed per ``CLIENT_ID`` and ``DATE``, a ``PROFIT`` column is added, and
    the table is printed and written to ``output_filename`` inside
    ``folder_path``.

    CSV files whose name is not a ``DDMMYYYY`` date are skipped with a message.
    This includes the summary file written by an earlier run, which lives in
    the same folder and would otherwise abort the run when its name failed to
    parse as a date.

    Parameters
    ----------
    folder_path : str
        Root folder that is scanned and that receives the output file.
    output_filename : str, optional
        Name of the summary CSV written into ``folder_path``.

    Returns
    -------
    None
    """
    value_columns = ['DERIVATIVES', 'EXPENSES']
    all_dataframes = []  # One processed DataFrame per input file

    for root, dirs, files in os.walk(folder_path):
        for file in files:
            # Case-insensitive so '.CSV' files are picked up as well
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)

            # The file name without extension is expected to be the date, e.g. '01042024'
            date_str = os.path.splitext(file)[0]
            try:
                date_formatted = pd.to_datetime(date_str, format='%d%m%Y').strftime('%d-%m-%Y')
            except (ValueError, TypeError):
                print(f"Skipping {file_path}: file name is not a DDMMYYYY date")
                continue

            processed_df = process_csv(file_path, date_formatted)
            if not processed_df.empty:
                all_dataframes.append(processed_df)

    if not all_dataframes:
        print("No data to process.")
        return

    final_output_df = pd.concat(all_dataframes, ignore_index=True)

    # A value column is absent when no input file contained that company code;
    # treat it as 0 rather than failing with a KeyError below.
    for column in value_columns:
        if column not in final_output_df.columns:
            final_output_df[column] = 0.0

    # Sum all entries across all files by CLIENT_ID and DATE
    final_cumulative_df = final_output_df.groupby(['CLIENT_ID', 'DATE'], as_index=False)[value_columns].sum()

    # PROFIT is the sum of both columns: in the captured output of this
    # notebook EXPENSES is stored as a negative amount, so adding it nets
    # the expenses off.
    final_cumulative_df['PROFIT'] = final_cumulative_df['DERIVATIVES'] + final_cumulative_df['EXPENSES']

    # Reorder the columns: DATE, CLIENT_ID, DERIVATIVES, EXPENSES, PROFIT.
    # The explicit copy keeps the assignment below from writing into a slice.
    final_cumulative_df = final_cumulative_df[['DATE', 'CLIENT_ID', 'DERIVATIVES', 'EXPENSES', 'PROFIT']].copy()

    # Convert DATE back to datetime so sorting is chronological, not alphabetical
    final_cumulative_df['DATE'] = pd.to_datetime(final_cumulative_df['DATE'], format='%d-%m-%Y')
    final_cumulative_df = final_cumulative_df.sort_values(by=['DATE', 'CLIENT_ID'])

    # Print the final output DataFrame for inspection
    print(final_cumulative_df)

    # Save the cumulative DataFrame as a CSV file
    output_file_path = os.path.join(folder_path, output_filename)
    final_cumulative_df.to_csv(output_file_path, index=False)
    print(f"Cumulative data saved to: {output_file_path}")

# Folder that is scanned recursively for the daily CSV files (named DDMMYYYY.csv);
# the summary CSV is written back into this same folder.
# A raw string is used so the backslashes in the Windows path are kept literally.
folder_path = r'F:\DATA TEAM\Process NSE\Current Date File'  # Change this to your folder path
convert_excel_to_csv(folder_path)


          DATE CLIENT_ID  DERIVATIVES  EXPENSES      PROFIT
0   2024-12-17  ALLOPT01   -484690.75 -15667.98  -500358.73
1   2024-12-17  ALLOPT02     34731.75  -4558.61    30173.14
2   2024-12-17  ALLOPT03   -317767.00  -2412.74  -320179.74
3   2024-12-17  ALLOPT19     40520.00   -186.36    40333.64
4   2024-12-17  ALLOPT20   -153255.25  -9261.06  -162516.31
..         ...       ...          ...       ...         ...
103 2024-12-17  NFTOPT16   1295821.25  -7522.24  1288299.01
104 2024-12-17  NFTOPT17   -425950.00  -1746.61  -427696.61
105 2024-12-17  NFTOPT18    737266.25  -6151.63   731114.62
106 2024-12-17  NFTOPT19    282157.50  -1814.78   280342.72
107 2024-12-17  NFTOPT20    829320.00  -2004.78   827315.22

[108 rows x 5 columns]
Cumulative data saved to: F:\DATA TEAM\Process NSE\Current Date File\PR17.csv


# CLUB: combine all profit CSV files into one

In [3]:
import os
import pandas as pd 

# Path to the main folder
main_folder_path = r'F:\DATA TEAM\Process NSE\CLUB Profit'  # Use raw string literal to handle backslashes

# The merged result is written into the same folder that is scanned, so it is
# excluded from the inputs; otherwise re-running this cell would merge the
# previous output into itself and duplicate every row.
output_file_path = os.path.join(main_folder_path, 'Profit_17.csv')

# Module-level DataFrame that merge_csv_files() extends with the merged data
merged_df = pd.DataFrame()


def merge_csv_files(directory, exclude_paths=()):
    """Merge every CSV file found under ``directory`` into the global ``merged_df``.

    The directory tree is walked recursively. Each ``*.csv`` file (extension
    matched case-insensitively) is read with ``pd.read_csv``; files that are
    empty or cannot be read are reported and skipped. All frames are
    concatenated in a single call after the walk and appended to the existing
    contents of ``merged_df``.

    Parameters
    ----------
    directory : str
        Root folder to search.
    exclude_paths : iterable of str, optional
        Files that must not be merged (typically the output file when it is
        stored inside ``directory``).

    Returns
    -------
    pandas.DataFrame
        The updated ``merged_df``.
    """
    global merged_df

    def _normalise(path):
        # Absolute, case-normalised form so the same file compares equal
        # regardless of how its path was spelled.
        return os.path.normcase(os.path.abspath(path))

    excluded = {_normalise(path) for path in exclude_paths}
    frames = []

    # Walk through the directory tree
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Check if the file is a CSV file (case insensitive)
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file)
            if _normalise(file_path) in excluded:
                print(f'Skipped excluded file: {file_path}')
                continue

            try:
                df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                print(f'Skipped file with empty data: {file_path}')
                continue
            except Exception as e:
                # Best-effort: one unreadable file must not abort the merge
                print(f'Error processing file {file_path}: {e}')
                continue

            if df.empty:
                print(f'Skipped empty file: {file_path}')
                continue

            frames.append(df)
            print(f'Merged: {file_path}')  # Optional: print each file being merged

    if frames:
        # Concatenate once instead of once per file; an empty accumulator is
        # left out so it does not take part in the concatenation.
        parts = frames if merged_df.empty else [merged_df, *frames]
        merged_df = pd.concat(parts, ignore_index=True)

    return merged_df


# Call the function to start merging CSV files (the output file itself is excluded)
merge_csv_files(main_folder_path, exclude_paths=[output_file_path])

# Save the merged DataFrame to a new CSV file, unless there is nothing to save
if merged_df.empty:
    print(f"No CSV data found under '{main_folder_path}'; nothing was written")
else:
    merged_df.to_csv(output_file_path, index=False)
    print(f"All CSV files have been merged into '{output_file_path}'")


Merged: F:\DATA TEAM\Process NSE\CLUB Profit\PR17.csv
Merged: F:\DATA TEAM\Process NSE\CLUB Profit\Profit_16.csv
All CSV files have been merged into 'F:\DATA TEAM\Process NSE\CLUB Profit\Profit_17.csv'
