In [1]:
import pandas as pd

# Three-level column header, written level by level (one list per level).
columns = pd.MultiIndex.from_arrays(
    [
        ['', '', '', 'Expiry', 'Expiry'],                  # Level_0
        ['', '', '', '10y', '15y'],                        # Level_1
        ['Risk_Factor', 'Currency', 'Maturity', '', ''],   # Level_2
    ],
    names=['Level_0', 'Level_1', 'Level_2'],
)

# Input frames: three identifier columns followed by one value per expiry.
spread_df = pd.DataFrame(
    data=[
        ('cor', 'ER', '2y', 1, 2),
        ('cor', 'ER', '5y', 2, 3),
        ('cor', 'US', '2y', 3, 4),
        ('cor', 'US', '5y', 4, 5),
    ],
    columns=columns,
)

risk_df = pd.DataFrame(
    data=[
        ('cor', 'ER', '2y', 10, 20),
        ('cor', 'ER', '5y', 11, 21),
        ('cor', 'US', '2y', 12, 22),
        ('cor', 'US', '5y', 13, 23),
    ],
    columns=columns,
)

# Function to calculate the output dataframes
def calculate_exit_costs(spread_df, risk_df):
    """Multiply spreads by risks per expiry column; return wide and long results.

    Both frames must share one layout: three leading identifier columns
    followed by the numeric expiry columns.

    Parameters
    ----------
    spread_df, risk_df : pandas.DataFrame
        Frames with the same row index, the same column labels and the same
        values in the three identifier columns.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``exit_cost_wide_df``: the identifier columns of ``spread_df`` followed
        by ``spread * risk`` for every expiry column.
        ``exit_cost_long_df``: one row per (input row, expiry column) with the
        columns 'Risk_Factor', 'Currency', 'Maturity', 'Level_0', 'Expiry',
        'Exit Cost' (assigned positionally).

    Raises
    ------
    ValueError
        If the identifier columns of the two inputs are not identical.

    Notes
    -----
    Prints the head of the melted frame before the columns are relabelled.
    """
    n_keys = 3  # number of leading identifier columns

    spread_keys = spread_df.iloc[:, :n_keys]
    risk_keys = risk_df.iloc[:, :n_keys]

    # The product below aligns on row index and column labels only. Without
    # this check, rows describing different instruments would be multiplied
    # together silently.
    if not spread_keys.equals(risk_keys):
        raise ValueError(
            "spread_df and risk_df must have identical identifier columns "
            "(same rows, in the same order)."
        )

    # Element-wise product of the expiry columns
    exit_costs = spread_df.iloc[:, n_keys:] * risk_df.iloc[:, n_keys:]

    # Identifier columns first, then the computed exit costs
    exit_cost_wide_df = pd.concat([spread_keys, exit_costs], axis=1)

    # Take the id columns from the frame itself instead of repeating the
    # caller's column tuples here.
    exit_cost_long_df = pd.melt(
        exit_cost_wide_df,
        id_vars=list(exit_cost_wide_df.columns[:n_keys]),
        var_name=['Level_0', 'Level_1'],
        value_name='Exit Cost',
    )

    # Check the structure of the melted dataframe
    print("Structure of melted dataframe:")
    print(exit_cost_long_df.head())

    # Positional relabel: three id columns, the two melted header levels, the value
    exit_cost_long_df.columns = ['Risk_Factor', 'Currency', 'Maturity', 'Level_0', 'Expiry', 'Exit Cost']

    return exit_cost_wide_df, exit_cost_long_df

# Run the calculation on the sample inputs
exit_cost_wide_df, exit_cost_long_df = calculate_exit_costs(spread_df, risk_df)

# Show each result under its heading (heading and frame on separate lines)
print("Exit Cost (wide format):", exit_cost_wide_df, sep="\n")
print("\nExit Cost (long format):", exit_cost_long_df, sep="\n")


Structure of melted dataframe:
  (, , Risk_Factor) (, , Currency) (, , Maturity) Level_0 Level_1  Exit Cost
0               cor             ER             2y  Expiry     10y         10
1               cor             ER             5y  Expiry     10y         22
2               cor             US             2y  Expiry     10y         36
3               cor             US             5y  Expiry     10y         52
4               cor             ER             2y  Expiry     15y         40
Exit Cost (wide format):
Level_0                               Expiry     
Level_1                                  10y  15y
Level_2 Risk_Factor Currency Maturity            
0               cor       ER       2y     10   40
1               cor       ER       5y     22   63
2               cor       US       2y     36   88
3               cor       US       5y     52  115

Exit Cost (long format):
  Risk_Factor Currency Maturity Level_0 Expiry  Exit Cost
0         cor       ER       2y  Expiry    10y  

In [2]:
import pandas as pd

# Three-level column header, written level by level (one list per level).
columns = pd.MultiIndex.from_arrays(
    [
        ['Risk_Factor', 'Currency', 'Maturity', 'Expiry', 'Expiry'],  # Level_0
        ['', '', '', '10y', '15y'],                                   # Level_1
        [''] * 5,                                                     # Level_2
    ],
    names=['Level_0', 'Level_1', 'Level_2'],
)

# Input frames: three identifier columns followed by one value per expiry.
spread_df = pd.DataFrame(
    data=[
        ('cor', 'ER', '2y', 1, 2),
        ('cor', 'ER', '5y', 2, 3),
        ('cor', 'US', '2y', 3, 4),
        ('cor', 'US', '5y', 4, 5),
    ],
    columns=columns,
)

risk_df = pd.DataFrame(
    data=[
        ('cor', 'ER', '2y', 10, 20),
        ('cor', 'ER', '5y', 11, 21),
        ('cor', 'US', '2y', 12, 22),
        ('cor', 'US', '5y', 13, 23),
    ],
    columns=columns,
)

# Function to calculate the output dataframes
def calculate_exit_costs(spread_df, risk_df):
    """Multiply spreads by risks per expiry column; return wide and long results.

    Both frames must share one layout: three leading identifier columns
    followed by the numeric expiry columns, under a three-level column header.

    Parameters
    ----------
    spread_df, risk_df : pandas.DataFrame
        Frames with the same row index, the same column labels and the same
        values in the three identifier columns.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``exit_cost_wide_df``: the identifier columns of ``spread_df`` followed
        by ``spread * risk`` for every expiry column.
        ``exit_cost_long_df``: one row per (input row, expiry column) with the
        columns 'Risk_Factor', 'Currency', 'Maturity', 'Expiry', 'Exit Cost'
        (assigned positionally).

    Raises
    ------
    ValueError
        If the identifier columns of the two inputs are not identical.
    """
    n_keys = 3  # number of leading identifier columns

    spread_keys = spread_df.iloc[:, :n_keys]
    risk_keys = risk_df.iloc[:, :n_keys]

    # The product below aligns on row index and column labels only. Without
    # this check, rows describing different instruments would be multiplied
    # together silently.
    if not spread_keys.equals(risk_keys):
        raise ValueError(
            "spread_df and risk_df must have identical identifier columns "
            "(same rows, in the same order)."
        )

    # Element-wise product of the expiry columns
    exit_costs = spread_df.iloc[:, n_keys:] * risk_df.iloc[:, n_keys:]

    # Identifier columns first, then the computed exit costs
    exit_cost_wide_df = pd.concat([spread_keys, exit_costs], axis=1)

    # One var_name per column level; only the middle level carries the expiry
    # label. The id columns are taken from the frame itself instead of
    # repeating the caller's column tuples here.
    exit_cost_long_df = pd.melt(
        exit_cost_wide_df,
        id_vars=list(exit_cost_wide_df.columns[:n_keys]),
        var_name=['Level_0', 'Expiry', 'Level_2'],
        value_name='Exit Cost',
    )

    # The outer and inner header levels add nothing to the long format
    exit_cost_long_df = exit_cost_long_df.drop(columns=['Level_0', 'Level_2'])

    # Replace the tuple labels of the id columns with flat names
    exit_cost_long_df.columns = ['Risk_Factor', 'Currency', 'Maturity', 'Expiry', 'Exit Cost']

    return exit_cost_wide_df, exit_cost_long_df

# Run the calculation on the sample inputs
exit_cost_wide_df, exit_cost_long_df = calculate_exit_costs(spread_df, risk_df)

# Inputs, each under its heading (heading and frame on separate lines)
print("Spread Data:", spread_df, sep="\n")
print("\nRisk Data:", risk_df, sep="\n")


# Results, in wide and then long layout
print("Exit Cost (wide format):", exit_cost_wide_df, sep="\n")
print("\nExit Cost (long format):", exit_cost_long_df, sep="\n")


Spread Data:
Level_0 Risk_Factor Currency Maturity Expiry    
Level_1                                  10y 15y
Level_2                                         
0               cor       ER       2y      1   2
1               cor       ER       5y      2   3
2               cor       US       2y      3   4
3               cor       US       5y      4   5

Risk Data:
Level_0 Risk_Factor Currency Maturity Expiry    
Level_1                                  10y 15y
Level_2                                         
0               cor       ER       2y     10  20
1               cor       ER       5y     11  21
2               cor       US       2y     12  22
3               cor       US       5y     13  23
Exit Cost (wide format):
Level_0 Risk_Factor Currency Maturity Expiry     
Level_1                                  10y  15y
Level_2                                          
0               cor       ER       2y     10   40
1               cor       ER       5y     22   63
2             

Let's go through the code step by step to understand what each part is doing:

1. **Define the Multi-Index for Columns:**
   ```python
   columns = pd.MultiIndex.from_tuples([
       ('Risk_Factor', '', ''),
       ('Currency', '', ''),
       ('Maturity', '', ''),
       ('Expiry', '10y', ''),
       ('Expiry', '15y', '')
   ], names=['Level_0', 'Level_1', 'Level_2'])
   ```
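   This creates a three-level `MultiIndex` (levels named `Level_0`, `Level_1` and `Level_2`) for the columns of the DataFrame. Each tuple describes one column: the identifier columns 'Risk_Factor', 'Currency' and 'Maturity' are labelled only at `Level_0`, while the two 'Expiry' columns are told apart by their `Level_1` labels '10y' and '15y'. `Level_2` is empty for every column.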

2. **Define Input DataFrames with Multi-Indexed Columns:**
   ```python
   spread_df = pd.DataFrame([...], columns=columns)
   risk_df = pd.DataFrame([...], columns=columns)
   ```
   Two DataFrames `spread_df` and `risk_df` are created using the multi-index defined above. The data provided in the lists corresponds to the rows of the DataFrame.

3. **Function to Calculate Output DataFrames:**
   ```python
   def calculate_exit_costs(spread_df, risk_df):
   ```
   This function, `calculate_exit_costs`, will take the two input DataFrames and calculate the exit costs based on the spreads and risks.

4. **Element-wise Multiplication:**
   ```python
   exit_cost_wide_df = spread_df.iloc[:, 3:] * risk_df.iloc[:, 3:]
   ```
   The function performs element-wise multiplication of the '10y' and '15y' columns from both input DataFrames to calculate the exit costs.

5. **Concatenate Non-multiplied Columns with Calculated Exit Costs:**
   ```python
   exit_cost_wide_df = pd.concat([spread_df.iloc[:, :3], exit_cost_wide_df], axis=1)
   ```
   The non-multiplied columns ('Risk_Factor', 'Currency', 'Maturity') are concatenated with the calculated exit costs to form a new DataFrame that includes all the necessary information.

6. **Convert Wide Format DataFrame to Long Format:**
   ```python
   exit_cost_long_df = pd.melt(exit_cost_wide_df, id_vars=[...], var_name=['Level_0', 'Expiry', 'Level_2'], value_name='Exit Cost')
   ```
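   The `pd.melt` function is used to transform the wide format DataFrame into a long format. The `id_vars` parameter specifies which columns to keep as they are. Because the columns have three levels, `var_name` supplies one name per level: the resulting 'Level_0' column holds the top-level label ('Expiry'), the 'Expiry' column holds the expiry labels ('10y', '15y'), and 'Level_2' holds the empty bottom-level labels. The `value_name` parameter names the column that holds the corresponding values.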

7. **Drop Unnecessary 'Level' Columns:**
   ```python
   exit_cost_long_df = exit_cost_long_df.drop(columns=['Level_0', 'Level_2'])
   ```
   After melting, the unnecessary 'Level' columns are dropped from the DataFrame as they are not needed for the final output.

8. **Rename Columns to Remove 'Level' Related Information:**
   ```python
   exit_cost_long_df.columns = ['Risk_Factor', 'Currency', 'Maturity', 'Expiry', 'Exit Cost']
   ```
   The columns of the long format DataFrame are renamed to provide clear and meaningful names, removing any 'Level' related information.

9. **Return the Calculated DataFrames:**
   ```python
   return exit_cost_wide_df, exit_cost_long_df
   ```
   The function returns the calculated wide and long format DataFrames.

10. **Call the Function and Get Output DataFrames:**
    ```python
    exit_cost_wide_df, exit_cost_long_df = calculate_exit_costs(spread_df, risk_df)
    ```
    The function `calculate_exit_costs` is called with the input DataFrames, and the resulting output DataFrames are stored in `exit_cost_wide_df` and `exit_cost_long_df`.

11. **Display the Resulting DataFrames:**
    ```python
    print("Exit Cost (wide format):")
    print(exit_cost_wide_df)
    print("\nExit Cost (long format):")
    print(exit_cost_long_df)
    ```
    Finally, the code prints out the resulting wide and long format DataFrames to display the calculated exit costs.

This code is designed to process financial data, specifically to calculate and transform exit cost data for different risk factors, currencies, and maturities from a wide format to a long format for further analysis or reporting. 

In [9]:
import pandas as pd

def insert_new_header(df, new_headers, position):
    """Insert one extra header level into the column labels of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are relabelled. The frame is modified in place
        and also returned.
    new_headers : sequence
        One new label per column, in column order.
    position : int
        Index within each header tuple at which the new label is inserted
        (``list.insert`` semantics, so a position past the end appends).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now with ``MultiIndex`` columns.

    Raises
    ------
    ValueError
        If ``new_headers`` does not have exactly one entry per column.
    """
    current_headers = list(df.columns)

    # Every column needs exactly one new label
    if len(new_headers) != len(current_headers):
        raise ValueError("The length of new_headers must match the number of columns in the DataFrame.")

    new_multi_index = []
    for header, extra in zip(current_headers, new_headers):
        # Labels of a single-level Index are scalars, not tuples. Wrap them so
        # that 'AB' stays one label instead of being split into 'A' and 'B'.
        parts = list(header) if isinstance(header, tuple) else [header]
        parts.insert(position, extra)
        new_multi_index.append(tuple(parts))

    # Carry the existing level names over; the inserted level is unnamed.
    level_names = list(df.columns.names)
    level_names.insert(position, None)
    if any(len(parts) != len(level_names) for parts in new_multi_index):
        # e.g. a flat Index whose labels are tuples: names cannot be mapped
        level_names = None

    df.columns = pd.MultiIndex.from_tuples(new_multi_index, names=level_names)
    return df

# Sample frame with a two-level column header
data = {('A1', 'B1'): [1, 3], ('A2', 'B2'): [2, 4]}
df = pd.DataFrame(data)

# One new label per column, appended as the third header level (0-based position 2)
new_headers = ['C1', 'C2']
position = 2

try:
    output_df = insert_new_header(df, new_headers, position)
except ValueError as e:
    # Report a header/column count mismatch instead of stopping the notebook
    print(e)
else:
    print(output_df)

  A1 A2
  B1 B2
  C1 C2
0  1  2
1  3  4


In [14]:
# When neighbouring columns of a multi-index DataFrame share the same
# higher-level header (here 'A1'), pandas prints that header only once by
# default to keep the table compact. To avoid any confusion about which header
# belongs to which column, turn this sparsification off so that every column
# shows all of its header levels.
pd.options.display.multi_sparse = False

def aggregate_multiindex(df):
    """Sum columns that share the same last-level header.

    Columns are grouped by the last level of their ``MultiIndex`` header and
    summed. Each resulting column is labelled with a two-level header
    ``(first_header, last_header)``, where ``first_header`` is the top-level
    label of the first input column that carries that last-level label.
    Output columns follow the group order produced by ``groupby``; no further
    sorting is applied.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``MultiIndex`` columns.

    Returns
    -------
    pandas.DataFrame
        New frame with one column per distinct last-level label and the same
        rows as ``df``.

    Raises
    ------
    ValueError
        If the columns of ``df`` are not a ``MultiIndex``.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("The DataFrame does not have a multi-index header.")

    # groupby(axis=1) is deprecated (pandas 2.1) and removed in pandas 3.0;
    # grouping the transposed frame is the documented replacement.
    df_aggregated = df.T.groupby(level=-1).sum().T

    # Map each last-level label to the top-level label of the first column
    # carrying it, so groups under different top headers are not all stamped
    # with the first column's header.
    first_header_for = {}
    for col in df.columns:
        first_header_for.setdefault(col[-1], col[0])

    new_columns = [(first_header_for[last], last) for last in df_aggregated.columns]

    # Assign the two-level header to the aggregated frame
    df_aggregated.columns = pd.MultiIndex.from_tuples(new_columns)

    return df_aggregated


# Example usage:
# Build a sample input with a three-level header: a shared top label 'A1',
# a per-column middle label (B1, B2, D1, D2) and the group label (B or D).
tuples = [('A1', f'{group}{n}', group) for group in 'BD' for n in (1, 2)]
index = pd.MultiIndex.from_tuples(tuples)
input_df = pd.DataFrame([[1, 3, 5, 7], [2, 4, 6, 8]], columns=index)

# Aggregate the sample input
output_df = aggregate_multiindex(input_df)

# Show input and result, each under its heading
print("Input DataFrame:", input_df, sep="\n")
print("\nOutput DataFrame:", output_df, sep="\n")

Input DataFrame:
  A1 A1 A1 A1
  B1 B2 D1 D2
   B  B  D  D
0  1  3  5  7
1  2  4  6  8

Output DataFrame:
  A1  A1
   B   D
0  4  12
1  6  14
