# Necessary Libraries

In [2]:
import pandas as pd
import os
import numpy as np


def calculate_average_temperature(input_file_path, sheet_name, output_dir):
    """Write the per-row mean of one Excel sheet to a new workbook.

    The sheet is loaded with pandas, the mean over the numeric columns of
    every row is computed, and the result is saved as a single-column
    workbook (column ``Average_Temperature``) named
    ``<input name>_Temperature<input extension>`` inside ``output_dir``.

    Args:
        input_file_path: Path of the Excel workbook to read.
        sheet_name: Name of the sheet whose rows are averaged.
        output_dir: Directory that receives the result; it is created when
            it does not exist yet.

    Raises:
        OSError: If ``input_file_path`` cannot be opened.
        ValueError: If pandas cannot find ``sheet_name`` in the workbook.
    """
    df = pd.read_excel(input_file_path, sheet_name=sheet_name)

    # numeric_only=True keeps text/label columns out of the average; without
    # it recent pandas versions raise a TypeError on any non-numeric column.
    row_means = df.mean(axis=1, numeric_only=True)
    average_temperature_df = row_means.to_frame(name='Average_Temperature')

    # Derive the output file name from the input file name
    base_name = os.path.basename(input_file_path)
    name, ext = os.path.splitext(base_name)
    output_file_path = os.path.join(output_dir, f"{name}_Temperature{ext}")

    # Create the destination only once the input has been read successfully,
    # so a bad input path does not leave an empty directory behind.
    os.makedirs(output_dir, exist_ok=True)
    average_temperature_df.to_excel(output_file_path, index=False, engine='openpyxl')
    
def process_excel_files(input_folder_path, output_folder_path):
    """Derive the delQ / Ftr sheets for every ``.xlsx`` workbook in a folder.

    For each workbook the ``Qd`` sheet is read and the following are computed:

    * ``delQ``        - rows from index 10 onwards minus the row at index 9
    * ``delQ_head``   - row sum of ``delQ`` divided by 130
    * ``Ftr2(k)``     - ``log10(|row minimum of delQ|)``
    * ``delQ-Q_head`` - ``delQ`` minus ``delQ_head`` (row-wise)
    * ``Ftr1(k)``     - ``log10(|row sum of squared delQ-Q_head / 129|)``

    All original sheets plus the five derived ones are written to
    ``updated_<file name>`` inside ``output_folder_path``. A workbook that
    cannot be processed is reported on stdout and skipped.

    Args:
        input_folder_path: Folder that is scanned (non-recursively) for
            ``.xlsx`` files.
        output_folder_path: Folder for the result workbooks; created when
            missing.
    """
    # NOTE(review): 130 and 129 are hard-coded in the original formulas;
    # presumably they are the number of samples per Qd row and that number
    # minus one - confirm against the data before reusing on other layouts.
    head_divisor = 130
    variance_divisor = 129

    # Ensure the output directory exists
    os.makedirs(output_folder_path, exist_ok=True)

    # Get a list of all Excel files in the folder
    file_list = [f for f in os.listdir(input_folder_path) if f.endswith('.xlsx')]

    for file_name in file_list:
        input_file_path = os.path.join(input_folder_path, file_name)
        output_file_path = os.path.join(output_folder_path, 'updated_' + file_name)

        try:
            # The context manager closes the workbook handle even when a
            # later step fails, so no file stays open or locked.
            with pd.ExcelFile(input_file_path) as xls:
                qd_df = pd.read_excel(xls, sheet_name='Qd')

                # delQ: every row after the row at index 9, relative to it
                reference_row = qd_df.iloc[9]
                delQ = qd_df.iloc[10:].reset_index(drop=True).subtract(reference_row, axis=1)

                # delQ_head: scaled row sums
                delQ_head = delQ.sum(axis=1) / head_divisor

                # Ftr2(k): log10 of the absolute row minimum
                ftr2_k = np.log10(delQ.min(axis=1).abs())

                # delQ-Q_head: deviation of each value from its row's delQ_head
                delQ_Q_head = delQ.subtract(delQ_head, axis=0)

                # Ftr1(k): log10 of the scaled sum of squared deviations
                ftr1_k = np.log10(((delQ_Q_head ** 2).sum(axis=1) / variance_divisor).abs())

                # Copy the original sheets, then append the derived ones
                with pd.ExcelWriter(output_file_path, engine='openpyxl', mode='w') as writer:
                    for sheet_name in xls.sheet_names:
                        pd.read_excel(xls, sheet_name=sheet_name).to_excel(
                            writer, sheet_name=sheet_name, index=False)
                    delQ.to_excel(writer, sheet_name="delQ", index=False)
                    delQ_head.to_frame().to_excel(
                        writer, sheet_name="delQ_head", index=False, header=False)
                    delQ_Q_head.to_excel(writer, sheet_name="delQ-Q_head", index=False)
                    ftr1_k.to_frame().to_excel(
                        writer, sheet_name="Ftr1(k)", index=False, header=False)
                    ftr2_k.to_frame().to_excel(
                        writer, sheet_name="Ftr2(k)", index=False, header=False)

        except (pd.errors.EmptyDataError, pd.errors.ParserError, FileNotFoundError, ValueError, KeyError) as e:
            print(f"Error processing {file_name}: {e}")
        except Exception as e:
            # Best-effort batch run: report and continue with the next file
            print(f"Unexpected error processing {file_name}: {e}")

def _pick_out_final_value(input_path, output_path, sheet_name, output_name):
    """Collect the last first-column value of ``sheet_name`` from each workbook.

    Shared implementation of ``pick_out_feature_1`` and ``pick_out_feature_2``.

    Args:
        input_path: Folder scanned (non-recursively) for ``.xlsx`` files.
        output_path: Existing folder that receives the summary workbook.
        sheet_name: Sheet to read in every workbook; also used as the name
            of the value column in the summary.
        output_name: File name of the summary workbook.

    Returns:
        DataFrame with the columns ``Filename`` and ``sheet_name``, sorted
        by ``Filename``.
    """
    final_rows = []
    for file_name in os.listdir(input_path):
        if not file_name.endswith('.xlsx'):
            continue
        # header=None: process_excel_files writes these sheets without a
        # header row, so the first row is data. With the default header
        # handling a sheet holding a single value would look empty.
        sheet_df = pd.read_excel(
            os.path.join(input_path, file_name), sheet_name=sheet_name, header=None)
        # Value of the final row in the first column
        final_rows.append((file_name, sheet_df.iloc[-1, 0]))

    final_values_df = pd.DataFrame(final_rows, columns=['Filename', sheet_name])
    final_values_df.sort_values(by='Filename', inplace=True)

    # Save the summary next to returning it
    final_values_df.to_excel(os.path.join(output_path, output_name), index=False)
    return final_values_df


def pick_out_feature_1(input_path, output_path):
    """Summarise the final ``Ftr1(k)`` value of every workbook in a folder.

    Args:
        input_path: Folder containing the ``.xlsx`` workbooks to read.
        output_path: Existing folder in which ``Ftr1.xlsx`` is written.

    Returns:
        DataFrame with the columns ``Filename`` and ``Ftr1(k)``, sorted by
        ``Filename``.
    """
    return _pick_out_final_value(input_path, output_path, 'Ftr1(k)', 'Ftr1.xlsx')


def pick_out_feature_2(input_path, output_path):
    """Summarise the final ``Ftr2(k)`` value of every workbook in a folder.

    Args:
        input_path: Folder containing the ``.xlsx`` workbooks to read.
        output_path: Existing folder in which ``Ftr2.xlsx`` is written.

    Returns:
        DataFrame with the columns ``Filename`` and ``Ftr2(k)``, sorted by
        ``Filename``.
    """
    return _pick_out_final_value(input_path, output_path, 'Ftr2(k)', 'Ftr2.xlsx')

def rename_files(directory):
    """Rename ``updated_<real name>_new_extended*`` files to ``<real name>.xlsx``.

    Every entry of ``directory`` whose name contains both ``updated_`` and
    ``_new_extended`` is renamed in place. The real name is the text between
    those two markers, so names that themselves contain underscores are kept
    intact. Entries with no text between the markers are left untouched.

    Args:
        directory: Folder whose entries are inspected (non-recursively).

    Raises:
        OSError: If the directory cannot be listed or a rename fails.
    """
    prefix = 'updated_'
    marker = '_new_extended'

    for file in os.listdir(directory):
        if prefix not in file or marker not in file:
            continue

        # Slice between the markers instead of splitting on '_': splitting
        # would cut a real name such as 'b1_c0' down to 'b1' and make
        # different files collide on the same target name.
        start = file.index(prefix) + len(prefix)
        end = file.find(marker, start)
        real_name = file[start:end] if end != -1 else ''
        if not real_name:
            # Markers overlap or appear in the wrong order: nothing to extract
            continue

        old_file_path = os.path.join(directory, file)
        new_file_path = os.path.join(directory, f"{real_name}.xlsx")
        os.rename(old_file_path, new_file_path)

def add_difference_Qd(file_path):
    """Append a ``Difference Qd`` sheet to a workbook, in place.

    The sheet has two rows: the ``Qd`` row at index 99 minus the row at
    index 9 (labelled ``100th Qd - 10th Qd``) and the row at index 199 minus
    the row at index 9 (labelled ``200th Qd - 10th Qd``).

    Nothing is written when the workbook already has a ``Difference Qd``
    sheet or has no ``Qd`` sheet. Any error is reported on stdout and the
    file is skipped; no exception is propagated.

    Args:
        file_path: Path of the ``.xlsx`` workbook to update.
    """
    try:
        # Read everything needed and close the workbook BEFORE reopening it
        # in append mode; keeping the read handle open leaks it and can block
        # the write on platforms that lock open files.
        with pd.ExcelFile(file_path) as xls:
            sheet_names = xls.sheet_names

            if 'Difference Qd' in sheet_names:
                # Already processed
                return

            if 'Qd' not in sheet_names:
                print(f"Sheet 'Qd' not found in file: {file_path}. Skipping file.")
                return

            qd_df = pd.read_excel(xls, sheet_name='Qd')

        # Rows at positions 9, 99 and 199, i.e. the 10th, 100th and 200th
        # rows; an IndexError for shorter sheets is reported below.
        cycle_10_qd = qd_df.iloc[9]
        cycle_100_qd = qd_df.iloc[99]
        cycle_200_qd = qd_df.iloc[199]

        # One row per difference, one column per Qd column
        new_sheet_data = pd.DataFrame({
            '100th Qd - 10th Qd': cycle_100_qd - cycle_10_qd,
            '200th Qd - 10th Qd': cycle_200_qd - cycle_10_qd,
        }).T

        # Save the new sheet into the original file
        with pd.ExcelWriter(file_path, engine='openpyxl', mode='a') as writer:
            new_sheet_data.to_excel(writer, sheet_name='Difference Qd')
    except Exception as e:
        # Best-effort batch helper: report and let the caller continue
        print(f"Error processing file: {file_path}. Error: {e}. Skipping file.")



# Extract Temperature Value

In [None]:
# Sheet to average, plus the folders to read from and write to
sheet_name = 'T_ex'
input_dir = '...//val_set'
output_dir = '...//Temperature'

# Walk the input folder and hand every .xlsx workbook to the averaging helper
for file_name in os.listdir(input_dir):
    if not file_name.endswith('.xlsx'):
        continue
    input_file_path = os.path.join(input_dir, file_name)
    print(f"Processing file: {input_file_path}")  # progress trace
    calculate_average_temperature(input_file_path, sheet_name, output_dir)


# Extract Feature

### Extract Feature 1 and 2

In [None]:
# Input folders holding the Excel workbooks
b1c_new = '...//3_Range_22-24/b1c_new'
b2c_new = '...//3_Range_22-24/b2c_new'
b3c_new = '...//3_Range_22-24/b3c_new'
b9c_new = '...//3_Range_22-24/b9c_new'

# Single destination shared by all input folders
output_folder_path = '...//3_Range_22-24/storage'

# Process the input folders one after another, in the listed order
for source_folder in (b1c_new, b2c_new, b3c_new, b9c_new):
    process_excel_files(source_folder, output_folder_path)



### Add difference Qd 100 and 200

In [None]:
# Folder whose workbooks receive the 'Difference Qd' sheet
folder_path = '...//3_Range_22-24/storage'

# Visit every entry of the folder; anything that is not an .xlsx file is ignored
for filename in os.listdir(folder_path):
    if not filename.endswith(".xlsx"):
        continue
    file_path = os.path.join(folder_path, filename)
    add_difference_Qd(file_path)


### Rename

In [5]:
# Folder holding the 'updated_..._new_extended' workbooks
directory = '...//3_Range_22-24/storage'

# Strip the 'updated_' / '_new_extended' decoration from the file names
rename_files(directory=directory)

# Keep the resulting directory listing around for a manual check
renamed_files = os.listdir(directory)

### Pick out Feature 1 (200)

In [None]:
# Workbooks are read from the storage folder; the summary goes one level up
input_path = '...//3_Range_22-24/storage'
output_path = '...//3_Range_22-24'

# Last expression of the cell, so the returned table is displayed
pick_out_feature_1(input_path=input_path, output_path=output_path)


### Pick out Feature 2 (200)

In [None]:
# Workbooks are read from the storage folder; the summary goes one level up
input_path = '...//3_Range_22-24/storage'
output_path = '...//3_Range_22-24'

# Last expression of the cell, so the returned table is displayed
pick_out_feature_2(input_path=input_path, output_path=output_path)
