## Create all pairwise combinations of treatment groups

In [12]:
import pandas as pd
import os 

def read_data(file_path, sheet_name, save_path):
    """Load one worksheet and split it into the four treatment groups.

    The sheet is read with ``pd.read_excel``, the ``Mouse_ID``, ``Tumor_Side``
    and ``Age`` columns are dropped, and the rows are partitioned by the value
    of the ``Treatment`` column. The names of the columns that remain after the
    drop (``Treatment`` included) are written, comma-separated, to
    ``columns.txt`` inside ``save_path``.

    Parameters
    ----------
    file_path : str
        Path of the Excel workbook.
    sheet_name : str
        Name of the worksheet to read.
    save_path : str
        Directory that receives ``columns.txt``; created if it does not exist.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(sham_df, ir_df, aspirin_df, ir_aspirin_df)``, each without the
        ``Treatment`` column.

    Raises
    ------
    KeyError
        If any of the dropped columns or ``Treatment`` is missing from the sheet.
    """
    # Read the specific sheet and drop the columns that are not needed
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df = df.drop(columns=['Mouse_ID', 'Tumor_Side', 'Age'])

    # Value of the 'Treatment' column that selects each group.
    # NOTE(review): aspirin -> 11 and IR+aspirin -> 10 are kept exactly as in
    # the original code; confirm this coding against the spreadsheet.
    treatment_codes = {'sham': 0, 'ir': 1, 'aspirin': 11, 'ir_aspirin': 10}

    # One filtered frame per group, with the now-constant 'Treatment' column removed
    groups = {
        label: df[df['Treatment'] == code].drop(columns='Treatment')
        for label, code in treatment_codes.items()
    }

    # Make sure the output directory exists before writing into it
    # (an empty save_path means "current directory" and needs no creation).
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    # Save column names to a separate file
    columns_path = os.path.join(save_path, "columns.txt")
    with open(columns_path, 'w') as f:
        # str() keeps the join working if a header was parsed as a non-string label
        f.write(','.join(map(str, df.columns)))

    return groups['sham'], groups['ir'], groups['aspirin'], groups['ir_aspirin']


def combine_dfs(sham_df, ir_df, aspirin_df, ir_aspirin_df, save_path):
    """Write one header-less CSV per pair of treatment groups.

    Each pair is stacked row-wise with a leading ``Origin`` column: 0 for rows
    from the first group of the pair and 1 for rows from the second. A pair is
    emitted only when the first group's name sorts before the second's as a
    string, so every unordered pair appears exactly once and no group is paired
    with itself. Files are named ``<name1>_<name2>.csv`` inside ``save_path``.

    The input DataFrames are not modified. Nothing is returned.
    """
    groups = {
        'sham': sham_df,
        'ir': ir_df,
        'aspirin': aspirin_df,
        'ir+aspirin': ir_aspirin_df,
    }

    def tag_origin(frame, flag):
        # New frame carrying the origin flag, moved to the first column
        labelled = frame.assign(Origin=flag)
        ordered = ['Origin'] + [col for col in labelled.columns if col != 'Origin']
        return labelled[ordered]

    # (label, stacked frame) for every name-ordered pair, in dict iteration order
    paired = [
        (
            f"{first}_{second}",
            pd.concat([tag_origin(groups[first], 0), tag_origin(groups[second], 1)]),
        )
        for first in groups
        for second in groups
        if first < second
    ]

    # Persist each stacked frame without index or header row
    for label, table in paired:
        target = os.path.join(save_path, f"{label}.csv")
        table.to_csv(target, index=False, header=False)
        print(f"Saved DataFrame '{label}' to '{target}'")




# Load the Excel file
# Raw strings keep the Windows-style backslashes literal instead of relying on
# unrecognized escape sequences ("\F", "\o"), which newer Python versions warn about.
file_path = r'.\raw_data\Final_Spreadsheet_separated_by_age.xlsx'
sheet_name = 'Age_W5'  # Replace with your actual sheet name
save_path = r'.\data\output_preprocess'
## creating the source dfs for sham, IR, Aspirin and IR+Aspirin. 
sham_df, ir_df, aspirin_df, ir_aspirin_df = read_data(file_path, sheet_name, save_path)

# Write one CSV per pair of treatment groups into save_path
combine_dfs(sham_df, ir_df, aspirin_df, ir_aspirin_df, save_path)


Saved DataFrame 'ir_sham' to '.\data\output_preprocess\ir_sham.csv'
Saved DataFrame 'ir_ir+aspirin' to '.\data\output_preprocess\ir_ir+aspirin.csv'
Saved DataFrame 'aspirin_sham' to '.\data\output_preprocess\aspirin_sham.csv'
Saved DataFrame 'aspirin_ir' to '.\data\output_preprocess\aspirin_ir.csv'
Saved DataFrame 'aspirin_ir+aspirin' to '.\data\output_preprocess\aspirin_ir+aspirin.csv'
Saved DataFrame 'ir+aspirin_sham' to '.\data\output_preprocess\ir+aspirin_sham.csv'


In [24]:
# Row count of each treatment group, then the number of feature columns
print(*(len(group) for group in (sham_df, ir_df, aspirin_df, ir_aspirin_df)))
print(sham_df.shape[1])

12 10 10 10
29


## Add column names to the output of the R code

In [18]:
import pandas as pd
import os

def add_feature_names_csv(names_file_path, csv_directory_path, save_directory_path):
    """Annotate every CSV in a directory with feature names looked up by index.

    The names file is read one name per line. For each ``*.csv`` file in
    ``csv_directory_path`` the ``V1`` column is converted to integers (values
    that cannot be converted become 0) and treated as a 1-based index into the
    list of names. The looked-up name is stored in a new ``Names`` column, and
    the table is written to ``save_directory_path`` as ``modified_<file name>``.

    Parameters
    ----------
    names_file_path : str
        Text file with one feature name per line.
    csv_directory_path : str
        Directory containing the input CSV files; a trailing path separator
        is optional.
    save_directory_path : str
        Directory for the annotated CSV files; created if it does not exist.

    Raises
    ------
    KeyError
        If an input CSV has no ``V1`` column.
    """
    # Read names from the file into a list
    with open(names_file_path, 'r') as file:
        names = [line.strip() for line in file]

    def index_to_name(index):
        # 'V1' has already been cast to integers, so only the range needs checking;
        # this also works if pandas passes NumPy integers rather than Python ints.
        if 1 <= index <= len(names):
            return names[index - 1]  # Convert 1-based index to 0-based
        return 'Index out of range'

    # Make sure the destination exists before writing into it
    # (an empty path means "current directory" and needs no creation).
    if save_directory_path:
        os.makedirs(save_directory_path, exist_ok=True)

    # CSV files to process, in a deterministic order
    csv_files = sorted(f for f in os.listdir(csv_directory_path) if f.endswith('.csv'))

    # Process each CSV file
    for csv_file in csv_files:
        # os.path.join works whether or not the directory path ends with a separator
        current_table = pd.read_csv(os.path.join(csv_directory_path, csv_file))
        # Convert 'V1' to integers; non-convertible values are coerced to NaN and then to 0
        current_table['V1'] = pd.to_numeric(current_table['V1'], errors='coerce').fillna(0).astype(int)
        # Map the 'V1' column to names
        current_table['Names'] = current_table['V1'].apply(index_to_name)

        # Save the modified table
        modified_file_path = os.path.join(save_directory_path, 'modified_' + csv_file)
        current_table.to_csv(modified_file_path, index=False)
        print(f"Processed and saved modified data to {modified_file_path}")

# Example usage of the function:
# All three locations live under the same project data folder
_data_root = 'C:\\Shabani\\Projects\\tumor_latency\\data\\'
names_file_path = _data_root + 'column_names.csv'
csv_directory_path = _data_root + 'output_r\\'
save_directory_path = _data_root + 'output_r_joint_features\\'

# Annotate every CSV in the input directory and write the results to the output directory
add_feature_names_csv(
    names_file_path,
    csv_directory_path,
    save_directory_path,
)


Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_modified_output_aspirin_ir+aspirin.csv
Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_output_aspirin_ir+aspirin.csv
Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_output_aspirin_ir.csv
Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_output_aspirin_sham.csv
Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_output_ir+aspirin_sham.csv
Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_output_ir_ir+aspirin.csv
Processed and saved modified data to C:\Shabani\Projects\tumor_latency\data\output_r_joint_features\modified_output_ir_sham.csv
