# Proteomics

In [None]:
# Imports live with the helper so this cell runs on a freshly restarted kernel.
import os

import pandas as pd


def read_excel_file(file_path, sheet_name=0):
    """
    Read one sheet of an Excel (.xlsx) file into a pandas DataFrame.

    Failures are reported with ``print`` instead of being raised, so callers
    must check for a ``None`` result before using the return value.

    :param file_path: Path to the Excel file, given as a ``str`` or any
        ``os.PathLike`` object (for example ``pathlib.Path``).
    :param sheet_name: Sheet name or index (default is first sheet); passed
        unchanged to ``pd.read_excel``.
    :return: pandas DataFrame, or None if the file is missing, does not end
        in ``.xlsx``, or cannot be read.
    """
    try:
        # Normalise to a plain string first: the extension check below uses
        # str methods, which a pathlib.Path does not provide.
        path_str = os.fspath(file_path)

        # Validate file existence
        if not os.path.isfile(path_str):
            raise FileNotFoundError(f"File not found: {path_str}")

        # Validate file extension (case-insensitive)
        if not path_str.lower().endswith(".xlsx"):
            raise ValueError("Only .xlsx files are supported.")

        # Read Excel file
        df = pd.read_excel(path_str, sheet_name=sheet_name, engine="openpyxl")
        print(f"Successfully loaded sheet '{sheet_name}' from {path_str}")
        return df

    except FileNotFoundError as fnf_err:
        print(f"Error: {fnf_err}")
    except ValueError as val_err:
        # Also covers ValueError raised inside pd.read_excel itself.
        print(f"Error: {val_err}")
    except Exception as e:
        # Best-effort loader: anything else is reported, never propagated.
        print(f"An unexpected error occurred: {e}")

    return None

In [None]:
# Load the BoAC_CNS_Alamar workbook. The path is absolute and machine-specific.
file_path = "/home/jamie/Documents/AI4BI/BiomarkersOfAgeChallenge/dataverse_files/BoAC_CNS_Alamar.xlsx"  # Replace with your file path
sheet = 0  # You can specify the sheet name or index here
CNS_Alamar = read_excel_file(file_path, sheet)
# read_excel_file signals failure by returning None; stop with a clear message
# rather than an AttributeError on the .head() call below.
if CNS_Alamar is None:
    raise RuntimeError(f"Could not load CNS_Alamar from {file_path}")
CNS_Alamar.head()

Successfully loaded sheet '0' from /home/jamie/Documents/AI4BI/BiomarkersOfAgeChallenge/dataverse_files/BoAC_CNS_Alamar.xlsx


Unnamed: 0,Panel,PanelLotNumber,PlateID,SampleName,SampleType,Target,AlamarTargetID,UniProtID,ProteinName,SampleQC,LOD,NPQ
0,CNS Disease Panel V1,panelLot021,Plate_01,A_01_BoA_Plasma_P1,Sample,ACHE,t8246,P22303,Acetylcholinesterase,PASS,4.482828,13.182884
1,CNS Disease Panel V1,panelLot021,Plate_01,A_02_BoA_Plasma_P9,Sample,ACHE,t8246,P22303,Acetylcholinesterase,PASS,4.482828,13.887148
2,CNS Disease Panel V1,panelLot021,Plate_01,A_03_BoA_Plasma_P17,Sample,ACHE,t8246,P22303,Acetylcholinesterase,PASS,4.482828,13.21353
3,CNS Disease Panel V1,panelLot021,Plate_01,A_04_BoA_Plasma_P25,Sample,ACHE,t8246,P22303,Acetylcholinesterase,PASS,4.482828,13.665878
4,CNS Disease Panel V1,panelLot021,Plate_01,A_05_BoA_Plasma_P33,Sample,ACHE,t8246,P22303,Acetylcholinesterase,PASS,4.482828,13.840203


In [None]:
# Load the BoAC_Inflammation_Alamar workbook. The path is absolute and machine-specific.
file_path = "/home/jamie/Documents/AI4BI/BiomarkersOfAgeChallenge/dataverse_files/BoAC_Inflammation_Alamar.xlsx"  # Replace with your file path
sheet = 0  # Can be sheet name like "Sheet1" or index like 0
Inflammation_Alamar = read_excel_file(file_path, sheet)
# read_excel_file signals failure by returning None; stop with a clear message
# rather than an AttributeError on the .head() call below.
if Inflammation_Alamar is None:
    raise RuntimeError(f"Could not load Inflammation_Alamar from {file_path}")
Inflammation_Alamar.head()

Successfully loaded sheet '0' from /home/jamie/Documents/AI4BI/BiomarkersOfAgeChallenge/dataverse_files/BoAC_Inflammation_Alamar.xlsx


Unnamed: 0,Panel,PanelLotNumber,PlateID,SampleName,SampleType,Target,AlamarTargetID,UniProtID,ProteinName,SampleQC,LOD,NPQ
0,200-Plex Inflammation Panel v2,panelLot018,Plate_01,A_01_BoA_Plasma_P1,Sample,AGER,t5521,Q15109,Advanced glycosylation end product-specific re...,PASS,4.174109,12.932788
1,200-Plex Inflammation Panel v2,panelLot018,Plate_01,A_02_BoA_Plasma_P9,Sample,AGER,t5521,Q15109,Advanced glycosylation end product-specific re...,PASS,4.174109,13.594548
2,200-Plex Inflammation Panel v2,panelLot018,Plate_01,A_03_BoA_Plasma_P17,Sample,AGER,t5521,Q15109,Advanced glycosylation end product-specific re...,PASS,4.174109,13.938275
3,200-Plex Inflammation Panel v2,panelLot018,Plate_01,A_04_BoA_Plasma_P25,Sample,AGER,t5521,Q15109,Advanced glycosylation end product-specific re...,PASS,4.174109,12.903652
4,200-Plex Inflammation Panel v2,panelLot018,Plate_01,A_05_BoA_Plasma_P33,Sample,AGER,t5521,Q15109,Advanced glycosylation end product-specific re...,PASS,4.174109,14.069248


In [None]:
# Load the BoAC_plasma_metadata workbook. The path is absolute and machine-specific.
file_path = "/home/jamie/Documents/AI4BI/BiomarkersOfAgeChallenge/dataverse_files/BoAC_plasma_metadata.xlsx"  # Replace with your file path
sheet = 0  # Can be sheet name like "Sheet1" or index like 0
Plasma_Metadata = read_excel_file(file_path, sheet)
# read_excel_file signals failure by returning None; stop with a clear message
# rather than an AttributeError on the .head() call below.
if Plasma_Metadata is None:
    raise RuntimeError(f"Could not load Plasma_Metadata from {file_path}")
Plasma_Metadata.head()

Successfully loaded sheet '0' from /home/jamie/Documents/AI4BI/BiomarkersOfAgeChallenge/dataverse_files/BoAC_plasma_metadata.xlsx


Unnamed: 0,Plasma.ID,Sex,Race1,Race2,Ethnicity,Age
0,P1,Male,Black,,Non Hispanic,67.137577
1,P2,Male,White,,Non Hispanic,85.705681
2,P3,Male,White,,Non Hispanic,82.902122
3,P4,Female,White,,DECLINED,22.477755
4,P5,Male,White,,Non Hispanic,70.584531


In [None]:
# Print column dtypes and non-null counts for the CNS panel table.
# NOTE(review): in the recorded output LOD has fewer non-null values than rows
# (63562 of 64604) — confirm how rows with a missing LOD should be handled.
CNS_Alamar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64604 entries, 0 to 64603
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Panel           64604 non-null  object 
 1   PanelLotNumber  64604 non-null  object 
 2   PlateID         64604 non-null  object 
 3   SampleName      64604 non-null  object 
 4   SampleType      64604 non-null  object 
 5   Target          64604 non-null  object 
 6   AlamarTargetID  64604 non-null  object 
 7   UniProtID       64604 non-null  object 
 8   ProteinName     64604 non-null  object 
 9   SampleQC        64604 non-null  object 
 10  LOD             63562 non-null  float64
 11  NPQ             64604 non-null  float64
dtypes: float64(2), object(10)
memory usage: 5.9+ MB


In [None]:
# Print column dtypes and non-null counts for the inflammation panel table.
# NOTE(review): in the recorded output LOD has fewer non-null values than rows
# (129208 of 130250) — confirm how rows with a missing LOD should be handled.
Inflammation_Alamar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130250 entries, 0 to 130249
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Panel           130250 non-null  object 
 1   PanelLotNumber  130250 non-null  object 
 2   PlateID         130250 non-null  object 
 3   SampleName      130250 non-null  object 
 4   SampleType      130250 non-null  object 
 5   Target          130250 non-null  object 
 6   AlamarTargetID  130250 non-null  object 
 7   UniProtID       130250 non-null  object 
 8   ProteinName     130250 non-null  object 
 9   SampleQC        130250 non-null  object 
 10  LOD             129208 non-null  float64
 11  NPQ             130250 non-null  float64
dtypes: float64(2), object(10)
memory usage: 11.9+ MB


In [None]:
# Print column dtypes and non-null counts for the plasma metadata table.
# NOTE(review): the recorded output reports Age as object dtype even though the
# previewed values are numeric — presumably some entries are non-numeric; verify
# before using Age in calculations. Race2 is populated for only 6 of 503 rows.
Plasma_Metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503 entries, 0 to 502
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Plasma.ID  503 non-null    object
 1   Sex        503 non-null    object
 2   Race1      503 non-null    object
 3   Race2      6 non-null      object
 4   Ethnicity  503 non-null    object
 5   Age        503 non-null    object
dtypes: object(6)
memory usage: 23.7+ KB
