In [1]:
import pandas as pd
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [10]:
# Location of the source Excel workbook (nothing is read in this cell).
# A raw string is used so that every backslash is a literal path separator;
# the previous literal contained a bare "\M", which is an invalid escape
# sequence and triggers a SyntaxWarning on recent Python versions.
# NOTE(review): this is a machine-specific absolute path - adjust per environment.
excel_file = r"C:\Users\paulo\Desktop\Masters_Project\Data Processing\Erdos_data_with clamp.xlsx"

In [11]:
# Destination CSV path. Written as a raw string so each backslash is a literal
# path separator (the previous literal contained a bare "\M", an invalid escape
# sequence that triggers a SyntaxWarning on recent Python versions).
# NOTE(review): this is a machine-specific absolute path - adjust per environment.
output_file = r'C:\Users\paulo\Desktop\Masters_Project\mmt_glucose_and_insulin_data.csv'

In [12]:
def process_mmt_sheet(file_name, sheet_name, metab):
    """Load one worksheet and reshape it from wide to long format.

    Every column other than 'ID' is unpivoted: its header becomes an entry in
    the 'time' column and its cell value goes to 'VAL'. Rows are ordered by
    'ID' and then 'time', every occurrence of 'PS' in 'ID' is replaced with
    'Ex', and constant 'Condition' ('na') and 'test' ('HFMM') columns are added.

    Parameters
    ----------
    file_name
        Excel workbook location, forwarded to ``pd.read_excel``.
    sheet_name
        Worksheet to load from the workbook.
    metab
        Label written to the 'metab' column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ID, Condition, test, metab, VAL, time. The index
        keeps the labels produced by the melt, in sorted row order.

    Notes
    -----
    The sheet must contain an 'ID' column; the ``.str`` accessor used on it
    presumably requires string IDs - confirm against the workbook.
    """
    wide = pd.read_excel(file_name, sheet_name=sheet_name)

    # Unpivot, then order rows by subject and time point in a single chain.
    long_sorted = (
        wide
        .melt(id_vars=['ID'], var_name='time', value_name='VAL')
        .sort_values(['ID', 'time'])
    )

    # Relabel the IDs and attach the constant descriptor columns.
    tidy = long_sorted.assign(
        ID=long_sorted['ID'].str.replace('PS', 'Ex'),
        Condition='na',
        test='HFMM',
        metab=metab,
    )

    column_order = ['ID', 'Condition', 'test', 'metab', 'VAL', 'time']
    return tidy[column_order]

In [13]:
# Load each MMT sheet into long format (glucose first, then insulin)
df_mmt_glucose = process_mmt_sheet(excel_file, 'MMT_week1_plasma_glucose', 'gluc')
df_mmt_insulin = process_mmt_sheet(excel_file, 'MMT_week1_plasma_insulin', 'ins')

# Stack both metabolites, order rows by ID / metabolite / time point,
# then discard every row that contains a missing value.
df_mmt_combined = (
    pd.concat([df_mmt_glucose, df_mmt_insulin], ignore_index=True)
    .sort_values(['ID', 'metab', 'time'])
    .dropna()
)

# Write the combined table to disk without the index column
df_mmt_combined.to_csv(output_file, index=False)

print(df_mmt_combined.head(20))
print("\nData shape:", df_mmt_combined.shape)

# Additional data validation: each entry pairs a heading with a deferred
# computation, so the heading is printed before its value is evaluated.
validation_checks = [
    ("\nUnique IDs:",
     lambda: df_mmt_combined['ID'].nunique()),
    ("\nID counts:",
     lambda: df_mmt_combined['ID'].value_counts()),
    ("\nTime points per ID and metabolite:",
     lambda: df_mmt_combined.groupby(['ID', 'metab'])['time'].nunique()),
    ("\nValue ranges by metabolite:",
     lambda: df_mmt_combined.groupby('metab')['VAL'].describe()),
    ("\nUnique time points:",
     lambda: sorted(df_mmt_combined['time'].unique())),
]
for heading, compute in validation_checks:
    print(heading)
    print(compute())

            ID Condition  test metab        VAL time
0     Ex010001        na  HFMM  gluc   4.730000    0
1     Ex010001        na  HFMM  gluc   4.790000   30
2     Ex010001        na  HFMM  gluc   4.320000   60
3     Ex010001        na  HFMM  gluc   3.950000  120
4     Ex010001        na  HFMM  gluc   4.770000  180
5     Ex010001        na  HFMM  gluc   4.520000  240
1458  Ex010001        na  HFMM   ins   5.645788    0
1459  Ex010001        na  HFMM   ins  26.308135   30
1460  Ex010001        na  HFMM   ins  52.285097   60
1461  Ex010001        na  HFMM   ins   8.041757  120
1462  Ex010001        na  HFMM   ins  13.176386  180
1463  Ex010001        na  HFMM   ins   4.829374  240
6     Ex010002        na  HFMM  gluc   6.210000    0
7     Ex010002        na  HFMM  gluc   7.060000   30
8     Ex010002        na  HFMM  gluc   6.450000   60
9     Ex010002        na  HFMM  gluc   6.810000  120
10    Ex010002        na  HFMM  gluc   5.640000  180
11    Ex010002        na  HFMM  gluc   5.56000