In [22]:
# Step 1: Import necessary libraries
import pandas as pd
from scipy.stats import ttest_rel

# Step 2: Load the data
file_path = 'LN_total_IgG_glycomedata.xlsx'  # Replace with the actual file path
data = pd.read_excel(file_path)

# Step 3: Reshape the data
# Assuming the columns are structured as you mentioned
patient_col = "Patient number"
time_col = "Time point"
variables = data.columns[4:]  # Assuming variables start from column E

# Step 4: Perform paired t-tests
results = []

for variable in variables:
    # Extract T0 and T12 for the current variable
    t0_values = data[data[time_col] == "T0"].sort_values(by=patient_col)[variable].values
    t12_values = data[data[time_col] == "T12"].sort_values(by=patient_col)[variable].values

    # Perform paired t-test
    t_stat, p_value = ttest_rel(t0_values, t12_values)

    # Determine significance and direction
    direction = "increased" if t_stat > 0 else "decreased"
    results.append({"Variable": variable, "T-statistic": t_stat, "P-value": p_value, "Direction": direction})

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Step 5: Identify significant changes
significance_level = 0.05
results_df["Significant"] = results_df["P-value"] < significance_level

# Separate increased and decreased
increased = results_df[(results_df["Significant"]) & (results_df["Direction"] == "increased")]
decreased = results_df[(results_df["Significant"]) & (results_df["Direction"] == "decreased")]

# Output results
print("Significantly Increased Variables:")
print(increased)
print("\nSignificantly Decreased Variables:")
print(decreased)


Significantly Increased Variables:
   Variable  T-statistic   P-value  Direction  Significant
2       GP3     3.603668  0.002031  increased         True
3       GP4     4.278185  0.000452  increased         True
4       GP5     3.689974  0.001676  increased         True
6       GP7     5.245505  0.000055  increased         True
7       GP8     2.765208  0.012752  increased         True
8       GP9     6.535331  0.000004  increased         True
10     GP11     3.517142  0.002461  increased         True
12     GP13     2.591255  0.018435  increased         True
31       S1     2.397891  0.027546  increased         True
32       S2     4.152243  0.000598  increased         True

Significantly Decreased Variables:
   Variable  T-statistic   P-value  Direction  Significant
15     GP16    -4.287478  0.000443  decreased         True
18     GP19    -2.922596  0.009090  decreased         True
22     GP23    -4.095926  0.000678  decreased         True
24     GP25    -2.347401  0.030542  decrease

In [23]:
# Step 5: Identify significant changes
significance_level = 0.05
results_df["Significant"] = results_df["P-value"] < significance_level

# Separate increased and decreased
increased = results_df[(results_df["Significant"]) & (results_df["Direction"] == "increased")]
decreased = results_df[(results_df["Significant"]) & (results_df["Direction"] == "decreased")]

# Export results to Excel
increased_file = "significant_increased_variables.xlsx"
decreased_file = "significant_decreased_variables.xlsx"

increased.to_excel(increased_file, index=False)
decreased.to_excel(decreased_file, index=False)

print(f"Significantly increased variables saved to {increased_file}")
print(f"Significantly decreased variables saved to {decreased_file}")


Significantly increased variables saved to significant_increased_variables.xlsx
Significantly decreased variables saved to significant_decreased_variables.xlsx


In [4]:
# Step 5: Identify significant changes
significance_level = 0.05
results_df["Significant"] = results_df["P-value"] < significance_level

# Prepare lists of significant variables
increased_vars = results_df[(results_df["Significant"]) & (results_df["Direction"] == "increased")]["Variable"].tolist()
decreased_vars = results_df[(results_df["Significant"]) & (results_df["Direction"] == "decreased")]["Variable"].tolist()

# Initialize new columns for each variable's significance
for variable in data.columns[4:]:  # Assuming variables start at column E
    data[f"{variable}_Significantly Increased"] = data[variable].where(data["Time point"] == "T12").isin(increased_vars).astype(int)
    data[f"{variable}_Significantly Decreased"] = data[variable].where(data["Time point"] == "T12").isin(decreased_vars).astype(int)

# Save the updated dataset with significance annotations
output_file = "updated_dataset_with_significant_hits.xlsx"
data.to_excel(output_file, index=False)

print(f"Updated dataset saved to {output_file}")


Updated dataset saved to updated_dataset_with_significant_hits.xlsx


  data[f"{variable}_Significantly Decreased"] = data[variable].where(data["Time point"] == "T12").isin(decreased_vars).astype(int)
  data[f"{variable}_Significantly Increased"] = data[variable].where(data["Time point"] == "T12").isin(increased_vars).astype(int)
  data[f"{variable}_Significantly Decreased"] = data[variable].where(data["Time point"] == "T12").isin(decreased_vars).astype(int)
  data[f"{variable}_Significantly Increased"] = data[variable].where(data["Time point"] == "T12").isin(increased_vars).astype(int)
  data[f"{variable}_Significantly Decreased"] = data[variable].where(data["Time point"] == "T12").isin(decreased_vars).astype(int)
  data[f"{variable}_Significantly Increased"] = data[variable].where(data["Time point"] == "T12").isin(increased_vars).astype(int)
  data[f"{variable}_Significantly Decreased"] = data[variable].where(data["Time point"] == "T12").isin(decreased_vars).astype(int)
  data[f"{variable}_Significantly Increased"] = data[variable].where(data["Time poi

In [5]:
# Step 5: Identify significant changes
significance_level = 0.05
results_df["Significant"] = results_df["P-value"] < significance_level

# Get the list of variables that significantly increased
increased_vars = results_df[(results_df["Significant"]) & (results_df["Direction"] == "increased")]["Variable"].tolist()

# Filter the dataset to include only these variables and the key columns
filtered_data = data[["Patient number", "Time point"] + increased_vars]

# Save the filtered dataset
output_file = "variables_with_significant_increase.xlsx"
filtered_data.to_excel(output_file, index=False)

print(f"Filtered dataset with significantly increased variables saved to {output_file}")


Filtered dataset with significantly increased variables saved to variables_with_significant_increase.xlsx


In [24]:
# Step 5: Identify significant changes
significance_level = 0.05
results_df["Significant"] = results_df["P-value"] < significance_level

# Get the list of variables that significantly increased and decreased
increased_vars = results_df[(results_df["Significant"]) & (results_df["Direction"] == "increased")]["Variable"].tolist()
decreased_vars = results_df[(results_df["Significant"]) & (results_df["Direction"] == "decreased")]["Variable"].tolist()

# Filter the dataset for significantly increased variables
increased_data = data[["Patient number", "Time point"] + increased_vars]
increased_output_file = "variables_with_significant_increase.xlsx"
increased_data.to_excel(increased_output_file, index=False)
print(f"Filtered dataset with significantly increased variables saved to {increased_output_file}")

# Filter the dataset for significantly decreased variables
decreased_data = data[["Patient number", "Time point"] + decreased_vars]
decreased_output_file = "variables_with_significant_decrease.xlsx"
decreased_data.to_excel(decreased_output_file, index=False)
print(f"Filtered dataset with significantly decreased variables saved to {decreased_output_file}")


Filtered dataset with significantly increased variables saved to variables_with_significant_increase.xlsx
Filtered dataset with significantly decreased variables saved to variables_with_significant_decrease.xlsx


In [8]:
import pandas as pd

# Function to calculate delta (T12 - T0)
def calculate_delta_for_filtered_data(input_file, output_file):
    # Load the filtered data
    data = pd.read_excel(input_file)

    # Ensure that the dataset contains only "Patient number", "Time point", and the measured variables
    variables = data.columns[2:]  # Assuming that the first two columns are "Patient number" and "Time point"

    # Pivot the data to have T0 and T12 as columns for each patient
    pivoted_data = data.pivot(index="Patient number", columns="Time point", values=variables)

    # Calculate delta (T12 - T0)
    delta_data = pivoted_data["T12"] - pivoted_data["T0"]

    # Save delta data to an Excel file
    delta_data.reset_index().to_excel(output_file, index=False)
    print(f"Delta values saved to {output_file}")

# Calculate delta for significantly increased variables
calculate_delta_for_filtered_data("variables_with_significant_increase.xlsx", "delta_significant_increase.xlsx")

# Calculate delta for significantly decreased variables
calculate_delta_for_filtered_data("variables_with_significant_decrease.xlsx", "delta_significant_decrease.xlsx")



KeyError: 'T12'