In [10]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

# Input CSVs: one table of physiological / work-related factors and one
# table with the musculoskeletal symptom assessment.
file_path_factors = "Table-01_Physiological_Work-related_Factors.csv"
file_path_symptoms = "Table-02_Musculoskeletal_Symptoms_Assessment.csv"

# Read both tables into DataFrames.
df_factors, df_symptoms = [
    pd.read_csv(csv_path)
    for csv_path in (file_path_factors, file_path_symptoms)
]

# Combine the two tables row-wise by participant (default inner join, so
# only participants present in both files are kept).
df = df_factors.merge(df_symptoms, on="Participant ID")

# The "Name" column is not used in the analysis; remove it when present.
df = df.drop(columns=["Name"], errors="ignore")

# Independent variables (predictors) fed to every logistic regression below.
# Each name must exist as a column of the merged DataFrame `df`; a missing
# column raises KeyError in the encoding loop.
predictor_cols = [
    "Age", "Gender", "Height (cm)", "Weight (kg)", "BMI", "Handedness",
    "Physical Fitness", "Medical History", "Smoking Habits", "Alcohol Consumption",
    "Physical Exercise", "Vision Problems", "Job Tenure", "Working Days/Week",
    "Work Duration (hrs/day)", "Work Breaks (times/day)", "Table Height (cm)",
    "Sitting Height (cm)", "Use Ergonomic Aids", "Break Frequency"
]

# Replace every non-numeric predictor with integer codes, in place in `df`.
# The fitted encoder for each converted column is kept in `label_encoders`
# so the codes can be mapped back to the original labels later
# (label_encoders[col].classes_ / .inverse_transform).
#
# The check is "not numeric" rather than `dtype == "object"` so that text
# columns stored with pandas' dedicated string dtype or as `category` are
# encoded as well, instead of reaching the model as raw strings.
#
# NOTE(review): label encoding assigns integer codes in sorted label order,
# so a nominal predictor with more than two levels is treated by the model
# as if its levels were ordered and evenly spaced. Consider one-hot encoding
# for such columns if the coefficients are to be interpreted.
label_encoders = {}
for col in predictor_cols:
    if not pd.api.types.is_numeric_dtype(df[col]):
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Dependent variables (outcomes). One binary logistic regression is fitted
# per outcome, each using the full set of predictors.
outcome_vars = [
    "Neck Pain (12 months)", "Shoulder Pain (12 months)", "Upper Back Pain (12 months)",
    "Lower Back Pain (12 months)", "Elbow Pain (12 months)", "Wrist/Hand Pain (12 months)",
    "Hips/Thighs Pain (12 months)", "Knees Pain (12 months)", "Ankles/Feet Pain (12 months)",
    "Sought Medical Attention", "Work-related Pain Cause", "Use Coping Methods (Breaks, Posture Change)"
]

# Coefficient table under construction: a "Factor" column holding the
# predictor names, plus one column of coefficients per fitted outcome.
b_matrix_dict = {"Factor": predictor_cols}

# Yes/No answers are matched after trimming whitespace and lower-casing, so
# variants such as "yes" or "No " are not silently turned into missing values.
yes_no_codes = {"yes": 1, "no": 0}

# Fit one logistic regression per outcome variable.
for outcome in outcome_vars:
    if outcome not in df.columns:
        print(f"Skipping '{outcome}': column not found in the merged data")
        continue

    # Convert the answers to 1/0 in place; anything that is not a yes/no
    # answer (including blanks) becomes NaN.
    df[outcome] = (
        df[outcome].astype(str).str.strip().str.lower().map(yes_no_codes)
    )

    # Only rows with a recognised answer can be used for this outcome;
    # passing NaN targets to the model would raise.
    valid_rows = df[outcome].notna()
    y = df.loc[valid_rows, outcome].astype(int)

    # A logistic regression needs both classes to be present.
    if y.nunique() < 2:
        print(f"Skipping '{outcome}': fewer than two classes among valid answers")
        continue

    log_reg = LogisticRegression(max_iter=1000)
    log_reg.fit(df.loc[valid_rows, predictor_cols], y)  # Fit model
    # coef_ has shape (1, n_predictors) for a binary target; keep the row.
    b_matrix_dict[outcome] = log_reg.coef_[0]

# Assemble the collected coefficient columns into one table: a "Factor"
# column plus one column per fitted outcome.
b_matrix_full = pd.DataFrame(data=b_matrix_dict)

# Write the Regression Coefficient (B) Matrix to disk without the row index.
b_matrix_full.to_csv(path_or_buf="Regression_Coefficient_B_Matrix.csv", index=False)

# Show a preview: head() returns only the first five rows of the matrix.
preview = b_matrix_full.head()
print("Regression Coefficient (B) Matrix:", preview, sep="\n")

# Confirm where the full matrix was written.
print("Regression Coefficient (B) Matrix has been saved as 'Regression_Coefficient_B_Matrix.csv'")


Regression Coefficient (B) Matrix:
        Factor  Neck Pain (12 months)  Shoulder Pain (12 months)  \
0          Age              -0.019026                  -0.078520   
1       Gender               0.278976                   0.934761   
2  Height (cm)               0.094390                  -0.001905   
3  Weight (kg)              -0.052088                   0.081637   
4          BMI              -0.034900                   0.066757   

   Upper Back Pain (12 months)  Lower Back Pain (12 months)  \
0                     0.168899                     0.060420   
1                    -0.083663                     0.669368   
2                    -0.134461                     0.098362   
3                     0.072898                    -0.009096   
4                    -0.012825                     0.374524   

   Elbow Pain (12 months)  Wrist/Hand Pain (12 months)  \
0                0.014246                     0.233865   
1               -0.958755                    -0.329255   
2  