In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# 1. Load data
df = pd.read_excel("MSDs Risk - Data.xlsx", sheet_name="Sheet1")

# 2. Specify predictor and outcome columns
predictor_cols = [
    "Age", "Gender", "Height (cm)", "Weight (kg)", "BMI",
    "Medical History", "Smoking Habits", "Alcohol Consumption",
    "Physical Exercise", "Job Tenure (years)", "Working Days/Week",
    "Work Duration (hrs/day)", "Work Breaks (hrs/day)",
    "Table Height (cm)", "Sitting Height (cm)"
]
outcome_vars = [
    "Neck Pain", "Shoulder Pain", "Upper Back Pain ",
    "Lower Back Pain ", "Elbow Pain", "Wrist/Hand Pain",
    "Hips/Thighs Pain", "Knees Pain", "Ankles/Feet Pain"
]

# 3. Preprocess predictors
#   - Numeric columns: leave as-is
#   - Categorical columns: one-hot encode
cat_cols = ["Gender", "Medical History", "Smoking Habits",
            "Alcohol Consumption", "Physical Exercise"]
num_cols = [c for c in predictor_cols if c not in cat_cols]

encoder = OneHotEncoder(drop="first", sparse=False)
X_cat = encoder.fit_transform(df[cat_cols])
cat_feature_names = encoder.get_feature_names_out(cat_cols)

X_num = df[num_cols].to_numpy()
X = np.hstack([X_num, X_cat])
feature_names = num_cols + list(cat_feature_names)

# 4. Fit logistic regression for each outcome and collect coefficients
coef_df = pd.DataFrame(index=feature_names, columns=outcome_vars, dtype=float)

for outcome in outcome_vars:
    y = df[outcome].map({"Yes": 1, "No": 0})
    model = LogisticRegression(max_iter=1000, solver="lbfgs")
    model.fit(X, y)
    coef_df[outcome] = model.coef_.flatten()

# 5. Display the B coefficient matrix
print("Logistic Regression Coefficient (B) Matrix:")
print(coef_df)

# 6. Save the result as an Excel file
output_file = "Logistic_Regression_Coefficient_B_Matrix.xlsx"
coef_df.to_excel(output_file)

print(f"Results saved to {output_file}")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Coefficient (B) Matrix:
                                   Neck Pain  Shoulder Pain  Upper Back Pain   \
Age                                 0.484385       0.131393         -0.045293   
Height (cm)                        -0.178783       0.105808          0.134285   
Weight (kg)                         0.003502      -0.131501         -0.199676   
BMI                                 0.016965       0.304270          0.190960   
Job Tenure (years)                  0.193085      -0.044751          0.164687   
Working Days/Week                   0.536590      -0.207011         -0.097023   
Work Duration (hrs/day)             0.709418       0.533070          0.500177   
Work Breaks (hrs/day)               0.228058       0.055862          0.218607   
Table Height (cm)                  -0.755379       0.026785         -0.189772   
Sitting Height (cm)                 0.170866       0.208766         -0.074613   
Gender_Male                        -0.271575      -0.982247      