In [1]:
import pandas as pd
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [5]:
dataset = pd.read_csv("/community_health_evaluation_dataset.csv")

In [6]:
print(dataset.isnull().sum())

Participant ID                 0
Age                            0
Gender                         0
SES                            0
Service Type                   0
Visit Frequency                0
Step Frequency (steps/min)     0
Stride Length (m)              0
Joint Angle (°)                0
EMG Activity                   0
Patient Satisfaction (1-10)    0
Quality of Life Score          0
dtype: int64


In [9]:
for column in dataset.select_dtypes(include=np.number).columns:
    dataset[column].fillna(dataset[column].median(), inplace=True)

for column in dataset.select_dtypes(exclude=np.number).columns:
    dataset[column].fillna(dataset[column].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset[column].fillna(dataset[column].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset[column].fillna(dataset[column].mode()[0], inplace=True)


In [10]:
label_encoders = {}
for column in dataset.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    dataset[column] = le.fit_transform(dataset[column])
    label_encoders[column] = le

In [13]:
X = dataset.drop("Visit Frequency", axis=1)  # Replace 'target_column' with the actual column name
y = dataset["Visit Frequency"]

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [16]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}

In [17]:
results = {}

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[model_name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='weighted'),
        "Recall": recall_score(y_test, y_pred, average='weighted'),
        "F1-Score": f1_score(y_test, y_pred, average='weighted')
    }

In [18]:
for model_name, metrics in results.items():
    print(f"{model_name}:")
    for metric, score in metrics.items():
        print(f"  {metric}: {score:.2f}")

Logistic Regression:
  Accuracy: 0.24
  Precision: 0.24
  Recall: 0.24
  F1-Score: 0.23
Decision Tree:
  Accuracy: 0.39
  Precision: 0.37
  Recall: 0.39
  F1-Score: 0.37
Random Forest:
  Accuracy: 0.36
  Precision: 0.35
  Recall: 0.36
  F1-Score: 0.35
SVM:
  Accuracy: 0.31
  Precision: 0.30
  Recall: 0.31
  F1-Score: 0.30


In [19]:
print("Classification Report for Random Forest:")
print(classification_report(y_test, models["Random Forest"].predict(X_test)))

Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.29      0.22      0.25        23
           1       0.39      0.50      0.44        24
           2       0.36      0.35      0.36        23

    accuracy                           0.36        70
   macro avg       0.35      0.36      0.35        70
weighted avg       0.35      0.36      0.35        70

