In [2]:
# Long -> wide: one row per `patient_id`, with every remaining column spread
# across the values of `visit`, giving (feature, visit) column pairs.
df_wide = df.pivot(index='patient_id', columns='visit')

# Relabel the visit level as 'visit_<n>' and name both column levels.
# The columns remain a two-level MultiIndex; they are not flattened.
df_wide.columns = pd.MultiIndex.from_tuples(
    [(feature, f'visit_{visit}') for feature, visit in df_wide.columns],
    names=['Feature', 'Visit'],
)

# Plain-text dump of the reshaped frame
print(df_wide)

Feature    feature_1                               feature_2            \
Visit        visit_1   visit_2   visit_3   visit_4   visit_1   visit_2   
patient_id                                                               
1           0.374540  0.598658  0.058084  0.708073  0.950714  0.156019   
2           0.832443  0.183405  0.431945  0.139494  0.212339  0.304242   
3           0.456070  0.514234  0.607545  0.948886  0.785176  0.592415   

Feature                        feature_3                                \
Visit        visit_3   visit_4   visit_1   visit_2   visit_3   visit_4   
patient_id                                                               
1           0.866176  0.020584  0.731994  0.155995  0.601115  0.969910   
2           0.291229  0.292145  0.181825  0.524756  0.611853  0.366362   
3           0.170524  0.965632  0.199674  0.046450  0.065052  0.808397   

Feature     mds_updrs                                   
Visit         visit_1    visit_2    visit_3    visit_

In [3]:
# Preview the first rows of the wide table; as the last expression in the
# cell, the result is rendered as the cell's output.
df_wide.head()

Feature,feature_1,feature_1,feature_1,feature_1,feature_2,feature_2,feature_2,feature_2,feature_3,feature_3,feature_3,feature_3,mds_updrs,mds_updrs,mds_updrs,mds_updrs
Visit,visit_1,visit_2,visit_3,visit_4,visit_1,visit_2,visit_3,visit_4,visit_1,visit_2,visit_3,visit_4,visit_1,visit_2,visit_3,visit_4
patient_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
1,0.37454,0.598658,0.058084,0.708073,0.950714,0.156019,0.866176,0.020584,0.731994,0.155995,0.601115,0.96991,30.461377,9.767211,68.423303,44.015249
2,0.832443,0.183405,0.431945,0.139494,0.212339,0.304242,0.291229,0.292145,0.181825,0.524756,0.611853,0.366362,12.203823,49.517691,3.438852,90.93204
3,0.45607,0.514234,0.607545,0.948886,0.785176,0.592415,0.170524,0.965632,0.199674,0.04645,0.065052,0.808397,25.877998,66.252228,31.171108,52.006802


In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

class LoLiMoTNode:
    """One node of a LoLiMoT tree; every field starts out as ``None``."""

    def __init__(self):
        # Regression model fitted on the samples that reach this node.
        self.linear_model = None
        # Split rule (column index and threshold); stays None on a leaf.
        self.split_feature = self.split_value = None
        # Children for samples at/below (left) and above (right) the threshold.
        self.left = self.right = None

class LoLiMoT:
    """Tree of local linear models grown by greedy axis-aligned splits.

    Each node fits a ``LinearRegression`` to the samples routed to it. A node
    is split on the (feature, threshold) pair whose two child models give the
    lowest sample-weighted mean squared error, provided that error is lower
    than the node's own. A prediction comes from the model of the leaf that a
    sample is routed to.

    Parameters
    ----------
    max_depth : int, default=10
        Nodes at this depth are never split (the root is at depth 0).
    min_error : float, default=1e-3
        A node whose training MSE is at or below this value is not split.
    min_samples_leaf : int, default=1
        Minimum number of samples required on each side of a split. The
        default only rejects splits that leave one side empty. A child with
        no more samples than the linear model has coefficients can typically
        be fitted exactly, which makes such splits look artificially good;
        use a larger value to rule them out.
    """

    def __init__(self, max_depth=10, min_error=1e-3, min_samples_leaf=1):
        if min_samples_leaf < 1:
            raise ValueError("min_samples_leaf must be at least 1")
        self.max_depth = max_depth
        self.min_error = min_error
        self.min_samples_leaf = min_samples_leaf
        self.root = None  # Set by fit()

    def fit(self, X, y):
        """Grow the tree on training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Anything ``np.asarray`` turns into a 2-D array (ndarray, nested
            list, DataFrame).
        y : array-like of shape (n_samples,)
            Target values.
        """
        # Column slicing and boolean-mask row selection below need ndarrays;
        # DataFrame / list inputs would otherwise fail on ``X[:, j]``.
        X = np.asarray(X)
        y = np.asarray(y)
        self.root = self._fit_recursive(X, y, depth=0)

    def _fit_recursive(self, X, y, depth):
        """Fit one node on (X, y) and recurse into its children if it is split.

        Returns the node. It stays a leaf when the depth limit is reached, its
        error is already at or below ``min_error``, or no split lowers the error.
        """
        node = LoLiMoTNode()

        # Every node, leaf or not, carries a model fitted on its own samples.
        node.linear_model = LinearRegression()
        node.linear_model.fit(X, y)
        error = mean_squared_error(y, node.linear_model.predict(X))

        if depth >= self.max_depth or error <= self.min_error:
            return node

        best_split_feature, best_split_value, best_error = self._find_best_split(X, y, node)

        # No admissible split, or the best one does not beat the single model.
        if best_split_feature is None or best_error >= error:
            return node

        node.split_feature = best_split_feature
        node.split_value = best_split_value

        left_indices = X[:, best_split_feature] <= best_split_value
        right_indices = X[:, best_split_feature] > best_split_value

        node.left = self._fit_recursive(X[left_indices], y[left_indices], depth + 1)
        node.right = self._fit_recursive(X[right_indices], y[right_indices], depth + 1)

        return node

    def _find_best_split(self, X, y, node):
        """Exhaustively search for the split with the lowest combined error.

        Candidate thresholds are the midpoints between consecutive distinct
        values of each feature. For each candidate, a separate linear model is
        fitted on either side and the two MSEs are averaged, weighted by the
        number of samples on each side.

        ``node`` is not used; it is kept so the signature stays unchanged.

        Returns
        -------
        (best_feature, best_value, best_error)
            ``(None, None, inf)`` when no candidate leaves at least
            ``min_samples_leaf`` samples on both sides.
        """
        best_feature = None
        best_value = None
        best_error = float('inf')
        n_samples = len(y)

        for feature_idx in range(X.shape[1]):
            column = X[:, feature_idx]

            # Sorted distinct values; midpoints of neighbours are the thresholds.
            levels = np.unique(column)

            for split_value in (levels[1:] + levels[:-1]) / 2:
                left_indices = column <= split_value
                right_indices = column > split_value
                n_left = left_indices.sum()
                n_right = right_indices.sum()

                # Also guards against a side ending up empty when rounding
                # makes the midpoint coincide with one of its neighbours.
                if n_left < self.min_samples_leaf or n_right < self.min_samples_leaf:
                    continue

                left_model = LinearRegression()
                right_model = LinearRegression()
                left_model.fit(X[left_indices], y[left_indices])
                right_model.fit(X[right_indices], y[right_indices])

                error_left = mean_squared_error(
                    y[left_indices], left_model.predict(X[left_indices])
                )
                error_right = mean_squared_error(
                    y[right_indices], right_model.predict(X[right_indices])
                )

                # Sample-weighted average of the two children's MSE.
                combined_error = (error_left * n_left + error_right * n_right) / n_samples

                # Strict '<' keeps the first candidate found among ties.
                if combined_error < best_error:
                    best_feature = feature_idx
                    best_value = split_value
                    best_error = combined_error

        return best_feature, best_value, best_error

    def predict(self, X):
        """Predict a target value for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray with one prediction per row.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.root is None:
            raise RuntimeError(
                "This LoLiMoT instance is not fitted yet; call fit(X, y) before predict(X)."
            )
        # Iterate over rows, not over column labels as a DataFrame would yield.
        X = np.asarray(X)
        return np.array([self._predict_recursive(x, self.root) for x in X])

    def _predict_recursive(self, x, node):
        """Route a single sample ``x`` down from ``node`` and return the leaf's prediction."""
        # A node without a split rule is a leaf.
        if node.split_feature is None:
            return node.linear_model.predict([x])[0]

        if x[node.split_feature] <= node.split_value:
            return self._predict_recursive(x, node.left)
        else:
            return self._predict_recursive(x, node.right)

# Demo: fit the tree on synthetic data and report its error.
if __name__ == "__main__":
    # Reproducible synthetic inputs: 100 rows, 3 random features.
    np.random.seed(42)
    X = np.random.rand(100, 3)

    # Target is a purely linear function of the features plus small Gaussian noise.
    y = (
        5 * X[:, 0]
        - 3 * X[:, 1]
        + 2 * X[:, 2]
        + np.random.randn(100) * 0.1
    )

    # Grow the tree of local linear models.
    model = LoLiMoT(max_depth=5, min_error=1e-3)
    model.fit(X, y)

    # Scored on the same data used for fitting, so this is an in-sample error.
    y_pred = model.predict(X)
    print("Mean Squared Error:", mean_squared_error(y, y_pred))