In [1]:
import pandas as pd
import os
import sys
sys.path.insert(0, '..')
from paths import *

  from pandas.core import (


In [2]:
CLINICAL_DATA_PATH = '../../wetlab/data/patient_selection/iMSMS_clinical_subset_20240105.xlsx'
SAVE_DATA_PATH = '../../wetlab/data/patient_selection/clinical_data_for_ARMSS_computation_for_MS_base.csv'
UCSF_ONLY_SAVE_DATA_PATH = '../../wetlab/data/patient_selection/ucsf_only_clinical_data_for_ARMSS_computation_for_MS_base.csv'



In [28]:
clinical_data = pd.read_excel(CLINICAL_DATA_PATH, engine='openpyxl')


### Preparing data for computing ARMSS score using the webservice: 
https://aliman.shinyapps.io/ARMSS/

#### Data should be according to the following specs:

#### Notes:
#### 1. Your file should be in CSV format (.csv).
#### 2. It should contain three variables named: ageatedss, dd and edss.

#### Step 1: Selecting only MS patients

In [4]:

clinical_data_ms = clinical_data[clinical_data.Case_Control=='MS Participant']


#### Step 2: Removing patients without EDSS date

In [5]:
clinical_data_ms.dropna(subset='EDSS_Date', inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clinical_data_ms.dropna(subset='EDSS_Date', inplace=True)


#### Step 3: Removing patients without EDSS score

In [6]:
clinical_data_ms.dropna(subset='EDSS', inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clinical_data_ms.dropna(subset='EDSS', inplace=True)


#### Step 4: Calculating Age at EDSS

In [7]:
clinical_data_ms.loc[:, 'EDSS_Date'] = pd.to_datetime(clinical_data_ms['EDSS_Date'])

clinical_data_ms.loc[:, 'EDSS_Year'] = clinical_data_ms['EDSS_Date'].dt.year

clinical_data_ms.loc[:, 'ageatedss'] = clinical_data_ms['EDSS_Year'] - clinical_data_ms['YOB']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clinical_data_ms.loc[:, 'EDSS_Year'] = clinical_data_ms['EDSS_Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clinical_data_ms.loc[:, 'ageatedss'] = clinical_data_ms['EDSS_Year'] - clinical_data_ms['YOB']


#### Step 5: Renaming 'EDSS' column to 'edss'

In [8]:
clinical_data_ms = clinical_data_ms.rename(columns={'EDSS':'edss'})


#### Step 6: Renaming 'Disease Duration (years)' to 'dd'

In [9]:
clinical_data_ms = clinical_data_ms.rename(columns={'Disease Duration (years)':'dd'})



#### Step 8: Extracting relevant columns for ARMSS processing

In [10]:
clinical_data_ms_prepared = clinical_data_ms[['Record ID', 'ageatedss', 'dd', 'edss']]


#### Step 9: Dropping rows with any nan values for the selected columns

In [11]:
clinical_data_ms_prepared.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clinical_data_ms_prepared.dropna(inplace=True)


#### Step 10: Save the data

In [25]:
clinical_data_ms_prepared = clinical_data_ms_prepared[clinical_data_ms_prepared.edss!='? Not disclosed']
clinical_data_ms_prepared = clinical_data_ms_prepared[clinical_data_ms_prepared.edss!='unknown']
clinical_data_ms_prepared = clinical_data_ms_prepared[clinical_data_ms_prepared.edss!='2,5']
clinical_data_ms_prepared = clinical_data_ms_prepared[clinical_data_ms_prepared.edss!='4 (ambulation 1)']
clinical_data_ms_prepared.edss = clinical_data_ms_prepared.edss.astype('float64')

clinical_data_ms_prepared.to_csv(SAVE_DATA_PATH, index=False, header=True)


#### Selecting only UCSF patients

In [13]:
clinical_data_ms_prepared_only_ucsf = clinical_data_ms_prepared[clinical_data_ms_prepared['Record ID'].str.startswith('714')]

clinical_data_ms_prepared_only_ucsf.to_csv(UCSF_ONLY_SAVE_DATA_PATH, index=False, header=True)


In [14]:
print('Total {} MS patients are selected to compute ARMSS score for MS base'.format(clinical_data_ms_prepared['Record ID'].unique().shape[0]))
      
print('Total {} UCSF only MS patients are selected to compute ARMSS score for MS base'.format(clinical_data_ms_prepared_only_ucsf['Record ID'].unique().shape[0]))
      

Total 1348 MS patients are selected to compute ARMSS score for MS base
Total 289 UCSF only MS patients are selected to compute ARMSS score for MS base


#### Output file after ARMSS computation, has following scores:
####    gARMSS: global ARMSS
####    ugMSSS: updated global MSSS
####    ogMSSS: original MSSS
####    lMSSS: local MSSS
####    lARMSS: local ARMSS

#### Ref: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5700773/pdf/10.1177_1352458517690618.pdf

Notes (from above Ref):
Creation of the global ARMSS matrix
A global ARMSS matrix was constructed using the
cross-sectional data set. This matrix included the
ARMSS scores obtained for EDSS scores recorded
between ages of 18 and 75 years.

In [16]:
clinical_data_ms_prepared_only_ucsf

Unnamed: 0,Record ID,ageatedss,dd,edss
36,71401-0001,54.0,4.0,2
39,71401-0003,40.0,4.0,0
42,71401-0004,53.0,24.0,4
43,71401-0005,55.0,31.0,0
46,71401-0008,42.0,10.0,0
...,...,...,...,...
752,71403-0026,51.0,5.0,2
754,71403-0029,57.0,0.0,1.5
756,71403-0032,41.0,5.0,3
757,71403-0034,52.0,28.0,3


In [14]:
clinical_data_ms_prepared.shape

(1349, 4)

In [2]:
## After ARMSS computation and Cuquita EPIC annottation

In [1]:
import pandas as pd


  from pandas.core import (


In [16]:
df = pd.read_excel('../../wetlab/data/patient_selection/ms_base_epic_ids/iMSMS_EPIC.xlsx', engine='openpyxl')
df_ = df[df['EPIC_Status'] == 'EPIC 1 (Yr 20 candidate)']
df_ = df_.sort_values(by='gARMSS', ascending=False)
df_.dropna(subset='gARMSS', inplace=True)


In [33]:
df_top = df_.head(10)
df_bottom = df_.tail(10)

df_top.drop(['lARMSS', 'lMSSS', 'uGMSSS', 'oGMSSS'], axis=1, inplace=True)
df_bottom.drop(['lARMSS', 'lMSSS', 'uGMSSS', 'oGMSSS'], axis=1, inplace=True)

df_top.rename(columns={'gARMSS':'global_ARMSS', 'dd':'Disease Duration (years)'}, inplace=True)
df_bottom.rename(columns={'gARMSS':'global_ARMSS', 'dd':'Disease Duration (years)'}, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_top.drop(['lARMSS', 'lMSSS', 'uGMSSS', 'oGMSSS'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_bottom.drop(['lARMSS', 'lMSSS', 'uGMSSS', 'oGMSSS'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_top.rename(columns={'gARMSS':'global_ARMSS', 'dd':'Disease Duration (years)'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.

In [35]:
with pd.ExcelWriter('../../wetlab/data/patient_selection/ms_base_epic_ids/iMSMS_EPIC_patient_selection.xlsx', engine='openpyxl') as writer:
    df_top.to_excel(writer, sheet_name='patients with high ARMSS score', index=False)
    df_bottom.to_excel(writer, sheet_name='patients with low ARMSS score', index=False)
