In [1]:
# Import the required modules
import pandas as pd
import numpy as np
import warnings
# Suppress every warning for the rest of the session. Note that this also hides
# deprecation / FutureWarning messages raised by the libraries used below.
warnings.filterwarnings('ignore')

In [2]:
# Load the raw records from the CSV file into a DataFrame
physio = pd.read_csv(filepath_or_buffer='dataset.csv')
# Preview the first five rows
physio.head(n=5)

Unnamed: 0,Age,Affected body part,Diagnosis,Duration of pain,Intensity of pain,Treatment approach,Duration of treatment
0,24.0,knee,patella fracture,chronic,9 on nprs,manual and mechanical,22 days
1,35.0,knee,patella fracture,chronic,6 on nprs,manual,40 days
2,26.0,knee,patella fracture,chronic,8 on nprs,mechanical,31 days
3,21.0,knee,patella fracture,subacute,7 on nprs,manual and mechanical,15 days
4,48.0,knee,patella fracture,subacute,8 on nprs,manual and mechanical,17 days


In [3]:
# Turn the pain-intensity and treatment-duration strings into integers by
# keeping only their leading number (e.g. '9 on nprs' -> 9, '22 days' -> 22).
def _leading_int(text):
    """Return the first whitespace-separated token of ``text`` as an int."""
    return int(text.split()[0])

for column in ('Intensity of pain', 'Duration of treatment'):
    physio[column] = physio[column].apply(_leading_int)
physio.head()

Unnamed: 0,Age,Affected body part,Diagnosis,Duration of pain,Intensity of pain,Treatment approach,Duration of treatment
0,24.0,knee,patella fracture,chronic,9,manual and mechanical,22
1,35.0,knee,patella fracture,chronic,6,manual,40
2,26.0,knee,patella fracture,chronic,8,mechanical,31
3,21.0,knee,patella fracture,subacute,7,manual and mechanical,15
4,48.0,knee,patella fracture,subacute,8,manual and mechanical,17


In [4]:
# Encode every diagnosis label as an integer code.
# Series.unique() returns labels in order of first appearance, so the first
# label seen in the column becomes 0, the next new label 1, and so on.
diagnosis = list(physio['Diagnosis'].unique())
diagnosis_codes = {label: code for code, label in enumerate(diagnosis)}
# Assign the encoded column back to the frame. Calling
# physio['Diagnosis'].replace(..., inplace=True) is a chained in-place
# operation: under pandas Copy-on-Write it modifies a temporary object and
# leaves `physio` itself unchanged.
physio['Diagnosis'] = physio['Diagnosis'].map(diagnosis_codes)
physio.head()

Unnamed: 0,Age,Affected body part,Diagnosis,Duration of pain,Intensity of pain,Treatment approach,Duration of treatment
0,24.0,knee,0,chronic,9,manual and mechanical,22
1,35.0,knee,0,chronic,6,manual,40
2,26.0,knee,0,chronic,8,mechanical,31
3,21.0,knee,0,subacute,7,manual and mechanical,15
4,48.0,knee,0,subacute,8,manual and mechanical,17


In [5]:
# Encode the treatment-approach and pain-duration text columns as integers.
approach_codes = {'manual': 0, 'mechanical': 1, 'manual and mechanical': 2}
pain_duration_codes = {'acute': 0, 'subacute': 1, 'chronic': 2}
# The result is assigned back to the frame instead of using a chained
# `.replace(..., inplace=True)` on a column selection, which under pandas
# Copy-on-Write only modifies a temporary object. infer_objects() converts the
# result to an integer dtype on pandas versions where replace() no longer
# downcasts automatically.
physio['Treatment approach'] = physio['Treatment approach'].replace(approach_codes).infer_objects()
physio['Duration of pain'] = physio['Duration of pain'].replace(pain_duration_codes).infer_objects()
physio.head()

Unnamed: 0,Age,Affected body part,Diagnosis,Duration of pain,Intensity of pain,Treatment approach,Duration of treatment
0,24.0,knee,0,2,9,2,22
1,35.0,knee,0,2,6,0,40
2,26.0,knee,0,2,8,1,31
3,21.0,knee,0,1,7,2,15
4,48.0,knee,0,1,8,2,17


In [6]:
# Count the missing values in each column
physio.isna().sum()

Age                      46
Affected body part        0
Diagnosis                 0
Duration of pain          0
Intensity of pain         0
Treatment approach        0
Duration of treatment     0
dtype: int64

In [7]:
# Age is missing for many rows, so that column is removed.
# The affected body part is removed as well, since the prediction is based on the diagnosis.
physio.drop(columns=['Age', 'Affected body part'], inplace=True)
physio.head()

Unnamed: 0,Diagnosis,Duration of pain,Intensity of pain,Treatment approach,Duration of treatment
0,0,2,9,2,22
1,0,2,6,0,40
2,0,2,8,1,31
3,0,1,7,2,15
4,0,1,8,2,17


In [8]:
from sklearn.model_selection import train_test_split

# Target is the treatment duration; features are all remaining columns.
y = physio['Duration of treatment']
x = physio.drop(columns=['Duration of treatment'])
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=101
)

In [9]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Fit a random forest on the training split. The seed is pinned (same value as
# the train/test split) so the fitted model and the metrics printed below are
# reproducible across kernel restarts.
model = RandomForestRegressor(random_state=101)
model.fit(xtrain, ytrain)

# Predict the values for training and testing data
pred_Train = model.predict(xtrain)
pred_Test = model.predict(xtest)


def _print_errors(title, y_true, y_pred):
    """Print MAE, MSE and RMSE of ``y_pred`` against ``y_true`` under ``title``.

    The metrics are called in scikit-learn's documented (y_true, y_pred)
    order, and the MSE is computed once and reused for the RMSE.
    """
    mse = mean_squared_error(y_true, y_pred)
    print(title)
    print('MAE =', mean_absolute_error(y_true, y_pred))
    print('MSE =', mse)
    print('RMSE =', mse ** 0.5)


# Calculate errors in predictions
_print_errors('Training Error:', ytrain, pred_Train)
_print_errors('Testing Error:', ytest, pred_Test)
# model.score() reports the coefficient of determination (R^2) of the predictions
print('\nTraining Score :', model.score(xtrain, ytrain))
print('Testing Score :', model.score(xtest, ytest))

Training Error:
MAE = 0.3720502438533689
MSE = 0.5378268443933008
RMSE = 0.7333667870808581
Testing Error:
MAE = 0.7268415700874717
MSE = 2.2464210349401688
RMSE = 1.498806536861969

Training Score : 0.9863349044655174
Testing Score : 0.9189318914653053


In [None]:
#The data needs to be in the following format for prediction
'''
Diagnosis:
    Patella Fracture:       0
    Cervical Radiculopathy: 1
    Lumbar Radiculopathy:   2
    Frozen Shoulder:        3
    Tennis Elbow:           4
    Osteoarthritis:         5
    Plantar Fasciitis:      6
    
Duration of pain:
    acute: 0
    subacute: 1
    chronic: 2

Intensity of pain:
    On a scale of 0-9

Treatment Approach:
    manual: 0
    mechanical: 1
    manual and mechanical: 2
    
Input Format : [Diagnosis, Duration of pain, Intensity of pain, Treatment Approach]
'''