In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import seaborn as sns
import matplotlib as mpl
import os

from IPython.core.pylabtools import figsize
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [11]:
#Getting Working Directory
os.getcwd()

#Setting style and font size for graphs
sns.set_style("whitegrid")
mpl.rcParams['font.size'] = 13

In [16]:
#load original data into dataframe
file ='df_clean2.csv'
df_clean2 = pd.read_csv(file)
df_clean2.shape

(96551, 35)

In [17]:
df_clean2.admission_type_id = df_clean2.admission_type_id.astype('object')
df_clean2.discharge_disposition_id = df_clean2.discharge_disposition_id.astype('object')
df_clean2.admission_source_id = df_clean2.admission_source_id.astype('object')

#Encoding Age. To retain the ordinal nature of age 
age_dict = {'[70-80)': 75,'[80-90)': 85, '[90-100)':100, '[60-70)': 65, '[50-60)': 55, '[40-50)': 45, '[30-40)':35,
           '[20-30)': 25, '[10-20)': 15, '[0-10)': 5}
df_clean2.age = df_clean2.age.map(age_dict)

#Visit_num Encoding as Number. '>3' was considered as 4 for simplification purposes
visit_num_dict = {'1.0': 1, '2.0': 2, '3.0': 3, '>3': 4}
df_clean2.visit_num = df_clean2.visit_num.map(visit_num_dict)


In [18]:
numeric = df_clean2.select_dtypes(exclude = 'object')
categorical = df_clean2.select_dtypes(include = 'object')

In [25]:
numeric['readmitted'] = categorical.readmitted
numeric.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96551 entries, 0 to 96550
Data columns (total 11 columns):
age                   96551 non-null int64
time_in_hospital      96551 non-null int64
num_lab_procedures    96551 non-null int64
num_procedures        96551 non-null int64
num_medications       96551 non-null int64
number_outpatient     96551 non-null int64
number_emergency      96551 non-null int64
number_inpatient      96551 non-null int64
number_diagnoses      96551 non-null int64
visit_num             96551 non-null int64
readmitted            96551 non-null object
dtypes: int64(10), object(1)
memory usage: 8.1+ MB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
results = ols('age ~ C(readmitted)', data=numeric).fit()
results.summary()

0,1,2,3
Dep. Variable:,age,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,145.6
Date:,"Thu, 30 Apr 2020",Prob (F-statistic):,7.32e-64
Time:,00:02:29,Log-Likelihood:,-405220.0
No. Observations:,96551,AIC:,810500.0
Df Residuals:,96548,BIC:,810500.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,67.8584,0.158,429.445,0.000,67.549,68.168
C(readmitted)[T.>30],-0.9737,0.181,-5.392,0.000,-1.328,-0.620
C(readmitted)[T.NO],-2.4279,0.173,-14.037,0.000,-2.767,-2.089

0,1,2,3
Omnibus:,4419.352,Durbin-Watson:,1.984
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5110.463
Skew:,-0.53,Prob(JB):,0.0
Kurtosis:,3.382,Cond. No.,6.52


In [39]:
col = ['age','time_in_hospital','visit_num','num_procedures','num_medications','number_outpatient','number_emergency',
      'number_inpatient','number_diagnoses', 'num_lab_procedures']

numeric.groupby('readmitted')[col].mean()

Unnamed: 0_level_0,age,time_in_hospital,visit_num,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,num_lab_procedures
readmitted,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
<30,67.858383,4.755547,1.705865,1.287575,16.688211,0.352981,0.177407,0.811017,7.668049,44.023828
>30,66.884709,4.465802,1.589953,1.251166,16.133867,0.402605,0.176717,0.668094,7.62486,43.603597
NO,65.430499,4.209812,1.306863,1.38484,15.401009,0.242436,0.089081,0.332913,7.169505,41.861514
