In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from sklearn.neighbors import DistanceMetric


In [2]:
### Read the CSV and keep the three rows used for ME, NH, NY.
### Only the first 17 columns are loaded; columns 18-22 are informational only.

df = pd.read_csv('ushealth05.csv', usecols=range(17))

# The rows are picked by position (19, 29, 32), not by name, so this relies on
# the row order of the CSV. The 'Area' and 'All' columns are then dropped so
# that they do not take part in the distance calculations further down.
df_state = df.iloc[[19, 29, 32]].drop(columns=['Area', 'All'])

print("\n\nDataset for ME, NH, NY: ")
df_state.head(3)




Dataset for ME, NH, NY: 


Unnamed: 0,HIV,Malignant,Diabetes,Alzheimer,Heart,TIA,Influenza,Respiratory Diseases,Liver,Nephritis,Accidents,Vehicle Accidents,Suicide,Assault,Firearms
19,11.0,3218.0,385.0,476.0,2941.0,693.0,352.0,830.0,116.0,250.0,579.0,192.0,175.0,22.0,109.0
29,13.0,2549.0,310.0,376.0,2530.0,497.0,273.0,630.0,114.0,173.0,477.0,162.0,162.0,19.0,88.0
32,1644.0,35556.0,4051.0,2065.0,51985.0,6622.0,5521.0,6818.0,1224.0,2360.0,4645.0,1530.0,1189.0,901.0,1019.0


In [3]:
### Convert the selected DataFrame rows into a float NumPy array
### (one row per selected region, one column per remaining DataFrame column)
state_list = np.array(df_state, dtype=float)
print("Data dimension: ", state_list.shape)
print(state_list)

Data dimension:  (3, 15)
[[1.1000e+01 3.2180e+03 3.8500e+02 4.7600e+02 2.9410e+03 6.9300e+02
  3.5200e+02 8.3000e+02 1.1600e+02 2.5000e+02 5.7900e+02 1.9200e+02
  1.7500e+02 2.2000e+01 1.0900e+02]
 [1.3000e+01 2.5490e+03 3.1000e+02 3.7600e+02 2.5300e+03 4.9700e+02
  2.7300e+02 6.3000e+02 1.1400e+02 1.7300e+02 4.7700e+02 1.6200e+02
  1.6200e+02 1.9000e+01 8.8000e+01]
 [1.6440e+03 3.5556e+04 4.0510e+03 2.0650e+03 5.1985e+04 6.6220e+03
  5.5210e+03 6.8180e+03 1.2240e+03 2.3600e+03 4.6450e+03 1.5300e+03
  1.1890e+03 9.0100e+02 1.0190e+03]]


In [4]:
### Pairwise Euclidean distance matrix between the rows of state_list,
### rounded to one decimal place.
### NOTE(review): newer scikit-learn releases expose DistanceMetric from
### sklearn.metrics rather than sklearn.neighbors -- confirm the import path
### against the installed version.

dist = DistanceMetric.get_metric('euclidean')
dist_Euc = dist.pairwise(state_list).round(1)

print("Euclidean Distance is: \n", dist_Euc)


Euclidean Distance is: 
 [[    0.    857.1 59950.4]
 [  857.1     0.  60713. ]
 [59950.4 60713.      0. ]]


In [5]:
### Pairwise Manhattan (L1) distance matrix between the rows of state_list,
### rounded to two decimal places.

dist = DistanceMetric.get_metric('manhattan')
dist_Man = dist.pairwise(state_list).round(2)

print("Manhattan Distance is: \n", dist_Man)

Manhattan Distance is: 
 [[     0.   1980. 116781.]
 [  1980.      0. 118757.]
 [116781. 118757.      0.]]


In [6]:
### Pairwise maximum (Chebyshev) distance matrix between the rows of
### state_list, rounded to two decimal places.

dist = DistanceMetric.get_metric('chebyshev')
dist_Max = dist.pairwise(state_list).round(2)

print("Maximum Distance is: \n", dist_Max)

Maximum Distance is: 
 [[    0.   669. 49044.]
 [  669.     0. 49455.]
 [49044. 49455.     0.]]
