# Connecting Workspace and Creating Experiment

In [5]:
import azureml.core
from azureml.core import Workspace
ws = Workspace.from_config()

print("Azure ML SDK Version: ", azureml.core.VERSION)
print(ws.name, ws.location, ws.resource_group, sep='\t')

Azure ML SDK Version:  1.7.0
msa-practice	australiaeast	msa-practice


In [6]:
from azureml.core import Experiment

exp_name = "parkinsons"

experiment = Experiment(workspace=ws, name=exp_name)

# Load Data and Preprocessing

In [61]:
import os
base_url = r"https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/"
data_file = r"parkinsons.data"
desc_file = r"parkinsons.names"

data_url = base_url + data_file
desc_url = base_url + desc_file

if (not os.path.exists(data_file)):
    !wget "$data_url"

if (not os.path.exists(desc_file)):
    !wget "$desc_url"

In [62]:
import pandas as pd

df_ori = pd.read_csv(data_file)
print(df_ori.columns)
df_ori.head()

Index(['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
       'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
       'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
       'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA',
       'spread1', 'spread2', 'D2', 'PPE'],
      dtype='object')


Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [63]:
#Data preprocessing
df = df_ori.copy()
df.dropna(inplace=True)
df.drop(["name"], 1, inplace=True)
df.head()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,0.426,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,0.482,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,0.517,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.584,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [64]:
#Train-test split
from sklearn.model_selection import train_test_split

x_df = df.dropna()
y_df = df.pop("status")
X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.9, random_state=66)

print(X_train.head())
print(X_test.head())

     MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
32       198.383       215.203       193.104         0.00212   
74       110.793       128.101       107.316         0.00494   
188      114.563       119.167        86.647         0.00327   
159      127.930       138.752       112.173         0.00605   
175      115.380       123.109       108.634         0.00332   

     MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  \
32            0.00001   0.00113   0.00135     0.00339       0.01263   
74            0.00004   0.00260   0.00283     0.00780       0.02442   
188           0.00003   0.00146   0.00184     0.00439       0.01185   
159           0.00005   0.00321   0.00318     0.00962       0.03235   
175           0.00003   0.00160   0.00199     0.00480       0.01503   

     MDVP:Shimmer(dB)    ...     Shimmer:DDA      NHR     HNR  status  \
32              0.111    ...         0.01919  0.00119  30.775       0   
74              0.216    ...         0.042

# Training Model

In [65]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression

model1 = LinearRegression().fit(X_train, y_train)
model2 = LogisticRegression(random_state=0).fit(X_train, y_train)



In [69]:
model2.score(X_test, y_test)

0.9261363636363636

In [67]:
model.predict(X_test)

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1])

In [68]:
print(y_test)

7      1
120    1
99     1
98     1
161    1
181    1
147    1
174    0
152    1
18     1
81     1
183    0
17     1
134    1
154    1
96     1
48     0
130    1
148    1
0      1
93     1
106    1
169    0
143    1
115    1
68     1
19     1
8      1
73     1
95     1
      ..
116    1
110    1
145    1
94     1
135    1
158    1
173    0
185    0
83     1
70     1
87     1
78     1
42     0
193    0
121    1
64     0
114    1
71     1
126    1
34     0
172    0
57     1
190    0
44     0
9      1
40     1
12     1
176    0
6      1
2      1
Name: status, Length: 176, dtype: int64
