In [19]:
import warnings
warnings.filterwarnings("ignore")

In [20]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
import math

%matplotlib inline
matplotlib.rcParams['figure.figsize'] = [8,8]
np.random.seed(42)

**Import Data**

This data is based on Dermatology Data Set (https://archive.ics.uci.edu/ml/datasets/Dermatology)

Number of Instances: 133

Number of Attributes: 12

Attribute Information:
   - Clinical Attributes: (take values 0, 1, 2, 3, unless otherwise indicated)
      - 1: erythema
      - 2: scaling
      - 3: definite borders
      - 4: itching
      - 5: koebner phenomenon
      - 6: polygonal papules
      - 7: follicular papules
      - 8: oral mucosal involvement
      - 9: knee and elbow involvement
      - 10: scalp involvement
      - 11: family history, (0 or 1)
      - 12: Age (linear)
      
  
    "Class_code"______________"Class"_______________"Number of instances"
  
         1             seboreic dermatitis                 61
       
        -1             no_seboreic dermatitis              72

In [21]:
df=pd.read_csv('data/data.csv') 
df.head()

Unnamed: 0,erythema,scaling,definite_borders,itching,koebner_phenomenon,polygonal_papules,follicular_papules,oral_mucosal_involvement,knee_elbow_involvement,scalp_involvement,family_history,Age,output
0,2,2,0,3,0,0,0,0,1,0,0,55,1
1,2,3,2,0,0,0,0,0,0,0,0,41,1
2,2,2,0,3,0,0,0,0,0,0,0,21,1
3,3,3,1,2,0,0,0,0,0,1,0,22,1
4,2,2,1,3,0,0,0,0,0,0,0,30,1


In [22]:
y=df['output'].replace(-1,0)
y.value_counts()

0    72
1    61
Name: output, dtype: int64

In [23]:
df_new=df.drop('output',axis=1)
df_new.head()

Unnamed: 0,erythema,scaling,definite_borders,itching,koebner_phenomenon,polygonal_papules,follicular_papules,oral_mucosal_involvement,knee_elbow_involvement,scalp_involvement,family_history,Age
0,2,2,0,3,0,0,0,0,1,0,0,55
1,2,3,2,0,0,0,0,0,0,0,0,41
2,2,2,0,3,0,0,0,0,0,0,0,21
3,3,3,1,2,0,0,0,0,0,1,0,22
4,2,2,1,3,0,0,0,0,0,0,0,30


In [24]:
no_inputs=df_new.shape[1]

**scaling data**

In [25]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(df_new)
df_scaled=pd.DataFrame(scaler.transform(df_new))

df_scaled.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-0.284842,0.268863,-1.693099,1.109696,-0.727312,-0.991339,-0.087039,-0.9378,3.961021,-0.221699,-0.17609,1.347548
1,-0.284842,1.823595,0.461754,-2.145956,-0.727312,-0.991339,-0.087039,-0.9378,-0.187135,-0.221699,-0.17609,0.240251
2,-0.284842,0.268863,-1.693099,1.109696,-0.727312,-0.991339,-0.087039,-0.9378,-0.187135,-0.221699,-0.17609,-1.341601
3,1.362288,1.823595,-0.615672,0.024479,-0.727312,-0.991339,-0.087039,-0.9378,-0.187135,3.054521,-0.17609,-1.262508
4,-0.284842,0.268863,-0.615672,1.109696,-0.727312,-0.991339,-0.087039,-0.9378,-0.187135,-0.221699,-0.17609,-0.629767


### Modeling

In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop

X=df_scaled.values
list_metrics=[]
list_metrics_keras=[]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
#scikit learn
clf = RandomForestClassifier(n_estimators=2,random_state=0)
clf.fit(X_train,y_train)    
pred=clf.predict(X_test)


#Keras
model = Sequential()
model.add(Dense(units=2,activation='sigmoid', input_shape=(no_inputs,)))
model.add(Dense(units=1, activation='linear'))

rmsprop=RMSprop(lr=1/math.sqrt(no_inputs))

model.compile(optimizer='rmsprop', 
              loss='binary_crossentropy',#This is the objective that the model will try to minimize
              metrics=['accuracy']);
his=model.fit(df_scaled, y , epochs=69, verbose= 0);
y_pred = model.predict(X_test);

### Results

In [27]:
print('Test Data: \n')
print('F1_SCORE Scikit-Learn: ',f1_score(y_test,pred))
print('F1_SCORES Keras: ',f1_score(y_test, np.round(y_pred)))
print('Evaluate Keras: ',model.evaluate(X_test, y_test)[1])
print('Accuracy Keras: ', his.history["acc"][-1])


Test Data: 

F1_SCORE Scikit-Learn:  1.0
F1_SCORES Keras:  0.9743589743589743
Evaluate Keras:  0.975
Accuracy Keras:  0.9924812030075187
