In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the fertility survey data and discard the 'Season' column in one step,
# so `df` never exists in a half-prepared state.
df = pd.read_csv('fertility.csv').drop(columns=['Season'])
df.head()

Unnamed: 0,Age,Childish diseases,Accident or serious trauma,Surgical intervention,High fevers in the last year,Frequency of alcohol consumption,Smoking habit,Number of hours spent sitting per day,Diagnosis
0,30,no,yes,yes,more than 3 months ago,once a week,occasional,16,Normal
1,35,yes,no,yes,more than 3 months ago,once a week,daily,6,Altered
2,27,yes,no,no,more than 3 months ago,hardly ever or never,never,9,Normal
3,32,no,yes,yes,more than 3 months ago,hardly ever or never,never,7,Normal
4,30,yes,yes,no,more than 3 months ago,once a week,never,9,Altered


In [4]:
from sklearn.preprocessing import LabelEncoder

# Columns holding 'no'/'yes' answers that are converted to integer codes.
feature_mod = ['Childish diseases', 'Accident or serious trauma', 'Surgical intervention']

# One encoder instance is refit on every column in turn. LabelEncoder assigns
# codes in sorted order of the values it sees, so 'no' -> 0 and 'yes' -> 1.
# After this cell `encode` only remembers the classes of the last column.
encode = LabelEncoder()
df = df.assign(**{kolom: encode.fit_transform(df[kolom]) for kolom in feature_mod})

df.head()

Unnamed: 0,Age,Childish diseases,Accident or serious trauma,Surgical intervention,High fevers in the last year,Frequency of alcohol consumption,Smoking habit,Number of hours spent sitting per day,Diagnosis
0,30,0,1,1,more than 3 months ago,once a week,occasional,16,Normal
1,35,1,0,1,more than 3 months ago,once a week,daily,6,Altered
2,27,1,0,0,more than 3 months ago,hardly ever or never,never,9,Normal
3,32,0,1,1,more than 3 months ago,hardly ever or never,never,7,Normal
4,30,1,1,0,more than 3 months ago,once a week,never,9,Altered


In [5]:
# Feature frame: every column except the 'Diagnosis' target, with the remaining
# text columns expanded into one indicator column per category.
dfnew = pd.get_dummies(df.drop(columns=['Diagnosis']))
dfnew.head()

Unnamed: 0,Age,Childish diseases,Accident or serious trauma,Surgical intervention,Number of hours spent sitting per day,High fevers in the last year_less than 3 months ago,High fevers in the last year_more than 3 months ago,High fevers in the last year_no,Frequency of alcohol consumption_every day,Frequency of alcohol consumption_hardly ever or never,Frequency of alcohol consumption_once a week,Frequency of alcohol consumption_several times a day,Frequency of alcohol consumption_several times a week,Smoking habit_daily,Smoking habit_never,Smoking habit_occasional
0,30,0,1,1,16,0,1,0,0,0,1,0,0,0,0,1
1,35,1,0,1,6,0,1,0,0,0,1,0,0,1,0,0
2,27,1,0,0,9,0,1,0,0,1,0,0,0,0,1,0
3,32,0,1,1,7,0,1,0,0,1,0,0,0,0,1,0
4,30,1,1,0,9,0,1,0,0,0,1,0,0,0,1,0


In [8]:
from sklearn.model_selection import train_test_split

# X: one-hot encoded features, Y: the diagnosis label to predict.
X, Y = dfnew, df['Diagnosis']

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
xtr, xts, ytr, yts = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=90,
)

In [11]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression (liblinear solver) on the training rows.
model_log = LogisticRegression(solver='liblinear').fit(xtr, ytr)

# Predicted labels and mean accuracy on the held-out rows.
prediksi_log = model_log.predict(xts)
skor_log = model_log.score(xts, yts)

# Render the accuracy as a percentage with four decimals, then report it.
skor_log_pct = "{0:.4%}".format(skor_log)
print("Logistic Regression Accuracy : %s" % skor_log_pct)

Logistic Regression Accuracy : 100.0000%


In [14]:
from sklearn.tree import DecisionTreeClassifier

# A decision tree shuffles candidate features at every split, so an unseeded
# tree can give a different model (and a different accuracy) on each run.
# Seed it with the same value used for the train/test split so that
# Restart & Run All reproduces the reported score.
model_DT = DecisionTreeClassifier(random_state=90)
model_DT.fit(xtr, ytr)

# Predicted labels and mean accuracy on the held-out rows.
prediksi_DT = model_DT.predict(xts)
skor_DT = model_DT.score(xts, yts)

# Single format call; prints e.g. "Decision Tree Accuracy : 70.0000%".
print("Decision Tree Accuracy : {0:.4%}".format(skor_DT))

Decision Tree Accuracy : 70.0000%


In [15]:
from sklearn.ensemble import RandomForestClassifier

# A random forest bootstraps rows and samples features at random, so an
# unseeded forest gives a different accuracy on each run. Seed it with the
# same value used for the train/test split so that Restart & Run All
# reproduces the reported score.
model_rf = RandomForestClassifier(n_estimators=50, random_state=90)
model_rf.fit(xtr, ytr)

# Predicted labels and mean accuracy on the held-out rows.
prediksi_rf = model_rf.predict(xts)
skor_rf = model_rf.score(xts, yts)

# Single format call; prints e.g. "Random Forest Accuracy : 90.0000%".
print("Random Forest Accuracy : {0:.4%}".format(skor_rf))

Random Forest Accuracy : 90.0000%


#### Prediction

In [16]:
# Arin's feature values, written once and labelled with the training columns so
# all three models receive exactly the same, correctly ordered input.
# Column order (same as `xtr`): Age, Childish diseases, Accident or serious
# trauma, Surgical intervention, Hours sitting per day,
# High fevers [<3 months, >3 months, no],
# Alcohol [every day, hardly ever/never, once a week, several times a day,
# several times a week], Smoking [daily, never, occasional].
# The DataFrame constructor raises if the vector length does not match the
# number of training columns.
arin = pd.DataFrame(
    [[29, 0, 0, 0, 5,  0, 0, 1,  1, 0, 0, 0, 0,  1, 0, 0]],
    columns=xtr.columns,
)

print('Arin, prediksi kesuburan: {} (Logistic Regression)'.format(model_log.predict(arin)[0]))
print('Arin, prediksi kesuburan: {} (Decision Tree Classifier)'.format(model_DT.predict(arin)[0]))
print('Arin, prediksi kesuburan: {} (Random Forest Classifier)'.format(model_rf.predict(arin)[0]))

Arin, prediksi kesuburan: Normal (Logistic Regression)
Arin, prediksi kesuburan: Normal (Decision Tree Classifier)
Arin, prediksi kesuburan: Normal (Random Forest Classifier)


In [17]:
# Bebi's feature values, written once and labelled with the training columns so
# all three models receive exactly the same, correctly ordered input.
# Column order (same as `xtr`): Age, Childish diseases, Accident or serious
# trauma, Surgical intervention, Hours sitting per day,
# High fevers [<3 months, >3 months, no],
# Alcohol [every day, hardly ever/never, once a week, several times a day,
# several times a week], Smoking [daily, never, occasional].
# The DataFrame constructor raises if the vector length does not match the
# number of training columns.
bebi = pd.DataFrame(
    [[31, 0, 1, 1, 24,  0, 0, 1,  0, 0, 0, 0, 1,  0, 1, 0]],
    columns=xtr.columns,
)

print('Bebi, prediksi kesuburan: {} (Logistic Regression)'.format(model_log.predict(bebi)[0]))
print('Bebi, prediksi kesuburan: {} (Decision Tree Classifier)'.format(model_DT.predict(bebi)[0]))
print('Bebi, prediksi kesuburan: {} (Random Forest Classifier)'.format(model_rf.predict(bebi)[0]))

Bebi, prediksi kesuburan: Normal (Logistic Regression)
Bebi, prediksi kesuburan: Normal (Decision Tree Classifier)
Bebi, prediksi kesuburan: Normal (Random Forest Classifier)


In [18]:
# Caca's feature values, written once and labelled with the training columns so
# all three models receive exactly the same, correctly ordered input.
# Column order (same as `xtr`): Age, Childish diseases, Accident or serious
# trauma, Surgical intervention, Hours sitting per day,
# High fevers [<3 months, >3 months, no],
# Alcohol [every day, hardly ever/never, once a week, several times a day,
# several times a week], Smoking [daily, never, occasional].
# The DataFrame constructor raises if the vector length does not match the
# number of training columns.
caca = pd.DataFrame(
    [[25, 1, 0, 0, 7,  1, 0, 0,  0, 1, 0, 0, 0,  0, 1, 0]],
    columns=xtr.columns,
)

print('Caca, prediksi kesuburan: {} (Logistic Regression)'.format(model_log.predict(caca)[0]))
print('Caca, prediksi kesuburan: {} (Decision Tree Classifier)'.format(model_DT.predict(caca)[0]))
print('Caca, prediksi kesuburan: {} (Random Forest Classifier)'.format(model_rf.predict(caca)[0]))

Caca, prediksi kesuburan: Normal (Logistic Regression)
Caca, prediksi kesuburan: Normal (Decision Tree Classifier)
Caca, prediksi kesuburan: Normal (Random Forest Classifier)


In [19]:
# Dini's feature values, written once and labelled with the training columns so
# all three models receive exactly the same, correctly ordered input.
# Column order (same as `xtr`): Age, Childish diseases, Accident or serious
# trauma, Surgical intervention, Hours sitting per day,
# High fevers [<3 months, >3 months, no],
# Alcohol [every day, hardly ever/never, once a week, several times a day,
# several times a week], Smoking [daily, never, occasional].
# The DataFrame constructor raises if the vector length does not match the
# number of training columns.
dini = pd.DataFrame(
    [[28, 0, 1, 1, 24,  0, 0, 1,  0, 1, 0, 0, 0,  1, 0, 0]],
    columns=xtr.columns,
)

print('Dini, prediksi kesuburan: {} (Logistic Regression)'.format(model_log.predict(dini)[0]))
print('Dini, prediksi kesuburan: {} (Decision Tree Classifier)'.format(model_DT.predict(dini)[0]))
print('Dini, prediksi kesuburan: {} (Random Forest Classifier)'.format(model_rf.predict(dini)[0]))

Dini, prediksi kesuburan: Normal (Logistic Regression)
Dini, prediksi kesuburan: Normal (Decision Tree Classifier)
Dini, prediksi kesuburan: Normal (Random Forest Classifier)


In [20]:
# Enno's feature values, written once and labelled with the training columns so
# all three models receive exactly the same, correctly ordered input.
# Column order (same as `xtr`): Age, Childish diseases, Accident or serious
# trauma, Surgical intervention, Hours sitting per day,
# High fevers [<3 months, >3 months, no],
# Alcohol [every day, hardly ever/never, once a week, several times a day,
# several times a week], Smoking [daily, never, occasional].
# The DataFrame constructor raises if the vector length does not match the
# number of training columns.
enno = pd.DataFrame(
    [[42, 1, 0, 0, 8,  0, 0, 1,  0, 1, 0, 0, 0,  0, 1, 0]],
    columns=xtr.columns,
)

print('Enno, prediksi kesuburan: {} (Logistic Regression)'.format(model_log.predict(enno)[0]))
print('Enno, prediksi kesuburan: {} (Decision Tree Classifier)'.format(model_DT.predict(enno)[0]))
print('Enno, prediksi kesuburan: {} (Random Forest Classifier)'.format(model_rf.predict(enno)[0]))

Enno, prediksi kesuburan: Normal (Logistic Regression)
Enno, prediksi kesuburan: Normal (Decision Tree Classifier)
Enno, prediksi kesuburan: Normal (Random Forest Classifier)
