In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## ExcelからDataを読み込む
- Weather: sunny = 1, rainy = 0
- Car: working = 1, broken = 0
- Class: go-out = 1, stay-home = 0

In [2]:
# Reading the workbook emits a warning that embedded images etc. cannot be
# loaded; the data itself is read correctly, so the warning can be ignored.
# Only the first three columns are used, the header row is at offset 17, and
# only the first 10 data rows are kept, cast to float64.
data = (
    pd.read_excel("09-NaiveBayes.xlsx", usecols=[0, 1, 2], header=17)
    .head(10)
    .astype(np.float64)
)
data

  warn(msg)


Unnamed: 0,Weather,Car,Class
0,1.0,1.0,1.0
1,0.0,0.0,1.0
2,1.0,1.0,1.0
3,1.0,1.0,1.0
4,1.0,1.0,1.0
5,0.0,0.0,0.0
6,0.0,0.0,0.0
7,1.0,1.0,0.0
8,1.0,0.0,0.0
9,0.0,0.0,0.0


## 要素ごとに集合を作る

In [3]:
# Build, for every binary column, the set of row labels where the value is 1
# and the complementary set where it is not.
#
# The complement is taken against the actual row labels of `data` rather than
# range(len(data)), so the sets stay correct even if `data` does not carry a
# default 0..n-1 index (e.g. after filtering or re-indexing).
all_rows = set(data.index)

# Weather: sunny = 1, rainy = 0
sunny = set(data.query('Weather == 1').index)
rainy = all_rows - sunny

# Car: working = 1, broken = 0
working = set(data.query('Car == 1').index)
broken = all_rows - working

# Class: go-out = 1, stay-home = 0
go_out = set(data.query('Class == 1').index)
stay_home = all_rows - go_out

## 尤度を計算し, classを判別する. 
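$$
\begin{aligned}
\mathrm{likelihood} &= P(\mathrm{weather} \mid \mathrm{class}) \times P(\mathrm{car} \mid \mathrm{class}) \times P(\mathrm{class}) \\
&= \frac{\mathrm{len}(\mathrm{weather} \cap \mathrm{class})}{\mathrm{len}(\mathrm{class})} \times \frac{\mathrm{len}(\mathrm{car} \cap \mathrm{class})}{\mathrm{len}(\mathrm{class})} \times \frac{\mathrm{len}(\mathrm{class})}{\mathrm{len}(\mathrm{data})} \\
&= \mathrm{len}(\mathrm{weather} \cap \mathrm{class}) \times \mathrm{len}(\mathrm{car} \cap \mathrm{class}) \times \frac{1}{\mathrm{len}(\mathrm{class}) \times \mathrm{len}(\mathrm{data})}
\end{aligned}
$$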

In [4]:
# To speed up the computation, precompute the denominator
# len(class) * len(data) once per class and call it C:
# C0 belongs to the stay-home class (0), C1 to the go-out class (1).
C0, C1 = (len(members) * len(data) for members in (stay_home, go_out))

In [5]:
# Score every row against both classes and keep the predicted label.
predict = []
for row in data.itertuples():
    # Select the index sets matching this row's observed feature values.
    weather_set = sunny if row.Weather else rainy
    car_set = working if row.Car else broken

    # Per-class score: len(weather & class) * len(car & class) / C.
    score_stay = len(weather_set & stay_home) * len(car_set & stay_home) / C0
    score_go = len(weather_set & go_out) * len(car_set & go_out) / C1

    # Class 0 wins only when its score is strictly larger; ties go to class 1.
    if score_stay > score_go:
        label = 0
    else:
        label = 1
    predict.append(label)

    # Tab-separated trace: score for class 0, score for class 1, prediction.
    print(score_stay, score_go, label, sep='\t')

0.04	0.32	1
0.24	0.02	0
0.04	0.32	1
0.04	0.32	1
0.04	0.32	1
0.24	0.02	0
0.24	0.02	0
0.04	0.32	1
0.16	0.08	0
0.24	0.02	0


## 精度を確かめる

In [6]:
# Fraction of rows whose predicted label equals the true Class value,
# reported as a percentage.
n_correct = (predict == data.Class).sum()
accuracy = n_correct / len(data)
print(str(accuracy * 100), '%', sep='')

80.0%
