In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [2]:
df = pd.read_csv("weather.csv")

In [3]:
df.head()

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,Rainy,Hot,High,f,no
1,Rainy,Hot,High,t,no
2,Overcast,Hot,High,f,yes
3,Sunny,Mild,High,f,yes
4,Sunny,Cool,Normal,f,yes


In [4]:
def partition_features(df):
    X = df.drop([df.columns[-1]], axis=1)
    y = df[df.columns[-1]]
    return X,y

In [5]:
X,y = partition_features(df)

In [6]:
for outcome in np.unique(y):
    print(outcome)

no
yes


In [7]:
y.shape[0]

14

In [9]:
#Calculate the prior probability of each class
def calc_prior_prob(y):
    class_prior = {}
    for outcome in np.unique(y):
        outcome_count = sum(y==outcome)
        class_prior[outcome] = round(outcome_count/y.shape[0],3)
    return class_prior

class_prior = calc_prior_prob(y)
class_prior

{'no': 0.357, 'yes': 0.643}

In [10]:
#Calculating likelihood for all the features
likelihoods = {}
def calc_likelihood():
    for feature in X:
        likelihoods[feature] = {}
        for outcome in np.unique(y):
            outcome_count = sum(y==outcome)
            feature_likelihood = X[feature][y[y == outcome].index.values.tolist()].value_counts().to_dict()
            for feature_value, count in feature_likelihood.items():
                likelihoods[feature][feature_value + '_' + outcome] = round(count/outcome_count,2)
    for key in likelihoods:
        print(key, likelihoods[key])
        
calc_likelihood()

Outlook {'Rainy_no': 0.6, 'Sunny_no': 0.4, 'Overcast_yes': 0.44, 'Sunny_yes': 0.33, 'Rainy_yes': 0.22}
Temp {'Mild_no': 0.4, 'Hot_no': 0.4, 'Cool_no': 0.2, 'Mild_yes': 0.44, 'Cool_yes': 0.33, 'Hot_yes': 0.22}
Humidity {'High_no': 0.8, 'Normal_no': 0.2, 'Normal_yes': 0.67, 'High_yes': 0.33}
Windy {'t_no': 0.6, 'f_no': 0.4, 'f_yes': 0.67, 't_yes': 0.33}


In [11]:
pred_priors = {}
def calc_predictor_prior():
    for feature in X:
        pred_priors[feature] = {}
        feat_vals = X[feature].value_counts().to_dict()
        for feat_val, count in feat_vals.items():
            pred_priors[feature][feat_val] = round(count/X.shape[0],2)
    for key in pred_priors:
        print(key,pred_priors[key])
calc_predictor_prior()

Outlook {'Sunny': 0.36, 'Rainy': 0.36, 'Overcast': 0.29}
Temp {'Mild': 0.43, 'Cool': 0.29, 'Hot': 0.29}
Humidity {'Normal': 0.5, 'High': 0.5}
Windy {'f': 0.57, 't': 0.43}


In [12]:
class_priors = {}
pred_priors = {}
likelihoods = {}
class_prior = {}
features = list(X.columns)
def fit(X, y):
    features = list(X.columns)
    X_train = X
    y_train = y
    train_size = X.shape[0]
    num_feats = X.shape[1]
    for feature in features:
        likelihoods[feature] = {}
        pred_priors[feature] = {}
        for feat_val in np.unique(X_train[feature]):
            pred_priors[feature].update({feat_val: 0})
            for outcome in np.unique(y_train):
                class_priors.update({outcome: 0})
    
    for outcome in np.unique(y):
        outcome_count = sum(y==outcome)
        class_prior[outcome] = round(outcome_count/y.shape[0],3)
        
    for feature in X:
        likelihoods[feature] = {}
        for outcome in np.unique(y):
            outcome_count = sum(y==outcome)
            feature_likelihood = X[feature][y[y == outcome].index.values.tolist()].value_counts().to_dict()
            for feature_value, count in feature_likelihood.items():
                likelihoods[feature][feature_value + '_' + outcome] = round(count/outcome_count,2)         
    for feature in X:
        feat_vals = X[feature].value_counts().to_dict()
        for feat_val, count in feat_vals.items():
            pred_priors[feature][feat_val] = count/y.shape[0]

In [13]:
fit(X, y)

In [14]:
def predict(X):
    results = []
    X = np.array(X)
    for query in X:
        probs_outcome = {}
        for outcome in np.unique(y):
            prior = class_priors[outcome]
            likelihood = 1
            evidence = 1
            for feat, feat_val in zip(features, query):
                likelihood *= likelihoods[feat][feat_val + '_' + outcome]
                evidence *= pred_priors[feat][feat_val]
                posterior = (likelihood * prior) / (evidence)
                probs_outcome[outcome] = posterior
        result = max(probs_outcome, key = lambda x: probs_outcome[x])
        results.append(result)
    return np.array(results)

In [15]:
query = np.array([['Rainy','Mild', 'Normal', 't']])
predict(query)

array(['no'], dtype='<U2')

In [16]:
query = np.array([['Sunny','Hot', 'High', 't']])
predict(query)

array(['no'], dtype='<U2')