# Majority Class classifier
A simple example of a classifier in the `sklearn` framework.   
https://sklearn-template.readthedocs.io/en/latest/user_guide.html   
This classifier simply identifies the most frequent class in the training data and always predicts that class.  
Implementing the classifier entails defining the `fit` and `predict` methods. 

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.base import BaseEstimator, ClassifierMixin
from collections import Counter

In [None]:
# Load the penguins data, using the first CSV column as the row index.
penguins = pd.read_csv('penguins.csv', index_col = 0)
# Report (rows, columns) and preview the first few rows.
print(penguins.shape)
penguins.head()

In [None]:
# Reduce to a two-class problem: keep only the Adelie and Chinstrap rows.
penguins2C = penguins.loc[penguins['species'].isin(['Adelie','Chinstrap'])]

In [None]:
# Split the label column off from the features.
# NOTE: `pop` removes 'species' from penguins2C in place, so re-running this
# cell without first re-running the filtering cell raises a KeyError.
y = penguins2C.pop('species').values
X_raw = penguins2C.values
feature_names = penguins2C.columns
# 50/50 train/test split; random_state is fixed so the split is reproducible.
X_tr_raw, X_ts_raw, y_train, y_test = train_test_split(X_raw, y, random_state=2, test_size=1/2)
# Fit the scaler on the training data only, then apply that same scaling to
# the test data so no test-set information leaks into preprocessing.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_tr_raw)
X_test = scaler.transform(X_ts_raw)
max_k = X_train.shape[1]  # number of feature columns
# Display the shapes of the two splits as the cell output.
X_train.shape, X_test.shape

## Gaussian NB
Running Gaussian Naive Bayes on the penguin dataset.

In [None]:
# Create a Gaussian Naive Bayes classifier and fit it on the scaled training data.
gnb = GaussianNB()
gnb.fit(X_train,y_train)

In [None]:
# `gnb` was already fitted on (X_train, y_train) in the cell that created it,
# so it is not refitted here; just predict the labels of the test set.
gnb.predict(X_test)

In [None]:
# Evaluate the fitted Gaussian NB model on the held-out test set.
gnb.score(X_test, y_test)

## Majority Class Classifier
An implementation of a Majority Class Classifier that fits the `sklearn` estimator framework.

In [None]:
class MyMCC(BaseEstimator, ClassifierMixin):
    """Majority-class classifier.

    Ignores the features entirely: ``fit`` finds the most frequent label in
    the training targets and ``predict`` returns that label for every sample.

    Attributes set by ``fit``:
        most_freq: the most frequent training label.
        classes_: sorted array of the distinct training labels.
        Xt, yt: the training data as passed to ``fit`` (kept for backward
            compatibility; not used for prediction).
    """

    def fit(self, Xt, yt):
        """Record the most frequent label in ``yt``.

        Parameters:
            Xt: training features (stored but otherwise unused).
            yt: iterable of hashable training labels.

        Returns:
            self, so that calls can be chained.

        Raises:
            ValueError: if ``yt`` contains no labels.
        """
        self.Xt = Xt
        self.yt = yt

        c_dict = Counter(self.yt)
        if not c_dict:
            raise ValueError("MyMCC.fit requires at least one training label.")
        # On ties, max() keeps the first maximal key, i.e. the label that
        # appears first in yt (Counter preserves insertion order).
        self.most_freq = max(c_dict, key=c_dict.get)
        # Distinct labels in sorted order; defines the column order of
        # predict_proba.
        self.classes_ = np.unique(yt)
        print(c_dict, self.most_freq)
        return self

    def predict(self, Xtes):
        """Predict the most frequent training label for every row of ``Xtes``.

        Parameters:
            Xtes: array-like whose first dimension is the number of samples.

        Returns:
            1-D array of length ``n_samples`` filled with ``most_freq``.
        """
        # np.shape also accepts plain (nested) lists, not only objects
        # exposing a .shape attribute.
        n_test = np.shape(Xtes)[0]
        return np.full(n_test, self.most_freq)

    def predict_proba(self, Xtes):
        """Return one-hot class probabilities for every row of ``Xtes``.

        The majority class gets probability 1 and all other classes 0,
        consistent with what ``predict`` returns.

        Parameters:
            Xtes: array-like whose first dimension is the number of samples.

        Returns:
            Array of shape ``(n_samples, n_classes)`` with columns ordered
            as in ``classes_``.
        """
        n_test = np.shape(Xtes)[0]
        proba = np.zeros((n_test, len(self.classes_)))
        # Boolean mask selects the single column of the majority class.
        proba[:, self.classes_ == self.most_freq] = 1.0
        return proba

In [None]:
# Instantiate the majority-class classifier (it takes no constructor arguments).
mcc = MyMCC()

In [None]:
# Fit on the training data; `fit` prints the label counts and the majority label.
mcc.fit(X_train,y_train)

In [None]:
# Every test sample receives the same prediction: the majority training label.
mcc.predict(X_test)

The `score` method is inherited from `ClassifierMixin`; for classifiers it returns the mean accuracy of `predict` on the given test data and labels.

In [None]:
# Evaluate the majority-class baseline on the held-out test set.
mcc.score(X_test, y_test)