In [1]:
import pandas as pd
import numpy as np

# Sample sklearn-style algorithm

### Rough sklearn strategy

In [2]:
class ModelClass:
    '''
    Skeleton of an sklearn-style model: hyperparameters are set in
    __init__, training happens in fit, and predictions come from
    predict / predict_proba. Every method here is an empty placeholder.
    '''

    def __init__(self, *arg, **kwargs):
        '''
        Establishes hyperparameters
        '''

    def fit(self, X, y):
        '''
        Takes in training data and stores whatever information
        is needed to make future predictions

        X = training data features
        y = training data labels
        '''

    def predict(self, x):
        '''
        Returns the label prediction for x

        x = new data point (features only)
        '''

    def predict_proba(self, x):
        '''
        If classification model: returns the probability
        of x belonging to each class

        x = new data point (features only)
        '''


### Making our own practice class:  Mean Regressor 

The simplest possible model *always predicts the average* of the training labels, ignoring the features entirely.

Let's turn this model into an sklearn-style class.

In [3]:
class Mean_Regressor():
    '''
    Baseline regressor that always predicts the mean of the training labels.

    The learned value is stored in `mean_prediction`, which only exists
    after fit() has been called.
    '''

    def __init__(self, *arg, **kwargs):    #no hyperparameters
        '''
        Accepts and ignores any arguments; the model has no hyperparameters.
        '''
        pass

    def fit(self, X, y):
        '''
        X = training data features (unused: the mean does not depend on them)
        y = training data labels

        Stores the mean of y as `mean_prediction` and returns self.
        '''
        # Plain sequences (lists, tuples) have no .mean(); convert those,
        # but leave arrays / pandas objects alone so their own .mean()
        # behaviour is unchanged.
        if not hasattr(y, 'mean'):
            y = np.asarray(y, dtype=float)
        self.mean_prediction = y.mean()
        return self

    def predict(self, x):
        '''
        x = one new data point (1-D features), or a 2-D batch with one
            row per data point

        Returns the stored mean for a single data point, or an array
        holding one copy of the mean per row for a 2-D batch.

        Raises AttributeError if called before fit().
        '''
        if not hasattr(self, 'mean_prediction'):
            raise AttributeError(
                "Mean_Regressor has no 'mean_prediction' yet; call fit() first"
            )

        # Anything that is not at least 2-D is treated as a single data
        # point and keeps the scalar return value.
        if np.ndim(x) < 2:
            return self.mean_prediction

        # 2-D input: one prediction per row.
        return np.full(np.shape(x)[0], self.mean_prediction)

In [4]:
#importing sample dataset
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2. When the
# import is unavailable, rebuild the same data/target arrays from the original
# source file (this fallback needs network access).
try:
    from sklearn.datasets import load_boston

    boston = load_boston(return_X_y=False)
except ImportError:
    from sklearn.utils import Bunch

    # In the raw file each record is spread over two consecutive lines
    # (11 values, then 3), preceded by 22 lines of descriptive header text.
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

# Keep only three of the feature columns (indices 0, 5 and 6) under readable names.
X = pd.DataFrame(boston.data[:, (0, 5, 6)], columns = ['Crime_Rate', 'Avg_Rooms', 'Pct_built_b4_1940'])
y = boston.target

In [5]:
# X = features; .shape is (number of rows, number of columns)
X.shape

(506, 3)

In [6]:
# Preview the first five rows of the features X
X.head()

Unnamed: 0,Crime_Rate,Avg_Rooms,Pct_built_b4_1940
0,0.00632,6.575,65.2
1,0.02731,6.421,78.9
2,0.02729,7.185,61.1
3,0.03237,6.998,45.8
4,0.06905,7.147,54.2


In [7]:
# y = target = median value of homes (in $1000's); show the first five values
y[:5]

array([24. , 21.6, 34.7, 33.4, 36.2])

In [8]:
# Create the model; nothing is learned until fit() is called
model = Mean_Regressor()

In [9]:
# Before fit() is called the learned attribute does not exist yet.
# Catch and print the error so this demonstration does not stop
# "Restart & Run All" at this cell.
try:
    model.mean_prediction
except AttributeError as err:
    print("AttributeError:", err)

AttributeError: 'Mean_Regressor' object has no attribute 'mean_prediction'

In [10]:
# Train the model: fit() stores the mean of y and returns the model itself
model.fit(X, y)

<__main__.Mean_Regressor at 0x11e58fa58>

In [11]:
# After fitting, the learned mean is available as an attribute
model.mean_prediction

22.532806324110677

In [12]:
# Let's say we have a new town with these features
# (listed in the same order as the columns of X):
'''
Crime_Rate            0.09
Avg_Rooms             6.41
Pct_built_b4_1940    84.10
'''

new_town = np.array([.09, 6.41, 84.10])

In [13]:
# Let's use our model to predict the median value of homes in the new town.
# For a single data point the mean regressor ignores the features and
# returns the mean learned during fit().
model.predict(new_town)

22.532806324110677