# California Housing 

The California housing data has features for latitude and longitude. We create a custom transformer that returns features for the distance away from a given set of coordinates. This custom transformer is used to create features for the distance away from Los Angeles and San Francisco and also includes the original features.

In [117]:
from sklearn.datasets import fetch_california_housing
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion, Pipeline
import numpy as np
from sklearn.linear_model import LinearRegression

In [118]:
data=fetch_california_housing()
lr=LinearRegression()

In [119]:


X=data['data']
y=data['target']
#print(data['DESCR'])

### Creating the Transformers
#### Transformer 1

In [120]:
class DistFromCity(BaseEstimator, TransformerMixin):
    def __init__(self, coord):  
        self.coord=coord   # Datatype: Tuple 
        
    def fit(self, X,y=None):
        """
        It is where we put anything we need to calculate for our transformation process. 
        Anything to be calculated for transformation process of our dataset.
        
        * Here we don't need anything to be calculated
        """
        return self
    
    def transform(self, X):
        """
        Here is where transformation occurs for our estimator.
        """
        lat = X[:,0]
        lon = X[:,1]
        
        dist = np.sqrt((lat - self.coord[0])**2 + (lon - self.coord[1])**2)
        dist=dist.reshape(-1,1)
        return dist

#### Transformer 2

In [121]:
class DropColumn(BaseEstimator, TransformerMixin):
    def __init__(self, ind_cols):    # ind_cols -> the column indexes
        self.ind_cols=ind_cols
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return x[:, self.ind_cols]

In [122]:
coord_LA=(34, -118)
coord_SF=(37,-122)
dist_LA = DistFromCity(coord_LA)
dist_SF = DistFromCity(coord_SF)
drop=DropColumn([0,1,2,3,4,5])
union = FeatureUnion([
    ("drop: ", drop),
    ("LA: ", dist_LA),
    ("SF: ", dist_SF)
])

In [123]:
pipe = Pipeline([('union',union), ("regressor",lr)])

In [124]:
pipe.fit(X,y)

Pipeline(steps=[('union',
                 FeatureUnion(transformer_list=[('drop: ',
                                                 DropColumn(ind_cols=[0, 1, 2,
                                                                      3, 4,
                                                                      5])),
                                                ('LA: ',
                                                 DistFromCity(coord=(34,
                                                                     -118))),
                                                ('SF: ',
                                                 DistFromCity(coord=(37,
                                                                     -122)))])),
                ('regressor', LinearRegression())])

In [125]:
pipe.score(X,y)

0.5492141613373629

In [126]:
pipe.predict(X)

array([4.18553451, 3.99024082, 3.72968357, ..., 0.62812184, 0.77515971,
       1.10086923])