In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [2]:
from libs import metrics
from libs import kneighbors_classifier
from libs import kneighbors_regression
from libs import simple_linear_regression

In [3]:
# Load the California housing dataset; later cells read features from `.data` and targets from `.target`.
housing = fetch_california_housing()

.. _california_housing_dataset:

Conjunto de dados de habitação da Califórnia
--------------------------------------------

**Características do conjunto de dados:**

    :Número de instâncias: 20640

    :Número de atributos: 8 atributos numéricos preditivos e a variável-alvo

    :Informações do atributo:
        - MedInc: renda mediana no grupo de blocos
        - HouseAge: idade mediana das casas no grupo de blocos
        - AveRooms: número médio de cômodos por domicílio
        - AveBedrms: número médio de quartos (dormitórios) por domicílio
        - Population: população do grupo de blocos
        - AveOccup: número médio de moradores por domicílio
        - Latitude: latitude do grupo de blocos
        - Longitude: longitude do grupo de blocos

    :Valores de atributo ausentes: Nenhum

A variável-alvo é o valor mediano das casas nos distritos da Califórnia,
expresso em centenas de milhares de dólares ($100.000).

In [4]:
# Dimensions of the full feature matrix as (rows, columns).
housing.data.shape

(20640, 8)

In [5]:
# Restrict the experiment to the first N_SAMPLES rows and a single feature column.
N_SAMPLES = 2000
FEATURE_NAME = "AveRooms"  # the column previously selected by the hard-coded index 2

# Look the column up by name rather than by position so the choice is self-documenting.
feature_idx = housing.feature_names.index(FEATURE_NAME)

y = housing.target[:N_SAMPLES]
x = housing.data[:N_SAMPLES, feature_idx]

Mean Absolute Error (MAE)

$MAE = \frac{1}{n}\sum_{i=1}^{n}|\hat{y}_i - y_i|$

Mean Squared Error (MSE)

$MSE = \frac{1}{n}\sum_{i=1}^{n}(\hat{y}_i - y_i)^2$

Root Mean Squared Error (RMSE)

$RMSE = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(\hat{y}_i - y_i)^2}$


## Treinando modelos

In [6]:
# Keep 80% of the rows for training and hold out the rest; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=42,
)

In [7]:
# Sanity-check the split sizes; both arrays are 1-D because a single feature column was selected.
X_train.shape, X_test.shape

((1600,), (400,))

In [8]:
# Instantiate the metrics helper. The instance is bound to the same name as the
# imported `metrics` module, so an unguarded re-run of this cell would look up
# `Metrics` on the instance and fail. The guard rebinds only while `metrics`
# still exposes `Metrics` (presumably only the module does; verify against libs).
if hasattr(metrics, "Metrics"):
    metrics = metrics.Metrics()

**Regressão linear simples**

In [9]:
# Simple linear regression from the local `libs` package, constructed with its defaults.
lr = simple_linear_regression.SimpleLinearRegression()

In [10]:
# Fit on the training split (one feature column against the house-value target).
lr.fit(X_train, y_train)

In [11]:
# Predict the held-out rows; `y_pred` is reused by the metric cells that follow.
y_pred = lr.predict(X_test)

In [12]:
# Mean absolute error of the linear model on the held-out rows.
metrics.MAE(y_test, y_pred)

0.7414960304006604

In [13]:
# Mean squared error of the linear model on the held-out rows.
metrics.MSE(y_test, y_pred)

0.8898717481771987

In [14]:
# Root mean squared error of the linear model on the held-out rows.
metrics.RMSE(y_test, y_pred)

0.9433301374265527

**Regressão KNN**

In [15]:
# K-nearest-neighbours regressor from the local `libs` package, constructed with its defaults.
knn_regression = kneighbors_regression.KNeighborsRegression()

In [16]:
# Fit on the same housing training split used for the linear model.
knn_regression.fit(X_train, y_train)

In [17]:
# Predict the held-out rows; this overwrites the linear model's `y_pred`.
y_pred = knn_regression.predict(X_test)

In [18]:
# Mean absolute error of the KNN regressor on the held-out rows.
# NOTE(review): the recorded value (~4.0) is far worse than the linear baseline's (~0.74).
# Verify how KNeighborsRegression aggregates neighbour targets and handles 1-D input — TODO confirm.
metrics.MAE(y_test, y_pred)

3.9998941995223527

In [19]:
# Mean squared error of the KNN regressor on the held-out rows.
metrics.MSE(y_test, y_pred)

30.794576873403184

In [20]:
# Root mean squared error of the KNN regressor on the held-out rows.
metrics.RMSE(y_test, y_pred)

5.54928615890397

**Classificação KNN**

In [21]:
# NOTE(review): imports normally live together in the first cell so dependencies are visible up front;
# consider moving this one next to the other sklearn imports.
from sklearn.datasets import load_iris

In [22]:
# Load the iris dataset and wrap its feature matrix in a labelled DataFrame for inspection.
data = load_iris()
df = pd.DataFrame(
    data=data.data,
    columns=data.feature_names,
)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [23]:
# Dimensions of the iris feature table as (rows, columns).
df.shape

(150, 4)

In [24]:
# Features as a plain NumPy array and the class labels as targets.
# `to_numpy()` is the accessor pandas recommends over the legacy `.values` attribute.
# NOTE: this rebinds `x` and `y`, which held the housing subset earlier in the notebook.
x = df.to_numpy()
y = data.target

In [25]:
# Same 80/20 split and seed as the housing experiment, now applied to the iris data.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=42,
)

In [26]:
# K-nearest-neighbours classifier from the local `libs` package, constructed with its defaults.
knn_classifier = kneighbors_classifier.KNeighborsClassifier()

In [27]:
# Fit on the iris training split.
knn_classifier.fit(X_train, y_train)

In [28]:
# Predict class labels for the held-out iris rows.
y_pred = knn_classifier.predict(X_test)

In [29]:
def acc(y_pred, y):
    """Score a single prediction against its true label.

    Args:
        y_pred: Predicted label for one sample.
        y: True label for the same sample.

    Returns:
        1 when ``y_pred == y`` is truthy, otherwise 0.
    """
    return 1 if y_pred == y else 0

In [30]:
# Accuracy: fraction of held-out samples whose predicted label matches the true one.
sum(map(acc, y_pred, y_test)) / len(y_test)

1.0