In [15]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
%matplotlib inline

In [2]:
data = datasets.load_diabetes()

In [3]:
X = data.data[:, [-2, -1]]
Y = data.target

print(f'X: {X} Y:{Y}')


X: [[ 0.01990749 -0.01764613]
 [-0.06833155 -0.09220405]
 [ 0.00286131 -0.02593034]
 [ 0.02268774 -0.00936191]
 [-0.03198764 -0.04664087]
 [-0.04117617 -0.09634616]
 [-0.06291688 -0.03835666]
 [-0.03581619  0.00306441]
 [-0.01495969  0.01134862]
 [ 0.06773705 -0.01350402]
 [-0.06291688 -0.03421455]
 [-0.09643495 -0.05906719]
 [-0.03074792 -0.04249877]
 [ 0.03839393 -0.01350402]
 [-0.03198764 -0.07563562]
 [ 0.03606033 -0.04249877]
 [ 0.05227699  0.02791705]
 [ 0.02736405 -0.0010777 ]
 [-0.01811369 -0.01764613]
 [-0.0089434  -0.05492509]
 [-0.01189685  0.01549073]
 [-0.07213275 -0.01764613]
 [-0.0611758  -0.01350402]
 [ 0.13359728  0.13561183]
 [-0.02595311 -0.05492509]
 [ 0.01919647 -0.03421455]
 [-0.04257085 -0.0052198 ]
 [-0.01599887  0.04034337]
 [-0.00061174 -0.05492509]
 [ 0.05942362  0.05276969]
 [-0.02712902 -0.00936191]
 [-0.03712884 -0.04249877]
 [ 0.00027248  0.02791705]
 [-0.01811369  0.00306441]
 [-0.05947118 -0.06735141]
 [ 0.02131129  0.01963284]
 [ 0.05471997 -0.02593034

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=2222)

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

print(f'XTRAIN: {X_train_std} XTEST:{X_test_std}')


XTRAIN: [[-0.45009476  1.19270635]
 [-0.54746514  0.85903817]
 [ 1.09966969  1.10928931]
 [ 1.72773712  1.02587226]
 [-0.67653309  0.19170181]
 [-0.42655884 -0.30880046]
 [-1.06051025 -0.14196637]
 [-0.62376708 -0.97613681]
 [ 0.39700858 -0.14196637]
 [ 0.02954453 -0.05854932]
 [-0.33488263  0.10828477]
 [-0.35746952 -0.22538341]
 [-1.12694228 -0.30880046]
 [-0.7585292   0.10828477]
 [ 1.25075512 -1.22638795]
 [-0.81509133 -0.89271977]
 [-0.29084768 -0.89271977]
 [-0.16424721 -0.3922175 ]
 [-2.13082522 -1.56005613]
 [-0.93390977  0.44195295]
 [-1.91368737 -1.1429709 ]
 [ 0.93548767 -1.72689022]
 [-0.20543507 -0.30880046]
 [ 2.75079045  0.10828477]
 [ 0.01113336  2.36054498]
 [-1.26777819  0.77562113]
 [ 0.35088577  1.94345975]
 [ 0.57106051  0.52536999]
 [-0.70348552 -0.64246864]
 [-1.23152528 -1.47663908]
 [-1.1960316  -1.72689022]
 [-0.87355152 -0.14196637]
 [ 0.39700858  0.10828477]
 [-0.45009476  0.02486772]
 [-1.96569416 -0.72588568]
 [ 1.31737696 -0.72588568]
 [-0.14393798 -1.142

In [6]:

reg = LinearRegression()
reg.fit(X_train_std, Y_train)

y_pred = reg.predict(X_test_std)

print("MSE:", mean_squared_error(Y_test, y_pred))
print("R2 Score:", r2_score(Y_test, y_pred))

MSE: 3998.6441368096125
R2 Score: 0.28659712419703653


In [8]:
# Convert regression outputs to nearest integer for classification metrics
y_pred_class = np.round(y_pred)
Y_test_class = np.round(Y_test)

print(f'matriz de confucion: \n\n{confusion_matrix(Y_test_class, y_pred_class)}')
print(f"Accuracy: \n{accuracy_score(Y_test_class, y_pred_class)}")
print(f"Classification report:\n{classification_report(Y_test_class, y_pred_class)}")

matriz de confucion: 

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Accuracy: 
0.01694915254237288
Classification report:
              precision    recall  f1-score   support

        39.0       0.00      0.00      0.00         1
        42.0       0.00      0.00      0.00         2
        44.0       0.00      0.00      0.00         1
        47.0       0.00      0.00      0.00         1
        49.0       0.00      0.00      0.00         1
        50.0       0.00      0.00      0.00         1
        52.0       0.00      0.00      0.00         3
        53.0       0.00      0.00      0.00         2
        55.0       0.00      0.00      0.00         0
        57.0       0.00      0.00      0.00         1
        59.0       0.00      0.00      0.00         1
        60.0       0.00      0.00      0.00         1
        63.0       0.00      0.00      0.00         2
        64.0       0.00      0.00      0.00   

  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  _warn_prf(average, modifier, f"{me

In [None]:
from matplotlib import pyplot as plt

def plotting_db(X, y, classifier):
    markers = ('x','>','*')
    colors = ('red','purple','yellow','green')
    res = 0.02
    
    #Plot regions
    x1min, x1max = X[:,0].min() -1, X[:,0].max() + 1
    x2min, x2max = X[:,1].min() -1, X[:,1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1min,x1max,res),np.arange(x2min,x2max,res))

    output = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    output = output.reshape(xx1.shape)
    
    plt.figure(figsize=(8,8))
    #plt.pcolormesh(xx1,xx2, output, alpha=0.8, cmap=plt.cm.cool)
    plt.contourf(xx1, xx2, output, alpha=0.8, cmap=plt.cm.cool)
    
    #PLOT ALL SAMPLES
    for index, item in enumerate(np.unique(y)):
        plt.scatter(x=X[y == item, 0], y=X[y == item, 1],alpha=0.8, c=colors[index], s=50, edgecolor='k',
        marker=markers[index], label=item)


    plt.xlim(xx1.min(),xx1.max())
    plt.ylim(xx2.min(),xx2.max())
    plt.legend(loc='best',bbox_to_anchor=(0.5, 1.05),
          ncol=3,fancybox=True, shadow=True)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((Y_train, Y_test))
plotting_db(X=X_combined_std, y=y_combined, classifier=nb)

In [None]:
flores = datasets.load_iris()
x = flores.data
y = flores.target

print(flores.target)


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [11]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2018)

In [12]:
scaler = StandardScaler()
x_train_std = scaler.fit_transform(x_train)
x_test_std = scaler.transform(x_test)

In [16]:
model = GaussianNB()
model.fit(x_train_std, y_train)

0,1,2
,priors,
,var_smoothing,1e-09


In [17]:
print(confusion_matrix(y_test, model.predict(x_test_std)))

[[18  0  0]
 [ 0 11  0]
 [ 0  1 15]]
