In [1]:
import pandas as pd
from pandas import Series
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier

from sklearn import linear_model
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor


# https://scikit-learn.org/stable/auto_examples/svm/plot_svm_regression.html#sphx-glr-auto-examples-svm-plot-svm-regression-py

In [6]:
# Load the scikit-learn diabetes dataset and show its bundled description.
from sklearn.datasets import load_diabetes

diabetes = load_diabetes()

# DESCR is plain text, so print() is used to render its line breaks.
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - Age
      - Sex
      - Body mass index
      - Average blood pressure
      - S1
      - S2
      - S3
      - S4
      - S5
      - S6

Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).

Source URL:
https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html

For more information see:
Bra

In [12]:
# Build a DataFrame from the loaded dataset: one column per feature,
# named after `diabetes.feature_names`.
df_diabetes = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)

# Attach the response as the last column. The target array is assigned
# directly; wrapping it in a temporary one-column DataFrame is unnecessary.
df_diabetes['target'] = diabetes.target

# Preview the first rows (bare last expression -> rich notebook display).
df_diabetes.head()



Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [33]:
# Separate predictors from the response.
# `target` was appended as the final column, so the features are every
# column except the last one.
X = df_diabetes.iloc[:, :-1]
y = df_diabetes.target

In [69]:
# Hold out 30% of the diabetes rows for testing; the fixed random_state
# makes the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=13,
)

In [70]:
# Scale the diabetes features using statistics learned from the training split only.
from sklearn.preprocessing import StandardScaler

# `with_mean` / `with_std` are boolean switches, not target values.
# The original passed the ints 0 and 1: 0 is falsy, so centering was disabled
# and only the division by the per-feature standard deviation was applied.
# Explicit booleans keep that exact numeric behaviour and avoid the int-for-bool
# form that newer scikit-learn releases reject during parameter validation.
# NOTE(review): if zero-mean standardization was the intent, use
# with_mean=True (the default) instead - confirm with the author.
scaler = StandardScaler(with_mean=False, with_std=True)

# Fit on the training rows and transform them in one step; the test rows are
# transformed with the same fitted scaler so no test information leaks in.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [71]:
#apply SVM
from sklearn.svm import SVR

In [82]:
# Three support-vector regressors that differ only in their kernel, each
# fitted on the scaled diabetes training split. `fit` returns the estimator
# itself, so construction and fitting are chained.
reg = SVR(kernel='rbf').fit(X_train, y_train)
reg_lin = SVR(kernel='linear').fit(X_train, y_train)
reg_poly = SVR(kernel='poly', degree=3).fit(X_train, y_train)

# Bare last expression: the notebook echoes the fitted polynomial model.
reg_poly




SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='poly', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [83]:
# Predictions on the held-out diabetes rows, one array per kernel
# (rbf, linear, poly - in that order).
y_pred, y_predlin, y_predpoly = (
    model.predict(X_test) for model in (reg, reg_lin, reg_poly)
)

# Test-set scores in the same kernel order. For a regressor, `score` is the
# coefficient of determination (R^2), not a classification accuracy, despite
# the `test_acc*` variable names.
test_acc, test_acclin, test_accpoly = (
    model.score(X_test, y_test) for model in (reg, reg_lin, reg_poly)
)
print(test_acc, test_acclin, test_accpoly)

0.14712786504401631 0.4447708474256924 0.27313040695280566


In [100]:
# Sanity check: dimensions (rows, features) of the scaled diabetes training matrix.
X_train.shape

(309, 10)

In [103]:
# K-nearest-neighbours section: start by loading the iris dataset.
from sklearn.datasets import load_iris

iris = load_iris()

# Print the description text bundled with the dataset.
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [110]:
# NOTE: `X` and `y` are rebound here - from this cell on they hold the iris
# features/labels, no longer the diabetes data.
X = iris.data
y = iris.target

# 70/30 split with a fixed seed; the `i` in the names marks the iris variants.
Xi_train, Xi_test, yi_train, yi_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=13,
)

In [111]:
# Scale the iris features using statistics learned from the training split only.
from sklearn.preprocessing import StandardScaler

# `with_mean` / `with_std` are boolean switches, not target values.
# The original passed the ints 0 and 1: 0 is falsy, so centering was disabled
# and only the division by the per-feature standard deviation was applied.
# Explicit booleans keep that exact numeric behaviour and avoid the int-for-bool
# form that newer scikit-learn releases reject during parameter validation.
# NOTE(review): if zero-mean standardization was the intent, use
# with_mean=True (the default) instead - confirm with the author.
# NOTE: this rebinds `scaler`, replacing any scaler fitted in an earlier cell.
scaler = StandardScaler(with_mean=False, with_std=True)

# Fit on the training rows and transform them in one step; the test rows are
# transformed with the same fitted scaler so no test information leaks in.
Xi_train = scaler.fit_transform(Xi_train)
Xi_test = scaler.transform(Xi_test)

In [145]:
# Fit a 9-nearest-neighbours classifier on the scaled iris training split.
# `fit` returns the estimator itself, so it can be chained onto the constructor.
classifier = KNeighborsClassifier(n_neighbors=9).fit(Xi_train, yi_train)

# Predicted classes for the held-out iris samples.
# NOTE: this rebinds `y_pred` to the classifier's predictions.
y_pred = classifier.predict(Xi_test)

In [146]:
# Accuracy of the KNN classifier on the held-out iris samples.
test_acc_class = classifier.score(
    Xi_test,
    yi_test,
)
print(test_acc_class)

0.9777777777777777


In [147]:
# Per-class breakdown of the KNN predictions on the iris test split.
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix first, then precision / recall / f1 / support per class.
# Both are printed so their plain-text layout is preserved.
print(confusion_matrix(yi_test, y_pred))
print(classification_report(yi_test, y_pred))

[[14  0  0]
 [ 0 12  0]
 [ 0  1 18]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.92      1.00      0.96        12
           2       1.00      0.95      0.97        19

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [149]:
# Support Vector Machine classifier (SVC, default settings) on the scaled iris split.
# `fit` returns the estimator itself, so it can be chained onto the constructor.
class2 = svm.SVC().fit(Xi_train, yi_train)
yc_predict = class2.predict(Xi_test)

# Accuracy on the held-out iris samples.
test_acc_class2 = class2.score(Xi_test, yi_test)
print(test_acc_class2)

0.9777777777777777


