## Classification

In [1]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv("iris.data")

In [10]:
[float(i) for i in df.columns[:-1]]

[5.1, 3.5, 1.4, 0.2]

In [11]:
df.loc[-1] = df.columns

In [13]:
df.index = df.index + 1

In [16]:
df = df.sort_index()

In [18]:
df.columns = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']

In [20]:
from sklearn.model_selection import train_test_split

In [21]:
y = df['class']
X = df.drop(columns='class')

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state=42)

In [27]:
print(y_test)

73     Iris-versicolor
18         Iris-setosa
118     Iris-virginica
78     Iris-versicolor
76     Iris-versicolor
31         Iris-setosa
64     Iris-versicolor
141     Iris-virginica
68     Iris-versicolor
82     Iris-versicolor
110     Iris-virginica
12         Iris-setosa
36         Iris-setosa
9          Iris-setosa
19         Iris-setosa
56     Iris-versicolor
104     Iris-virginica
69     Iris-versicolor
55     Iris-versicolor
132     Iris-virginica
29         Iris-setosa
127     Iris-virginica
26         Iris-setosa
128     Iris-virginica
131     Iris-virginica
145     Iris-virginica
108     Iris-virginica
143     Iris-virginica
45         Iris-setosa
30         Iris-setosa
Name: class, dtype: object


In [32]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [29]:
lg_clf = LogisticRegression(random_state=0, solver='lbfgs')

In [30]:
lg_clf.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=0, solver='lbfgs', tol=0.0001,
          verbose=0, warm_start=False)

In [34]:
accuracy_score(lg_clf.predict(X_test), y_test)

0.9666666666666667

In [35]:
from sklearn import svm

In [36]:
svm_clf = svm.SVC()

In [37]:
svm_clf.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [38]:
accuracy_score(svm_clf.predict(X_test), y_test)

1.0

In [39]:
from sklearn.model_selection import GridSearchCV

In [40]:
my_params_grid = {'C':[1,10,100,0.1,0.01],'gamma':[1,10,0.1,0.01,0.001,0.0001]}

In [41]:
my_grid = GridSearchCV(svm.SVC(), my_params_grid, verbose=3, refit=True)

In [42]:
my_grid.fit(X_train, y_train)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] C=1, gamma=1 ....................................................
[CV] ............... C=1, gamma=1, score=0.951219512195, total=   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] .......................... C=1, gamma=1, score=0.9, total=   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ............... C=1, gamma=1, score=0.974358974359, total=   0.0s
[CV] C=1, gamma=10 ...................................................
[CV] .............. C=1, gamma=10, score=0.926829268293, total=   0.0s
[CV] C=1, gamma=10 ...................................................
[CV] ....................... C=1, gamma=10, score=0.925, total=   0.0s
[CV] C=1, gamma=10 ...................................................
[CV] .............. C=1, gamma=10, score=0.974358974359, total=   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] ...........

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ................. C=100, gamma=0.0001, score=0.925, total=   0.0s
[CV] C=100, gamma=0.0001 .............................................
[CV] ........ C=100, gamma=0.0001, score=0.923076923077, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ............. C=0.1, gamma=1, score=0.926829268293, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ........................ C=0.1, gamma=1, score=0.9, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ............. C=0.1, gamma=1, score=0.974358974359, total=   0.0s
[CV] C=0.1, gamma=10 .................................................
[CV] ............ C=0.1, gamma=10, score=0.341463414634, total=   0.0s
[CV] C=0.1, gamma=10 .................................................
[CV] ..................... C=0.1, gamma=10, score=0.675, total=   0.0s
[CV] C=0.1, gamma=10 .................................................
[CV] .

[Parallel(n_jobs=1)]: Done  90 out of  90 | elapsed:    0.4s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [1, 10, 100, 0.1, 0.01], 'gamma': [1, 10, 0.1, 0.01, 0.001, 0.0001]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [43]:
accuracy_score(my_grid.predict(X_test), y_test)

1.0

## Regression

In [44]:
from sklearn.linear_model import LinearRegression

In [45]:
from sklearn.metrics import mean_squared_error

In [70]:
from sklearn import datasets

In [71]:
diabetes_dataset = datasets.load_diabetes()

In [72]:
X = diabetes_dataset.data
y = diabetes_dataset.target

In [73]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state=42)

In [74]:
linear_regressor = LinearRegression()

In [75]:
linear_regressor.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [76]:
mean_squared_error(linear_regressor.predict(X_test), y_test)

2900.1732878832318

In [77]:
from sklearn.neural_network import MLPRegressor

In [78]:
my_MLP_regressor = MLPRegressor((50,), solver='adam')

In [79]:
my_MLP_regressor.fit(X_train, y_train)

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(50,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [80]:
mean_squared_error(my_MLP_regressor.predict(X_test), y_test)

24771.698441154265