# Confirm TensorFlow can see the GPU

Simply select "GPU" in the Accelerator drop-down in Notebook Settings (either through the Edit menu or the command palette at cmd/ctrl-shift-P).

In [1]:
import tensorflow as tf
# Query the GPU device name TensorFlow reports and fail fast unless it is the
# first GPU ('/device:GPU:0').
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


# Observe TensorFlow speedup on GPU relative to CPU

This example constructs a typical convolutional neural network layer over a
random image and manually places the resulting ops on either the CPU or the GPU
to compare execution speed.

In [2]:
import tensorflow as tf
import timeit

# See https://www.tensorflow.org/tutorials/using_gpu#allowing_gpu_memory_growth
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Build the same small graph twice (random image -> conv2d -> reduce_sum),
# pinned once to the CPU and once to the GPU, so only the device differs.
with tf.device('/cpu:0'):
  random_image_cpu = tf.random_normal((100, 100, 100, 3))
  net_cpu = tf.layers.conv2d(random_image_cpu, 32, 7)
  net_cpu = tf.reduce_sum(net_cpu)

with tf.device('/gpu:0'):
  random_image_gpu = tf.random_normal((100, 100, 100, 3))
  net_gpu = tf.layers.conv2d(random_image_gpu, 32, 7)
  net_gpu = tf.reduce_sum(net_gpu)

sess = tf.Session(config=config)


def cpu():
  """Run the CPU-placed graph once in the shared session."""
  sess.run(net_cpu)


def gpu():
  """Run the GPU-placed graph once in the shared session."""
  sess.run(net_gpu)


# try/finally guarantees the session is closed even when initialization or one
# of the timed runs raises; otherwise the session would be left open.
try:
  # Test execution once to detect errors early.
  try:
    sess.run(tf.global_variables_initializer())
  except tf.errors.InvalidArgumentError:
    print(
        '\n\nThis error most likely means that this notebook is not '
        'configured to use a GPU.  Change this in Notebook Settings via the '
        'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
    raise

  # Runs the op several times.
  print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
        '(batch x height x width x channel). Sum of ten runs.')
  print('CPU (s):')
  cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
  print(cpu_time)
  print('GPU (s):')
  gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
  print(gpu_time)
  print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))
finally:
  sess.close()

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
10.8401720524
GPU (s):
2.19899487495
GPU speedup over CPU: 4x


In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset, then separate the 'status' column (kept as Y) from the
# remaining columns (kept as X).
data = pd.read_csv('drive/day1_data_all.csv')
Y = data['status']
X = data.drop('status', axis=1)

In [0]:
import time

In [0]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as the test split; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=2018,
)

In [9]:
# Install third-party packages into the notebook environment.
!pip install joblib
!pip install pandas
# NOTE(review): no visible cell imports `gbdt`; the GBDT cell uses
# sklearn.ensemble.GradientBoostingClassifier -- confirm this package is needed.
!pip install gbdt
!pip install xgboost
!pip install lightgbm
# NOTE(review): imbalanced-learn is installed from PyPI and then upgraded from
# the GitHub repository; confirm both installs are intended. None of the
# installs pins a version.
!pip install imbalanced-learn
!pip install -U git+https://github.com/scikit-learn-contrib/imbalanced-learn.git

Collecting gbdt
  Using cached https://files.pythonhosted.org/packages/33/48/ad5e3c0356d85dd3c85006fe512e822e595a333f355dcb60be720cada9e1/gbdt-0.3.1.2.tar.gz
Building wheels for collected packages: gbdt
  Running setup.py bdist_wheel for gbdt ... [?25l- \ | done
[?25h  Stored in directory: /root/.cache/pip/wheels/e1/4e/77/9269956665f9f92ebb16eec5029c5dc5ac98be513a3a649c1b
Successfully built gbdt
Installing collected packages: gbdt
Successfully installed gbdt-0.3.1.2
Collecting git+https://github.com/scikit-learn-contrib/imbalanced-learn.git
  Cloning https://github.com/scikit-learn-contrib/imbalanced-learn.git to /tmp/pip-req-build-qxV055
Building wheels for collected packages: imbalanced-learn
  Running setup.py bdist_wheel for imbalanced-learn ... [?25l- \ done
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-e7cboU/wheels/6c/07/cf/38cb9b7cc9e6a0ac7648a80ec192b6f2d863405fb0049ac0ff
Successfully built imbalanced-learn
Installing collected packages: imbalanced-learn

In [0]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score,GridSearchCV
# from sklearn.model_selection import    # no sklearn.grid_search

import time

In [0]:
# 5-fold cross-validation + standardization
def k5_standard_unadjusted(a):
    """Score classifier ``a`` with its current hyperparameters behind a StandardScaler.

    Builds a ``StandardScaler -> a`` pipeline, fits it on the notebook-level
    ``X_train`` / ``y_train``, then prints the mean of a 5-fold
    ``cross_val_score`` computed on ``X_test`` / ``y_test``.

    NOTE(review): the cross-validation is run on the test split rather than
    the training split -- confirm this is the intended evaluation.

    Args:
        a: scikit-learn classifier used as the ``'clf'`` pipeline step.
    """
    pipe_model = Pipeline([('scl', StandardScaler()), ('clf', a)])
    pipe_model.fit(X_train, y_train)
    fold_scores = cross_val_score(pipe_model, X_test, y_test, cv=5, n_jobs=1)
    print(fold_scores.mean())
    
    
def k5_standard(a, param_grid):
    """Tune classifier ``a`` behind a StandardScaler with a 5-fold grid search.

    Builds a ``StandardScaler -> a`` pipeline, runs ``GridSearchCV`` over
    ``param_grid`` on the notebook-level ``X_train`` / ``y_train`` with
    accuracy scoring, and prints the best cross-validated score followed by
    the best parameter combination.

    Args:
        a: scikit-learn classifier used as the ``'clf'`` pipeline step.
        param_grid: grid handed to ``GridSearchCV``. The estimator is a
            Pipeline, so keys need the step prefix, e.g. ``'clf__C'``.

    Returns:
        The fitted ``GridSearchCV`` object (best estimator, scores, params).
    """
    pipe_model = Pipeline([('scl', StandardScaler()), ('clf', a)])

    clf_gs = GridSearchCV(estimator=pipe_model, param_grid=param_grid,
                          scoring='accuracy', cv=5, n_jobs=-1)
    # The search must actually be fitted; without this call param_grid has no
    # effect and the function only scores the untuned pipeline.
    clf_gs.fit(X_train, y_train)

    print(clf_gs.best_score_)
    print(clf_gs.best_params_)
    return clf_gs

In [23]:
# Logistic regression
t0 = time.time()
from sklearn.linear_model import LogisticRegression

# Candidate values for the grid; a wider C range would be
# 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000.
param_range = [0.01, 0.1, 1]
param_penalty = ['l1', 'l2']
parameters = [{'clf__C': param_range, 'clf__penalty': param_penalty}]

# The grid searches both 'l1' and 'l2' penalties, so the solver is pinned to
# liblinear, which supports both; relying on the library default breaks the
# 'l1' candidates on scikit-learn versions whose default solver is l2-only.
clf = Pipeline([
    ('scl', StandardScaler()),
    ('clf', LogisticRegression(solver='liblinear', random_state=2018)),
])

gs_lr = GridSearchCV(estimator=clf, param_grid=parameters, scoring='accuracy', cv=5, n_jobs=-1)
gs_lr = gs_lr.fit(X_train, y_train)

print(gs_lr.best_score_)
print(gs_lr.best_params_)

# Wall-clock seconds spent in this cell
print(time.time() - t0)


0.7932070934776074
{'clf__penalty': 'l1', 'clf__C': 0.1}
7.67769217491


  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)


In [22]:
# SVM
t0 = time.time()

from sklearn.svm import SVC

pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=2018))])
# Candidate values shared by C and gamma; other values considered:
# 0.0001, 0.001, 0.01, 1000.0
param_range = [0.1, 1.0, 10.0, 100.0]
parameters = [
    # Linear kernel: only C is searched
    {'clf__C': param_range, 'clf__kernel': ['linear']},
    # RBF kernel: C and gamma are searched jointly
    {'clf__C': param_range, 'clf__gamma': param_range, 'clf__kernel': ['rbf']},
]

# Pass the grid defined in this cell (`parameters`); the name `param_grid` is
# not defined anywhere in the notebook and raises NameError on a fresh kernel.
gs_sm = GridSearchCV(estimator=pipe_svc, param_grid=parameters, scoring='accuracy', cv=10, n_jobs=-1)
# TODO: two CV loops; progress monitoring; no validation set is used here
gs_sm = gs_sm.fit(X_train, y_train)

print(gs_sm.best_score_)
print(gs_sm.best_params_)

# Optional held-out check of the tuned model:
# clf = gs_sm.best_estimator_
# print('Test accuracy: %.3f' % clf.score(X_test, y_test))

# Wall-clock seconds spent in this cell
print(time.time() - t0)

0.7974150886684701
{'clf__C': 0.1, 'clf__kernel': 'linear'}
Test accuracy: 0.778
1874.42206717


param_range 的前半部分已经试过，0.1最优，所以这里0.1最优。
疑问：前两个param_range的区别

In [0]:
# Decision tree
t0 = time.time()

from sklearn import tree

# A single decision tree has no `n_estimators` parameter (that belongs to
# ensembles), and GridSearchCV rejects unknown parameter names. The same
# candidate values are searched over the tree's `max_depth` instead.
# NOTE(review): confirm max_depth is the hyperparameter that was meant.
parameters = [
    {'max_depth': list(range(10, 70, 10))}
]

clf = tree.DecisionTreeClassifier(random_state=2018)

gs_tr = GridSearchCV(clf, param_grid=parameters, scoring='accuracy', cv=5, n_jobs=-1)
gs_tr = gs_tr.fit(X_train, y_train)

print(gs_tr.best_score_)
print(gs_tr.best_params_)

# Wall-clock seconds spent in this cell
print(time.time() - t0)

In [0]:
# Random forest
t0 = time.time()

from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(random_state=2018)

# Two sub-grids: the first leaves `bootstrap` at the estimator's default, the
# second forces bootstrap=False.
parameters = [
    {'n_estimators': [3, 10, 15], 'max_depth': [1, 2, 3, 4, 5, 6, 7, None]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_depth': [1, 2, 3, 4, 5, 6, 7, None]},
]

gs_rf = GridSearchCV(estimator=rfc, param_grid=parameters, cv=10, scoring='accuracy', n_jobs=-1)
# gs_rf must remain the GridSearchCV object here: assigning a cross_val_score
# result to it would replace the search with a plain score array, which has no
# .fit / best_score_ / best_params_.
gs_rf = gs_rf.fit(X_train, y_train)

print(gs_rf.best_score_)
print(gs_rf.best_params_)

# Wall-clock seconds spent in this cell
print(time.time() - t0)


In [13]:
# GBDT
t0 = time.time()

from sklearn.ensemble import GradientBoostingClassifier

# 9 depths x 9 feature fractions = 81 candidates, each fitted 5 times (cv=5)
parameters = [
    {
        'max_depth': list(range(5, 50, 5)),
        'max_features': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    }
]
xlf = GradientBoostingClassifier(random_state=2018)

# n_jobs=-1 spreads the candidate fits across all cores, matching the other
# grid-search cells; the search space and scoring are unchanged.
gs_gbdt = GridSearchCV(xlf, param_grid=parameters, scoring='accuracy', cv=5, n_jobs=-1)
gs_gbdt = gs_gbdt.fit(X_train, y_train)

print(gs_gbdt.best_score_)
print(gs_gbdt.best_params_)

# Wall-clock seconds spent in this cell
print(time.time() - t0)

KeyboardInterrupt: ignored

In [11]:
# XGBoost
t0 = time.time()

import xgboost as xgb

# Grid over tree depth (3, 5, 7, 9) and minimum child weight (1, 3, 5)
parameters = [
    {'max_depth': range(3, 10, 2), 'min_child_weight': range(1, 6, 2)},
]
xlf = xgb.XGBClassifier(n_estimators=200, random_state=2018)

# 5-fold grid search scored by accuracy; fit() returns the search object itself
gs_xgb = GridSearchCV(
    xlf,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

print(gs_xgb.best_score_)
print(gs_xgb.best_params_)

# Wall-clock seconds spent in this cell
print(time.time() - t0)

0.7892996693718064
{'max_depth': 3, 'min_child_weight': 5}
175.299245834


In [31]:
# LightGBM
t0 = time.time()

import lightgbm as lgb

# Hyperparameters searched by the grid below.
parameters = {
    'max_depth': range(3, 8, 1),   # earlier candidates: [15, 20, 25, 30, 35]
    'learning_rate': [0.01, 0.02, 0.05, 0.1, 0.15],
    'num_leaves': range(15, 50, 5)
}
# Further candidates that are currently NOT searched:
#               'feature_fraction': [0.6, 0.7, 0.8, 0.9, 0.95],
#               'bagging_fraction': [0.6, 0.7, 0.8, 0.9, 0.95],
#               'bagging_freq': [2, 4, 5, 6, 8],
#               'lambda_l1': [0, 0.1, 0.4, 0.5, 0.6],
#               'lambda_l2': [0, 10, 15, 35, 40],
#               'cat_smooth': [1, 10, 15, 20, 35]}

# Base estimator; learning_rate and num_leaves given here are overridden by
# the grid for every candidate, the remaining settings stay fixed.
gbm = lgb.LGBMClassifier(boosting_type='gbdt',
                         objective = 'binary',
                         metric = 'auc',
                         verbose = 0,
                         learning_rate = 0.01,
                         num_leaves = 35,
                         feature_fraction=0.8,
                         bagging_fraction= 0.9,
                         bagging_freq= 8,
                         lambda_l1= 0.6,
                         lambda_l2= 0)

gs_lgb = GridSearchCV(gbm, param_grid=parameters, scoring='accuracy', cv=5)
gs_lgb.fit(X_train, y_train)

# Report from the search fitted in this cell (gs_lgb); a bare `gs` is not
# defined in this notebook.
print(gs_lgb.best_score_)
print(gs_lgb.best_params_)

# Wall-clock seconds spent in this cell
print(time.time() - t0)


Best score: 0.796
Best parameters set:
230.358510017


参考：

- 逻辑回归：https://www.jianshu.com/p/2c823a42afd9
- SVM：https://blog.csdn.net/wfei101/article/details/78691644
- xgb、lgb：https://www.jianshu.com/p/1100e333fcab
- 007号同学：https://blog.csdn.net/weixin_43314414/article/details/85012474

遇到问题：