In [31]:
from trajectory.drivingStyle.src.myDatasets import drivingStyleDataset

import scipy.io as io
from scipy import sparse
from gensim.models import KeyedVectors
import matplotlib.pyplot as plt
import numpy as np
from numpy import *
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import time
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import time
from sklearn.datasets import load_files
import logging
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import warnings
import os
# Load the training and test datasets.
# All paths below are relative, so they resolve against the notebook's working directory.
TMP_DIR = 'trajectory/tmp'
MAT_OUTPUT_PATH = 'trajectory/drivingStyle/results/mat'
MATPATH = 'trajectory/drivingStyle/results/driving.mat'
DATAFILE = 'trajectory/drivingStyle/data_raw/drivingStyle60.h5'
# Both objects are built from the same file with the same trainNum; only the `train` flag differs.
train = drivingStyleDataset(DATAFILE, train=True, trainNum=50)
test = drivingStyleDataset(DATAFILE, train=False, trainNum=50)


In [13]:
# Pull the samples and labels out of the training and test dataset objects.
trainData, trainLabel = train.trainData, train.trainLabel
testData, testLabel = test.testData, test.testLabel

In [20]:
#定义数据转换函数，转换为matlab稀疏矩阵
def numpy2mat(array, name = 'numpy', outputPath='trajectory/drivingStyle/results'):
    """Save every element of ``array`` as a sparse matrix in its own .mat file.

    Each element is converted with ``sparse.csr_matrix`` and written with
    ``scipy.io.savemat`` under the variable name ``'network'``. Files are named
    ``<name><position>.mat`` inside ``outputPath``.

    Args:
        array: iterable of 2-D array-likes accepted by ``sparse.csr_matrix``.
        name: file name prefix; the element's position is appended to it.
        outputPath: directory that receives the files. It is created
            (including parents) when it does not exist yet.

    Returns:
        List of the written file paths, in input order.
    """
    # savemat does not create missing directories, so make sure the target exists.
    # An empty outputPath means "current directory" and needs no creation.
    if outputPath:
        os.makedirs(outputPath, exist_ok=True)

    names = []
    for index, element in enumerate(array):
        filename = os.path.join(outputPath, name + str(index) + '.mat')
        io.savemat(filename, {'network': sparse.csr_matrix(element)})
        names.append(filename)
    return names

def strList2IntList(strList):
    """Convert an iterable of numeric strings into a list of ints."""
    return list(map(int, strList))

def deepwalk(matFile, executable='/opt/anaconda3/bin/deepwalk'):
    '''
    Run the DeepWalk command-line tool on one .mat graph file and load the
    node embeddings it writes.

    The embeddings are written next to the input, using the input path with
    its extension replaced by ``.embeddings``.

    Args:
        matFile: path of the .mat file to embed.
        executable: path of the deepwalk command-line program.

    Returns:
        A tuple ``(nodeIds, model)``: the keys of the loaded embeddings
        converted to ints, and the ``KeyedVectors`` object itself.

    Raises:
        subprocess.CalledProcessError: if deepwalk exits with a non-zero status.
        FileNotFoundError: if ``executable`` does not exist.
    '''
    # Function-scope import: subprocess is not part of the notebook's import cell.
    import subprocess

    # splitext only strips the final extension, so 'train.v2.mat' maps to
    # 'train.v2.embeddings' instead of colliding with 'train.embeddings'.
    embeddingFile = os.path.splitext(matFile)[0] + '.embeddings'

    # Argument list instead of a shell string: paths containing spaces or shell
    # metacharacters are passed through verbatim.
    args = [
        executable,
        '--format', 'mat',
        '--input', matFile,
        '--number-walks', '80',
        '--max-memory-data-size', '0',
        '--representation-size', '64',
        '--walk-length', '40',
        '--window-size', '10',
        '--workers', '1',
        '--output', embeddingFile,
    ]
    # Echo the command before running it so a long or failing run is identifiable.
    print(' '.join(args))
    # stdout is discarded; check=True surfaces a failed run here rather than as
    # a confusing "file not found" when loading the embeddings below.
    subprocess.run(args, stdout=subprocess.DEVNULL, check=True)

    model = KeyedVectors.load_word2vec_format(embeddingFile, binary=False)
    # gensim 4 renamed KeyedVectors.index2word to index_to_key; support both.
    keys = model.index_to_key if hasattr(model, 'index_to_key') else model.index2word
    return strList2IntList(keys), model

def matrixZeroAlign(index, dictData, rowsNum = 209, dim = 64):
    """Build a dense (rowsNum, dim) matrix of node vectors, zero-filling gaps.

    Node ids are 1-based: row ``i - 1`` of the result holds the vector of node
    ``i`` when ``i`` is listed in ``index``, and a zero vector otherwise.

    Args:
        index: collection of integer node ids that have a vector.
        dictData: object exposing ``get_vector(str(node_id))``.
        rowsNum: number of rows (nodes) in the result.
        dim: length of the zero vector used for missing nodes; must match the
            length of the vectors returned by ``dictData``.

    Returns:
        ``np.vstack`` of the ``rowsNum`` row vectors.
    """
    # Set membership keeps the per-row lookup constant-time.
    present = set(index)
    zeroLine = np.array([0] * dim, dtype=np.float32)
    # The row list is sized by rowsNum itself, so any row count works.
    rows = [
        dictData.get_vector(str(node)) if node in present else zeroLine
        for node in range(1, rowsNum + 1)
    ]
    return np.vstack(rows)


def deepwalkOnDatasets(matFileList):
    """Embed every .mat file with DeepWalk and stack the aligned matrices.

    Each file is run through ``deepwalk`` and ``matrixZeroAlign``; the
    resulting matrices are combined along a new leading axis, one entry per
    input file, in input order.
    """
    def embedOne(matFile):
        nodeIds, vectors = deepwalk(matFile)
        aligned = matrixZeroAlign(nodeIds, vectors)
        # Prepend a length-1 axis so the per-file matrices concatenate along it.
        return aligned[np.newaxis, :]

    return np.vstack([embedOne(matFile) for matFile in matFileList])





In [25]:
# Convert the data and keep the names of the generated files.


def selectSecondComponent(datasets):
    """Return an array built from element [1] of every sample in ``datasets``."""
    return np.array([data[1] for data in datasets])


# Keep only the second component of every sample, then write each one out as a
# MATLAB-compatible sparse matrix.
# A named helper is used instead of rebinding `filter`, which would shadow the
# builtin for the rest of the kernel session.
# NOTE: this cell rebinds trainData/testData, so re-running it without first
# re-running the cell that defines them applies the selection twice.
trainData, testData = selectSecondComponent(trainData), selectSecondComponent(testData)
trainMatDataName = numpy2mat(trainData, name='train', outputPath=MAT_OUTPUT_PATH)
testMatDataName = numpy2mat(testData, name='test', outputPath=MAT_OUTPUT_PATH)

In [26]:
# Run DeepWalk on every exported graph file to obtain its vector representation.
trainDataDescrip = deepwalkOnDatasets(trainMatDataName)
testDataDescrip = deepwalkOnDatasets(testMatDataName)

/opt/anaconda3/bin/deepwalk --format mat --input trajectory/drivingStyle/results/mat/train0.mat --number-walks 80 --max-memory-data-size 0 --representation-size 64 --walk-length 40 --window-size 10 --workers 1 --output trajectory/drivingStyle/results/mat/train0.embeddings
/opt/anaconda3/bin/deepwalk --format mat --input trajectory/drivingStyle/results/mat/train1.mat --number-walks 80 --max-memory-data-size 0 --representation-size 64 --walk-length 40 --window-size 10 --workers 1 --output trajectory/drivingStyle/results/mat/train1.embeddings
/opt/anaconda3/bin/deepwalk --format mat --input trajectory/drivingStyle/results/mat/train2.mat --number-walks 80 --max-memory-data-size 0 --representation-size 64 --walk-length 40 --window-size 10 --workers 1 --output trajectory/drivingStyle/results/mat/train2.embeddings
/opt/anaconda3/bin/deepwalk --format mat --input trajectory/drivingStyle/results/mat/train3.mat --number-walks 80 --max-memory-data-size 0 --representation-size 64 --walk-length 40 

In [29]:
# Cache the DeepWalk results in an HDF5 file.
import h5py
import time
DEEPWALK_RESULT_PATH = 'trajectory/drivingStyle/data_raw/'
time0 = time.time()
# The context manager closes the file even if one of the writes raises.
# Mode 'w' creates the file, replacing any existing one.
with h5py.File(DEEPWALK_RESULT_PATH + 'deepwalk60.h5','w') as f:
    f['trainDataDescrip'] = trainDataDescrip
    f['trainLabel'] = trainLabel
    f['testDataDescrip'] = testDataDescrip
    # The key is 'testDataLabel' (not 'testLabel'); kept as-is so existing
    # readers of this file keep working.
    f['testDataLabel'] = testLabel
print('saving time %.2fs' % (time.time() - time0))

saving time 0.02s


In [39]:
# There are three classes of drivers.
targets = [0, 1, 2]
target_names = ['0', '1', '2']
# Derived from target_names so the two cannot drift apart.
n_targets = len(target_names)
# TODO: no better approach yet, so each per-sample matrix is flattened into a
# single feature vector (-1 lets numpy infer the flattened length).
X_train, y_train = trainDataDescrip.reshape(trainDataDescrip.shape[0], -1), trainLabel
X_test, y_test = testDataDescrip.reshape(testDataDescrip.shape[0], -1), testLabel
print("搜索SVM最佳参数 ...")
param_grid = {'C': [1, 5, 10, 50, 100], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01]}
# cv is pinned to 3 folds, matching the recorded run ("Fitting 3 folds ..."), so the
# search does not depend on the installed scikit-learn's default fold count.
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid, cv=3, verbose=2, n_jobs=4)
clf.fit(X_train, y_train)
print("使用GridSearchCV搜索到的最佳参数是:")
print(clf.best_params_)

start = time.time()
print("预测测试集 ...")
y_pred = clf.best_estimator_.predict(X_test)
# The confusion matrix is computed but not printed; `cm` stays available for inspection.
cm = confusion_matrix(y_test, y_pred, labels=range(n_targets))
print("预测完成用时 {0:.2f}秒。\n".format(time.time() - start))

print(classification_report(y_test, y_pred, labels=range(n_targets), target_names=target_names))

搜索SVM最佳参数 ...
Fitting 3 folds for each of 25 candidates, totalling 75 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  44 tasks      | elapsed:    1.1s
使用GridSearchCV搜索到的最佳参数是:
{'C': 5, 'gamma': 0.01}
预测测试集 ...
预测完成用时 0.01秒。

              precision    recall  f1-score   support

           0       0.75      0.60      0.67         5
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         4

    accuracy                           0.33         9
   macro avg       0.25      0.20      0.22         9
weighted avg       0.42      0.33      0.37         9

[Parallel(n_jobs=4)]: Done  68 out of  75 | elapsed:    1.3s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  75 out of  75 | elapsed:    1.4s finished
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [23]:
# Scratch cell: run DeepWalk on train6.mat directly from the shell, with the same options deepwalk() passes.
!/opt/anaconda3/bin/deepwalk --format mat --input trajectory/drivingStyle/results/mat/train6.mat --number-walks 80 --max-memory-data-size 0 --representation-size 64 --walk-length 40 --window-size 10 --workers 1 --output trajectory/drivingStyle/results/mat/train6.embeddings

(9, 209, 64)

In [6]:
# Scratch cell: launch DeepWalk on train4.mat through a pipe.
# NOTE(review): the pipe object `a` is never read or closed in this cell, and `a` is
# rebound by a later scratch cell — presumably leftover debugging; confirm before
# relying on train4.embeddings being complete.
a = os.popen('/opt/anaconda3/bin/deepwalk --format mat --input trajectory/drivingStyle/results/mat/train4.mat --number-walks 80 --max-memory-data-size 0 --representation-size 64 --walk-length 40 --window-size 10 --workers 1 --output trajectory/drivingStyle/results/mat/train4.embeddings')

In [20]:
# Scratch cell: small 2x2 integer array; rebinds `a`.
a = np.array([[1, 2], [3, 4]])

In [21]:
# Scratch cell: display the current value of `a`.
a

array([[1, 2],
       [3, 4]])