选择一个API的数据进行实验，确保分类之后pn_sim要比分类前pu_sim明显降低

In [1]:
import os
import re
import pandas as pd
import numpy as np
import api_tags_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import vstack
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

data_path = 'DATA/clean_API_data/'
file_list = os.listdir(data_path)
for i in range(len(file_list)):
    print(i, file_list[i])

0 clean_google-checkout.csv
1 clean_google-adwords.csv
2 clean_wurfl.csv
3 clean_deezer.csv
4 clean_itunes-and-itunes-connect.csv
5 clean_musicbrainz.csv
6 clean_facebook.csv
7 clean_zipcode.csv
8 clean_google-directions.csv
9 clean_flickr.csv
10 clean_dailymotion.csv
11 clean_mailgun.csv
12 clean_tumblr.csv
13 clean_mapbox.csv
14 clean_lastfm.csv
15 clean_yahoo-mail.csv
16 clean_amazon-s3.csv
17 clean_aws-cloud9.csv
18 clean_twitter.csv
19 clean_reddit.csv
20 clean_brightcove.csv
21 clean_pinterest.csv
22 clean_twilio.csv
23 clean_woocommerce.csv
24 clean_geocoder.csv
25 clean_cartodb.csv
26 clean_google-cloud-sql.csv
27 clean_coupon.csv
28 clean_google-adsense.csv
29 clean_soundcloud.csv
30 clean_gravatar.csv
31 clean_google-earth.csv
32 clean_google-fusion-tables.csv
33 clean_google-cloud-dns.csv
34 clean_google-plus.csv
35 clean_openlayers.csv
36 clean_plunker.csv
37 clean_google-maps-android.csv
38 clean_ilist.csv
39 clean_allegro.csv
40 clean_openstreetmap.csv
41 clean_paypal.csv

In [2]:
pt_dic = {}
f = open('DATA/API_PT.txt', 'r')
for line in f.readlines():
    print(line)
    words = re.split('\t', line.strip())
    pt_dic[words[0]] = words[1]

2checkout	2checkout

admob	admob

allegro	allegro

amazon-ec2	amazon-ec2

amazon-queue-service	amazon-sqs

amazon-s3	amazon-s3

amazon-ses	amazon-ses

amazon-sns	amazon-sns

authorizenet	authorize.net

aws-cloud9	aws-cloud9

bigcommerce	bigcommerce

bing-maps	bing-maps

breadcrumbs	breadcrumbs

brightcove	brightcove

buffer	buffer

cartodb	cartodb

coinbase	coinbase-api

coupon	coupon

dailymotion	dailymotion-api

deezer	deezer

digitalocean	digital-ocean

dropbox	dropbox-api

eventbrite	eventbrite

facebook-ads	facebook-ads-api

facebook	facebook

flickr	flickr

foursquare	foursquare

geocoder	google-geocoder

geonames	geonames

gmail	gmail-api

google-adsense	adsense-api

google-adwords	google-adwords

google-app-engine	google-app-engine

google-checkout	google-checkout

google-cloud-dns	google-cloud-dns

google-cloud-print	google-cloud-print

google-cloud-sql	google-cloud-sql

google-directions	google-direction

google-distance-matrix	google-distancematrix-api

google-drive	google-d

In [3]:
def get_clean_tags(tags, ptag):
    result = ''
    for ele in tags:
        if ele != ptag:
            result = result + ele[1:-1] + ' '
    if result == '':
        return result
    else:
        return result[:-1]

In [4]:
def text_list(df, ptag):
    pos_text = []
    unl_text = []
    p_line_dic = {}
    n_line_dic = {}
    oneptline = []
    p = 0
    n = 0
    
    for i in range(df.shape[0]):
        tags = df.iloc[i]['Tags']
        tags = re.findall('<.*?>', tags, re.S)
        if ptag in tags:
            clean_tags = get_clean_tags(tags, ptag)
            if clean_tags != '':
                pos_text.append(clean_tags)
                p_line_dic[p] = i
                p += 1
            else:
                oneptline.append(i)
        else:
            n_line_dic[n] = i
            n += 1
            unl_text.append(get_clean_tags(tags, ptag))
    return pos_text, unl_text, p_line_dic, n_line_dic, oneptline

In [5]:
def Vector(pos_text, unl_text):
    tfidf = TfidfVectorizer()
    corpus = pos_text + unl_text
    corpus_v = tfidf.fit_transform(corpus)
    
    labels = [1]*len(pos_text)
    labels += [0]*len(unl_text)
    labels = np.array(labels)
    np.reshape(labels, (labels.shape[0], 1))
    y = pd.Series(labels)
    return corpus, corpus_v, y

In [6]:
def PUalgo(corpus_v, y):
    ## Step 1: NB
    iP = y[y == 1].index
    iU = y[y == 0].index
    #corpus_v = corpus_v.toarray()
    
    n_com = min(500, corpus_v.shape[1])
    n_com = min(n_com, corpus_v.shape[0]-1)
    svd = TruncatedSVD(n_components=n_com-1, n_iter=10, random_state=0)
    svd.fit(corpus_v)
    corpus_v1 = svd.transform(corpus_v)
    print("After SVD, the vector shape: ", corpus_v1.shape)
    
    gnb = GaussianNB()
    gnb.fit(corpus_v1, y.values)
    u_pred = gnb.predict(corpus_v1[iU])
    iRN = iU[u_pred==0]
    if len(iRN) == 0:
        print(" No relible NEG. All iUs are POS.")
        return iP, iU, iRN
    
    ## Step 2: SVM
    cnt = 1
    print("  Running:", end="")
    while True:
        #print(" PU_Epoch:%d" % cnt)
        cnt += 1
        print("=", end="")
        
        iQ = list(set(iU) - set(iRN))
        #print("  iU, iRN, iQ: ", len(iU), len(iRN), len(iQ))
        if len(iQ) == 0:
            print("  \nNo iQ. All iUs are NEG.")
            return iP, iQ, iRN
        iQ = np.array(iQ)
        ##Xt = np.append(corpus_v[iP], corpus_v[iRN], axis=0)
        Xt = vstack((corpus_v[iP], corpus_v[iRN]))
        yt = y[iP].append(y[iRN])
        #print("X_shape, y_shape: ", Xt.shape, yt.shape)
        clf = SVC(gamma='scale')
        #clf = tree.DecisionTreeClassifier()
        #clf = RandomForestClassifier(n_estimators=10)
        #clf = AdaBoostClassifier()
        #clf = GaussianNB()
        clf.fit(Xt, yt)
    
        pred = clf.predict(corpus_v[iQ])
        iW = iQ[pred == 0]
        if len(iW) == 0:
            print("\n  No iW. All iQs are POS.")
            print("  iU, iRN, iQ: ", len(iU), len(iRN), len(iQ))
            scores = clf.score(corpus_v[iP], y[iP])
            print("  The model score on POS samples is %f" % scores)
            return iP, iQ, iRN
        else:
            #print("  Negs in iQ: ", len(iW))
            iRN = np.append(iRN, iW)

In [7]:
def check_sim(pos_text, unl_text, iP, iQ, iRN):
    corpus = pos_text + unl_text
    
    finalP = []
    finalN = []
    finalp1 = []
    finalp2 = []

    for i in iRN:
        finalN.append(corpus[i])

    for i in range(len(iP)):
        finalP.append(corpus[iP[i]])
        if i%2 == 0:
            finalp1.append(corpus[iP[i]])
        else:
            finalp2.append(corpus[iP[i]])
    for i in range(len(iQ)):
        finalP.append(corpus[iQ[i]])
        if i%2 == 0:
            finalp1.append(corpus[iQ[i]])
        else:
            finalp2.append(corpus[iQ[i]])
    
    sim0 = api_tags_similarity.cal_list_sim(finalP, finalN)
    sim1 = api_tags_similarity.cal_list_sim(finalp1, finalp2)
    return sim0, sim1

In [8]:
def savefile(save_path, apiname, df, p_line_dic, n_line_dic, oneptline, iP, iQ):
    pdf = pd.DataFrame()
    PID = {}
    for i in oneptline:
        PID[i] = 1
    for i in iP:
        PID[p_line_dic[i]] = 1
    nP = len(iP)
    for i in iQ:
        PID[n_line_dic[i-nP]] = 1
    plist = sorted(PID)
    for i in plist:
        ele = pd.DataFrame(df.iloc[i]).T
        pdf = pdf.append([ele])
    pdf.to_csv(save_path+apiname+'_R.csv',index=False,sep=',')
    print(" Relative CSV shape: ", pdf.shape)

In [11]:
save_path = 'DATA/relative_api_data1/'
for index in range(0, len(pt_dic)):
    apiname = file_list[index][6:-4]
    ptag = '<' + pt_dic[apiname] + '>'
    print(index, apiname, ptag)
    df = pd.read_csv(data_path+file_list[index])
    print(' ', df.shape)
    pos_text, unl_text, p_line_dic, n_line_dic, oneptline = text_list(df, ptag)
    print(" PosN: %d, UnlN: %d" % (len(pos_text), len(unl_text)))
    corpus, corpus_v, y = Vector(pos_text, unl_text)
    print(" Vector shape: ", corpus_v.shape)
    iP, iQ, iRN = PUalgo(corpus_v, y)
    
    sim_pu0 = api_tags_similarity.api_tag_sim(df, ptag)
    sim_pp0 = api_tags_similarity.api_tag_sim_pp(df, ptag)
    print(" Simpu0=%f\tSimpp0=%f" % (sim_pu0, sim_pp0))
    sim_pn1, sim_pp1 = check_sim(pos_text, unl_text, iP, iQ, iRN)
    print(" Simpn1=%f\tSimpp1=%f" % (sim_pn1, sim_pp1))
    savefile(save_path, apiname, df, p_line_dic, n_line_dic, oneptline, iP, iQ)
    print("\n")

0 admob <admob>
  (9345, 15)
 PosN: 7279, UnlN: 1906
 Vector shape:  (9185, 1248)
After SVD, the vector shape:  (9185, 499)
  Running:==
  No iW. All iQs are POS.
  iU, iRN, iQ:  1906 1631 275
  The model score on POS samples is 0.996703
 Simpu0=0.987634	Simpp0=0.999138
 Simpn1=0.983138	Simpp1=0.999193
 Relative CSV shape:  (7714, 15)


1 yahoo-mail <yahoo-mail>
  (3626, 15)
 PosN: 135, UnlN: 3487
 Vector shape:  (3622, 1407)
After SVD, the vector shape:  (3622, 499)
  Running:=====
  No iW. All iQs are POS.
  iU, iRN, iQ:  3487 3479 8
  The model score on POS samples is 0.037037
 Simpu0=0.889959	Simpp0=0.906461
 Simpn1=0.881093	Simpp1=0.901620
 Relative CSV shape:  (147, 15)


2 payments-gateway <payment-gateway>
  (6793, 15)
 PosN: 3073, UnlN: 3662
 Vector shape:  (6735, 1531)
After SVD, the vector shape:  (6735, 499)
  No iW. All iQs are POS.
  iU, iRN, iQ:  3662 1431 2231
  The model score on POS samples is 0.956069
 Simpu0=0.917585	Simpp0=0.991948
 Simpn1=0.862606	Simpp1=0.996121


 Simpu0=0.841392	Simpp0=0.993910
 Simpn1=0.829662	Simpp1=0.995313
 Relative CSV shape:  (1658, 15)


24 vk <vk>
  (5561, 15)
 PosN: 242, UnlN: 5307
 Vector shape:  (5549, 3066)
After SVD, the vector shape:  (5549, 499)
  No iW. All iQs are POS.
  iU, iRN, iQ:  5307 5186 121
  The model score on POS samples is 0.570248
 Simpu0=0.552810	Simpp0=0.865992
 Simpn1=0.585607	Simpp1=0.900581
 Relative CSV shape:  (375, 15)


25 linkedin <linkedin-api>
  (6833, 15)
 PosN: 927, UnlN: 5854
 Vector shape:  (6781, 1819)
After SVD, the vector shape:  (6781, 499)
  No iW. All iQs are POS.
  iU, iRN, iQ:  5854 4593 1261
  The model score on POS samples is 0.669903
 Simpu0=0.956104	Simpp0=0.996048
 Simpn1=0.918106	Simpp1=0.998652
 Relative CSV shape:  (2240, 15)


26 openlayers <openlayers>
  (5768, 15)
 PosN: 2937, UnlN: 2474
 Vector shape:  (5411, 1181)
After SVD, the vector shape:  (5411, 499)
  No iW. All iQs are POS.
  iU, iRN, iQ:  2474 2246 228
  The model score on POS samples is 0.864828
 Simpu0

  No iW. All iQs are POS.
  iU, iRN, iQ:  7730 5187 2543
  The model score on POS samples is 0.857402
 Simpu0=0.938037	Simpp0=0.996667
 Simpn1=0.856204	Simpp1=0.998626
 Relative CSV shape:  (6689, 15)


71 plivo <plivo>
  (195, 15)
 PosN: 142, UnlN: 41
 Vector shape:  (183, 169)
After SVD, the vector shape:  (183, 168)
  Running:=  
No iQ. All iUs are NEG.
 Simpu0=0.621795	Simpp0=0.758350
 Simpn1=0.621795	Simpp1=0.814027
 Relative CSV shape:  (154, 15)


72 sendgrid <sendgrid>
  (2633, 15)
 PosN: 1811, UnlN: 757
 Vector shape:  (2568, 893)
After SVD, the vector shape:  (2568, 499)
  Running:=
  No iW. All iQs are POS.
  iU, iRN, iQ:  757 295 462
  The model score on POS samples is 0.997791
 Simpu0=0.958775	Simpp0=0.984179
 Simpn1=0.946380	Simpp1=0.991293
 Relative CSV shape:  (2338, 15)


73 facebook-ads <facebook-ads-api>
  (6052, 15)
 PosN: 680, UnlN: 5324
 Vector shape:  (6004, 1872)
After SVD, the vector shape:  (6004, 499)
  No iW. All iQs are POS.
  iU, iRN, iQ:  5324 4544 780
  

  No iW. All iQs are POS.
  iU, iRN, iQ:  9414 8268 1146
  The model score on POS samples is 0.705676
 Simpu0=0.928930	Simpp0=0.992825
 Simpn1=0.879423	Simpp1=0.996812
 Relative CSV shape:  (3261, 15)


95 recurly <recurly>
  (192, 15)
 PosN: 108, UnlN: 43
 Vector shape:  (151, 139)
After SVD, the vector shape:  (151, 138)
  Running:=
  No iW. All iQs are POS.
  iU, iRN, iQ:  43 42 1
  The model score on POS samples is 1.000000
 Simpu0=0.537188	Simpp0=0.857612
 Simpn1=0.539528	Simpp1=0.815237
 Relative CSV shape:  (150, 15)


96 rhapsody <rhapsody>
  (191, 15)
 PosN: 124, UnlN: 50
 Vector shape:  (174, 256)
After SVD, the vector shape:  (174, 172)
  Running:=
  No iW. All iQs are POS.
  iU, iRN, iQ:  50 47 3
  The model score on POS samples is 1.000000
 Simpu0=0.622615	Simpp0=0.856802
 Simpn1=0.570385	Simpp1=0.853621
 Relative CSV shape:  (144, 15)


97 mandrill <mandrill>
  (1283, 15)
 PosN: 847, UnlN: 362
 Vector shape:  (1209, 598)
After SVD, the vector shape:  (1209, 499)
  Running

结果汇总4
pu          pn          pp0         pp1
0.939503    0.928205    0.984753    0.994634    0.742268
0.793243    0.717135    0.977052    0.988233    0.734848
0.786801    0.650034    0.993227    0.993816    0.867235
0.932662    0.923446    0.997238    0.997801    0.991949
0.928930    0.879423    0.992825    0.996812    0.705676
 0.537188    0.539528    0.857612    0.815237    1.000000
0.622615    0.570385    0.856802    0.853621    1.000000
0.942938    0.893935    0.970987    0.984163    0.996458
0.982186    0.969963    0.997806    0.999448    0.716346
 0.956952    0.959363    0.987627    0.997747    0.728546
0.699461    0.689466    0.977581    0.979220    0.988656
0.676217    0.601785    0.746985    0.925679    1.000000
0.927664    0.855805    0.974393    0.980847    0.995091
0.374690    0.203190    0.994846    0.995691    0.981333
0.983861    0.912312    0.992165    0.996953    0.925750

结果汇总3
pu          pn          pp0         pp1
0.864397    0.857465    0.965522    0.970464    0.905080
0.851505    0.768179    0.931468    0.929892    0.992647
 0.861787    0.729590    0.980960    0.994923    0.268293
0.640357    0.481811    0.924345    0.939272    0.900000
0.898477    0.859589    0.963754    0.983499    0.880866
0.863287    0.843825    0.982928    0.994218    0.608775
0.977028    0.957821    0.999262    0.999144    0.995916
 0.782753    0.879903    0.992361    0.997189    0.606723
0.823931    0.815546    0.988612    0.990051    0.988499
0.931567    0.849302    0.929441    0.961153    0.847095
0.938037    0.856204    0.996667    0.998626    0.857402
0.621795    0.621795    0.758350    0.814027    -
0.958775    0.946380    0.984179    0.991293    0.997791
0.891384    0.835517    0.996657    0.997571    0.692647
0.974044    0.968894    0.999686    0.999731    0.984288
 0.953928    0.932339    0.976317    0.995202    0.329861
0.923198    0.822477    0.973366    0.969104    0.997868
0.835854    0.835854    0.821042    0.874011    -
0.776693    0.560842    0.763167    0.811018    1.000000
0.717221    0.645132    0.816489    0.866562    1.000000
0.927476    0.860564    0.986275    0.990892    0.999651
0.684266    0.658368    0.944413    0.971286    0.711392
0.924758    0.861805    0.998932    0.999043    0.993406
0.271597    0.211621    0.997869    0.998875    0.873075
0.865552    0.804609    0.915768    0.948226    1.000000
0.902904    0.808324    0.897128    0.910595    0.931818
0.959238    0.907741    0.999547    0.999490    0.999147
0.901948    0.841859    0.989314    0.994428    0.934926
0.844364    0.522589    0.982527    0.995576    0.861915
0.757342    0.757342    0.805707    0.860989    -

结果汇总2
pu          pn          pp0         pp1
0.973404    0.973105    0.992422    0.991873    1.000000
0.732633    0.508259    0.925803    0.961421    0.856287
0.726337    0.557222    0.930357    0.969210    0.976923
0.864122    0.678659    0.980782    0.990784    0.982808
0.792655    0.558601    0.980143    0.996837    0.731148
0.641754    0.639544    0.917332    0.895562    0.989011
0.863718    0.858103    0.925784    0.930748    1.000000
0.907659    0.855555    0.942047    0.955048    0.985560
0.732043    0.590628    0.999344    0.999599    0.994358
0.911484    0.698595    0.898800    0.942579    0.957143
0.963028    0.921643    0.986620    0.993900    0.994359
 0.876811    0.756401    0.989399    0.995111    0.119602
0.938604    0.880968    0.939017    0.964365    0.825243
0.913004    0.858403    0.999516    0.999349    0.999926
0.890641    0.775060    0.836229    0.939722    0.871429
0.040492    0.035458    0.827979    0.908831    0.828025
0.801700    0.801700    0.972373    0.974967    -
0.679635    0.479961    0.993031    0.997256    0.901581
0.933528    0.903748    0.948313    0.995941    0.694444
 0.869026    0.705440    0.953300    0.974759    0.373239
0.688867    0.578696    0.848636    0.852620    0.902098
0.563940    0.419120    0.993385    0.995762    0.965056
0.905948    0.747963    0.999700    0.999743    0.926997
0.785317    0.715684    0.971216    0.983948    0.992000
0.886494    0.563935    0.798913    0.866216    0.986395
0.978986    0.942039    0.990690    0.995490    0.964119
0.943960    0.827915    0.963609    0.978835    0.978992
 0.844948    0.891907    0.993738    0.995052    0.995816
0.961936    0.884808    0.974645    0.989127    0.925252
 0.963043    0.991948    0.999339    0.999899    0.685321

结果汇总1
pu          pn          pp0         pp1
0.987634    0.983138    0.999138    0.999193    0.996703
 0.889959    0.881093    0.906461    0.901620    0.037037
0.917585    0.862606    0.991948    0.996121    0.956069
0.888981    0.820417    0.867917    0.865363    0.952381
 0.789130    0.713352    0.896626    0.913337    0.376437
 0.888934    0.909027    0.998182    0.999252    0.743900
0.884686    0.761090    0.989057    0.989629    0.994676
0.799822    0.791363    0.910774    0.922810    1.000000
0.547900    0.485566    0.921762    0.954095    0.990847
0.852431    0.753512    0.928634    0.940360    0.983133
0.877079    0.855759    0.942892    0.990128    0.629630
0.958020    0.840487    0.995331    0.997008    0.963455
0.954011    0.916756    0.962899    0.985225    0.927757
0.851684    0.846418    0.840327    0.834058    0.951389
0.973719    0.942340    0.996962    0.996313    0.999334
0.911499    0.785215    0.996246    0.995369    0.952174
0.963331    0.953887    0.971573    0.979618    0.996324
0.964815    0.931775    0.990984    0.990197    0.990212
0.976919    0.968084    0.999842    0.999878    0.998649
0.225938    0.185216    0.980829    0.985928    0.769968
0.974369    0.972126    0.999196    0.998942    0.973136
0.947166    0.914595    0.988270    0.995529    0.988310
0.921163    0.921163    0.967828    0.957787    -
0.841392    0.829662    0.993910    0.995313    0.822004
 0.552810    0.585607    0.865992    0.900581    0.570248
0.956104    0.918106    0.996048    0.998652    0.669903
0.680918    0.667458    0.995291    0.995987    0.864828
0.632683    0.606078    0.962460    0.895459    0.994924
 0.942262    0.950759    0.951775    0.998178    0.382353
0.571905    0.314192    0.997724    0.999329    0.790687

结果修正：
15 yahoo-mail <yahoo-mail>
  (3626, 15)
 PosN: 135, UnlN: 3487
 Vector shape:  (3622, 1407)
After SVD, the vector shape:  (3622, 499)
  Running:============
  No iW. All iQs are POS.
  iU, iRN, iQ:  3487 3387 100
  The model score on POS samples is 0.659259
 Simpu0=0.889959	Simpp0=0.906461
 Simpn1=0.856731	Simpp1=0.950006
 Relative CSV shape:  (239, 15)
    
    
7 zipcode <zipcode>
  (3706, 15)
 PosN: 348, UnlN: 3353
 Vector shape:  (3701, 1747)
After SVD, the vector shape:  (3701, 999)
  Running:=====
  No iW. All iQs are POS.
  iU, iRN, iQ:  3353 2376 977
  The model score on POS samples is 1.000000
 Simpu0=0.789130	Simpp0=0.896626
 Simpn1=0.748105	Simpp1=0.971357
 Relative CSV shape:  (1330, 15)
    
    
    
52 youtube <youtube-api>
  (45527, 15)
 PosN: 8442, UnlN: 36315
 Vector shape:  (44757, 6211)
After SVD, the vector shape:  (44757, 499)
  Running:=========
  No iW. All iQs are POS.
  iU, iRN, iQ:  36315 29637 6678
  The model score on POS samples is 0.668088
 Simpu0=0.888934	Simpp0=0.998182
 Simpn1=0.884491	Simpp1=0.999504
 Relative CSV shape:  (15890, 15)
    
36 plunker <plunker>
  (8374, 15)
 PosN: 238, UnlN: 8119
 Vector shape:  (8357, 1317)
After SVD, the vector shape:  (8357, 999)
  Running:===========
  No iW. All iQs are POS.
  iU, iRN, iQ:  8119 6354 1765
  The model score on POS samples is 0.815126
 Simpu0=0.942262	Simpp0=0.951775
 Simpn1=0.963020	Simpp1=0.999006
 Relative CSV shape:  (2020, 15)
    
    
81 buffer <buffer>
  (114496, 15)
 PosN: 5326, UnlN: 109164
 Vector shape:  (114490, 10995)
After SVD, the vector shape:  (114490, 499)
  Running:============
  No iW. All iQs are POS.
  iU, iRN, iQ:  109164 97360 11804
  The model score on POS samples is 0.858806
 Simpu0=0.876811	Simpp0=0.989399
 Simpn1=0.810531	Simpp1=0.997617
 Relative CSV shape:  (17136, 15)
    
    
    
75 mixer <mixer>
  (1710, 15)
 PosN: 142, UnlN: 1568
 Vector shape:  (1710, 1092)
After SVD, the vector shape:  (1710, 499)
  Running:========
  No iW. All iQs are POS.
  iU, iRN, iQ:  1568 1277 291
  The model score on POS samples is 1.000000
 Simpu0=0.869026	Simpp0=0.953300
 Simpn1=0.797100	Simpp1=0.982495
 Relative CSV shape:  (433, 15)
    
8 google-directions <google-direction>
  (13692, 15)
 PosN: 287, UnlN: 13401
 Vector shape:  (13688, 4465)
After SVD, the vector shape:  (13688, 499)
  Running:=======
  No iW. All iQs are POS.
  iU, iRN, iQ:  13401 11584 1817
  The model score on POS samples is 0.993031
 Simpu0=0.861787	Simpp0=0.980960
 Simpn1=0.734504	Simpp1=0.998929
 Relative CSV shape:  (2108, 15)
      
38 ilist <ilist>
  (4992, 15)
 PosN: 288, UnlN: 4704
 Vector shape:  (4992, 2162)
After SVD, the vector shape:  (4992, 499)
  Running:======
  No iW. All iQs are POS.
  iU, iRN, iQ:  4704 3594 1110
  The model score on POS samples is 0.996528
 Simpu0=0.953928	Simpp0=0.976317
 Simpn1=0.946811	Simpp1=0.996300
 Relative CSV shape:  (1398, 15)
    
91 google-geocoding <google-geocoding-api>
  (3428, 15)
 PosN: 571, UnlN: 2795
 Vector shape:  (3366, 891)
After SVD, the vector shape:  (3366, 499)
  Running:======
  No iW. All iQs are POS.
  iU, iRN, iQ:  2795 1489 1306
  The model score on POS samples is 1.000000
 Simpu0=0.956952	Simpp0=0.987627
 Simpn1=0.950414	Simpp1=0.998655
 Relative CSV shape:  (1939, 15)

89 vk <vk>
  (5561, 15)
 PosN: 242, UnlN: 5307
 Vector shape:  (5549, 3066)
After SVD, the vector shape:  (5549, 499)
  Running:============
  No iW. All iQs are POS.
  iU, iRN, iQ:  5307 5046 261
  The model score on POS samples is 0.896694
 Simpu0=0.552810	Simpp0=0.865992
 Simpn1=0.618377	Simpp1=0.890789
 Relative CSV shape:  (515, 15)