## 讀取depositar現有的Qid

In [2]:
import requests
import pandas as pd
    
def make_api_call():
    """Fetch the keyword facet counts from the depositar ``package_search`` API.

    Sends a GET request asking for the ``keywords_facet`` facet with
    ``facet.limit`` set to 1000 and ``rows`` set to 0.

    Returns:
        dict | None: The decoded JSON response when the server answers with
        HTTP 200. ``None`` when the request fails, another status code is
        returned, or the body is not valid JSON; a message is printed in
        each of those cases.
    """
    url = "https://data.depositar.io/api/action/package_search"

    # Query parameters for the search action
    params = {
        "facet.field": '["keywords_facet"]',
        "facet.limit": 1000,
        "rows": 0
    }

    try:
        # A timeout prevents a stalled connection from hanging the notebook.
        response = requests.get(url, params=params, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

    # Only a 200 response is treated as success.
    if response.status_code != 200:
        print(f"API request failed with status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as e:
        # The body was not valid JSON; report it like any other failure.
        print(f"An error occurred: {e}")
        return None

# Fetch the facet counts. make_api_call() returns None on failure, so stop
# with a clear message instead of failing on the subscript below.
api_data = make_api_call()
if api_data is None:
    raise RuntimeError("Could not retrieve keyword facets from depositar.")
data = api_data["result"]['facets']['keywords_facet']

# Turn the facet mapping into a two-column DataFrame (key -> 'ID',
# count -> 'Fre'); pandas generates the integer index automatically.
df_depositar = pd.DataFrame(list(data.items()), columns=['ID', 'Fre'])

# Show the DataFrame
print(df_depositar)

            ID  Fre
0     Q9578202   51
1    Q11070045   49
2     Q7481418   48
3      Q484000   46
4         Q865   46
..         ...  ...
464  Q96977107    1
465  Q96977148    1
466  Q97173495    1
467        Q98    1
468  Q98692446    1

[469 rows x 2 columns]


In [3]:
pip install matplotlib wordcloud

Note: you may need to restart the kernel to use updated packages.


## Get the Qids referenced by each depositar Qid from Wikidata and compute a similarity score

In [4]:
# Get the Qids referenced by each depositar Qid from Wikidata

def get_claims_for_item(item_id):
    """Fetch the claims of one Wikidata entity through the ``wbgetentities`` API.

    Args:
        item_id: Entity ID to look up; the callers in this notebook pass the
            values of ``df_depositar["ID"]``.

    Returns:
        dict | None: The ``claims`` object of the entity as returned by the
        API, or ``None`` when the response has no entry for ``item_id`` or
        that entry carries no ``claims`` key.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or a non-success HTTP status.
    """
    base_url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": item_id,
        "format": "json",
        "props": "claims"
    }

    # A timeout keeps one stalled request from hanging the calling loop.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Use .get() throughout so that a response without "entities", or an
    # entry without "claims", yields None instead of a KeyError.
    entity = data.get("entities", {}).get(item_id)
    if entity is None:
        return None
    return entity.get("claims")


# Build a mapping from each depositar QID to the list of "Q..." IDs that
# appear as claim values on its Wikidata entity. QIDs without any such
# value are left out of the mapping.
d = {}
wait = len(df_depositar)
for row in range(len(df_depositar["ID"])):
    # Countdown of items still to fetch
    wait -= 1
    print(wait, end="\t")

    item_id = df_depositar["ID"][row]
    item_claims = get_claims_for_item(item_id)
    if not item_claims:
        continue

    linked_qids = []
    for statements in item_claims.values():
        for statement in statements:
            value = statement.get("mainsnak", {}).get("datavalue", {}).get("value", {})
            # Only dict-shaped values can carry an "id" field.
            if not isinstance(value, dict):
                continue
            target = value.get("id", "")
            if target.startswith("Q"):
                linked_qids.append(target)

    # Store the list only when at least one Q-number was collected.
    if linked_qids:
        d[item_id] = linked_qids
print(d)

468	467	466	465	464	463	462	461	460	459	458	457	456	455	454	453	452	451	450	449	448	447	446	445	444	443	442	441	440	439	438	437	436	435	434	433	432	431	430	429	428	427	426	425	424	423	422	421	420	419	418	417	416	415	414	413	412	411	410	409	408	407	406	405	404	403	402	401	400	399	398	397	396	395	394	393	392	391	390	389	388	387	386	385	384	383	382	381	380	379	378	377	376	375	374	373	372	371	370	369	368	367	366	365	364	363	362	361	360	359	358	357	356	355	354	353	352	351	350	349	348	347	346	345	344	343	342	341	340	339	338	337	336	335	334	333	332	331	330	329	328	327	326	325	324	323	322	321	320	319	318	317	316	315	314	313	312	311	310	309	308	307	306	305	304	303	302	301	300	299	298	297	296	295	294	293	292	291	290	289	288	287	286	285	284	283	282	281	280	279	278	277	276	275	274	273	272	271	270	269	268	267	266	265	264	263	262	261	260	259	258	257	256	255	254	253	252	251	250	249	248	247	246	245	244	243	242	241	240	239	238	237	236	235	234	233	232	231	230	229	228	227	226	225	224	223	222	221	220	219	

In [5]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


### Compute the Q_score (cosine similarity) between Qids

In [6]:
import numpy as np

def _as_lookup(props):
    """Return ``props`` as a set for fast membership tests when that is safe."""
    if isinstance(props, (list, tuple)):
        try:
            return set(props)
        except TypeError:
            # Unhashable elements: keep the original container and its linear scan.
            return props
    return props


def calculate_cosine_similarity(qid1_props, qid2_props, all_properties):
    """Cosine similarity of two items represented as binary presence vectors.

    Each item is turned into a 0/1 vector with one component per element of
    ``all_properties`` (1 when the element is present in the item's
    collection), so duplicates inside ``qid1_props`` / ``qid2_props`` do not
    change the result.

    Args:
        qid1_props: Collection of values belonging to the first item.
        qid2_props: Collection of values belonging to the second item.
        all_properties: Iterable defining the vector components; it is
            iterated once per vector.

    Returns:
        float: ``0.0`` when either vector is all zeros, otherwise
        ``dot / (norm1 * norm2)`` (a NumPy float).
    """
    # Lists and tuples are converted to sets so each membership test is a
    # hash lookup rather than a scan of the whole list.
    lookup1 = _as_lookup(qid1_props)
    lookup2 = _as_lookup(qid2_props)

    # Binary vectors marking which components are present for each item
    qid1_vector = [1 if prop in lookup1 else 0 for prop in all_properties]
    qid2_vector = [1 if prop in lookup2 else 0 for prop in all_properties]

    norm_qid1 = np.linalg.norm(qid1_vector)
    norm_qid2 = np.linalg.norm(qid2_vector)

    # An all-zero vector has no direction; define the similarity as 0.
    if norm_qid1 == 0 or norm_qid2 == 0:
        return 0.0

    return np.dot(qid1_vector, qid2_vector) / (norm_qid1 * norm_qid2)

# Universe of values seen across all QIDs; it defines the vector components.
all_properties = {value for values in d.values() for value in values}

# Score every ordered pair of distinct QIDs.
results = []
wait = len(d)

for pid1, props1 in d.items():
    # Countdown of QIDs still to process
    wait -= 1
    print(wait, end="\t")

    results.extend(
        [pid1, pid2, calculate_cosine_similarity(props1, props2, all_properties)]
        for pid2, props2 in d.items()
        if pid2 != pid1
    )

# One row per ordered pair: QID1, QID2 and their similarity
df_qid = pd.DataFrame(results, columns=["QID1", "QID2", "Qid_score"])

# Show the resulting table
print(df_qid)


455	454	453	452	451	450	449	448	447	446	445	444	443	442	441	440	439	438	437	436	435	434	433	432	431	430	429	428	427	426	425	424	423	422	421	420	419	418	417	416	415	414	413	412	411	410	409	408	407	406	405	404	403	402	401	400	399	398	397	396	395	394	393	392	391	390	389	388	387	386	385	384	383	382	381	380	379	378	377	376	375	374	373	372	371	370	369	368	367	366	365	364	363	362	361	360	359	358	357	356	355	354	353	352	351	350	349	348	347	346	345	344	343	342	341	340	339	338	337	336	335	334	333	332	331	330	329	328	327	326	325	324	323	322	321	320	319	318	317	316	315	314	313	312	311	310	309	308	307	306	305	304	303	302	301	300	299	298	297	296	295	294	293	292	291	290	289	288	287	286	285	284	283	282	281	280	279	278	277	276	275	274	273	272	271	270	269	268	267	266	265	264	263	262	261	260	259	258	257	256	255	254	253	252	251	250	249	248	247	246	245	244	243	242	241	240	239	238	237	236	235	234	233	232	231	230	229	228	227	226	225	224	223	222	221	220	219	218	217	216	215	214	213	212	211	210	209	208	207	206	

## Get the Pids (properties) of each depositar Qid from Wikidata and compute a similarity score

In [7]:
# get pid
def get_claims_for_item(item_id):
    """Fetch the claims of one Wikidata entity through the ``wbgetentities`` API.

    This function is also defined in an earlier cell of this notebook; this
    definition replaces that one when the cell runs.

    Args:
        item_id: Entity ID to look up; the callers in this notebook pass the
            values of ``df_depositar["ID"]``.

    Returns:
        dict | None: The ``claims`` object of the entity as returned by the
        API, or ``None`` when the response has no entry for ``item_id`` or
        that entry carries no ``claims`` key.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or a non-success HTTP status.
    """
    base_url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": item_id,
        "format": "json",
        "props": "claims"
    }

    # A timeout keeps one stalled request from hanging the calling loop.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Use .get() throughout so that a response without "entities", or an
    # entry without "claims", yields None instead of a KeyError.
    entity = data.get("entities", {}).get(item_id)
    if entity is None:
        return None
    return entity.get("claims")

# Map each depositar QID to the list of keys of its Wikidata claims
# (the property IDs used on that entity).
d = {}
wait = len(df_depositar["ID"])
for item_id in df_depositar["ID"]:
    # Countdown of items still to fetch
    wait -= 1
    print(wait, end="\t")

    item_claims = get_claims_for_item(item_id)
    # get_claims_for_item() returns None when the entity is not in the
    # response; record an empty list instead of failing on None.keys().
    d[item_id] = list(item_claims.keys()) if item_claims else []
print(d)


468	467	466	465	464	463	462	461	460	459	458	457	456	455	454	453	452	451	450	449	448	447	446	445	444	443	442	441	440	439	438	437	436	435	434	433	432	431	430	429	428	427	426	425	424	423	422	421	420	419	418	417	416	415	414	413	412	411	410	409	408	407	406	405	404	403	402	401	400	399	398	397	396	395	394	393	392	391	390	389	388	387	386	385	384	383	382	381	380	379	378	377	376	375	374	373	372	371	370	369	368	367	366	365	364	363	362	361	360	359	358	357	356	355	354	353	352	351	350	349	348	347	346	345	344	343	342	341	340	339	338	337	336	335	334	333	332	331	330	329	328	327	326	325	324	323	322	321	320	319	318	317	316	315	314	313	312	311	310	309	308	307	306	305	304	303	302	301	300	299	298	297	296	295	294	293	292	291	290	289	288	287	286	285	284	283	282	281	280	279	278	277	276	275	274	273	272	271	270	269	268	267	266	265	264	263	262	261	260	259	258	257	256	255	254	253	252	251	250	249	248	247	246	245	244	243	242	241	240	239	238	237	236	235	234	233	232	231	230	229	228	227	226	225	224	223	222	221	220	219	

### Compute the P_score between Qids using cosine similarity on binary property vectors

In [8]:
# Universe of properties seen across all QIDs; it defines the vector components.
all_properties = {prop for props in d.values() for prop in props}

# Score ordered pairs of distinct QIDs. A pair is compared only when both
# QIDs have more than one property; QIDs with zero or one property are
# treated as incomplete data and skipped.
results = []
wait = len(d)

for qid1, props1 in d.items():
    # Countdown of QIDs still to process
    wait -= 1
    print(wait, end="\t")

    if len(props1) <= 1:
        continue

    for qid2, props2 in d.items():
        if qid2 == qid1 or len(props2) <= 1:
            continue
        score = calculate_cosine_similarity(props1, props2, all_properties)
        results.append([qid1, qid2, score])

# One row per compared pair: QID1, QID2 and their similarity
df_pid = pd.DataFrame(results, columns=["QID1", "QID2", "Pid_score"])

# Show the resulting table
print(df_pid)


468	467	466	465	464	463	462	461	460	459	458	457	456	455	454	453	452	451	450	449	448	447	446	445	444	443	442	441	440	439	438	437	436	435	434	433	432	431	430	429	428	427	426	425	424	423	422	421	420	419	418	417	416	415	414	413	412	411	410	409	408	407	406	405	404	403	402	401	400	399	398	397	396	395	394	393	392	391	390	389	388	387	386	385	384	383	382	381	380	379	378	377	376	375	374	373	372	371	370	369	368	367	366	365	364	363	362	361	360	359	358	357	356	355	354	353	352	351	350	349	348	347	346	345	344	343	342	341	340	339	338	337	336	335	334	333	332	331	330	329	328	327	326	325	324	323	322	321	320	319	318	317	316	315	314	313	312	311	310	309	308	307	306	305	304	303	302	301	300	299	298	297	296	295	294	293	292	291	290	289	288	287	286	285	284	283	282	281	280	279	278	277	276	275	274	273	272	271	270	269	268	267	266	265	264	263	262	261	260	259	258	257	256	255	254	253	252	251	250	249	248	247	246	245	244	243	242	241	240	239	238	237	236	235	234	233	232	231	230	229	228	227	226	225	224	223	222	221	220	219	

## Get label score

In [9]:
from time import sleep
def get_label_from_qid(qid):
    """Look up the English label of a Wikidata entity via the SPARQL endpoint.

    Args:
        qid: Entity ID; it is interpolated into the query as ``wd:<qid>``.

    Returns:
        str | None: The first label returned by the query, or ``None`` when
        the query yields no binding, the request fails, or the response
        cannot be interpreted (a message is printed in the failure cases).
    """
    url = "https://query.wikidata.org/sparql"

    # SPARQL query selecting the label of the entity, filtered to language
    # "en". The text after '#' inside the string is a SPARQL comment that is
    # sent with the query ("only the English label is taken; other language
    # codes may be used instead").
    query = f"""
    SELECT ?label WHERE {{
        wd:{qid} rdfs:label ?label.
        FILTER (lang(?label) = "en")  # 只取英文Label，你也可以更改為其他語言代碼
    }}
    """

    # Ask for a JSON response and declare the body as a UTF-8 SPARQL query.
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/sparql-query; charset=utf-8"
    }

    try:
        # POST the UTF-8 encoded query; the timeout prevents a stalled
        # request from hanging the calling loop.
        response = requests.post(url, data=query.encode("utf-8"), headers=headers, timeout=30)

        # Pause one second after each call to avoid overwhelming the server.
        sleep(1)

        # Surface HTTP errors explicitly instead of trying to parse the body.
        response.raise_for_status()

        # Take the first binding's label, if there is one.
        bindings = response.json()["results"]["bindings"]
        return bindings[0]["label"]["value"] if bindings else None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    except (ValueError, KeyError) as e:
        # The body was not JSON, or lacked the expected result structure.
        print(f"An error occurred: {e}")
        return None


In [10]:
# Fetch the label of each QID and split it into whitespace-separated tokens
def get_claims_for_item(item_id):
    """Fetch the claims of one Wikidata entity through the ``wbgetentities`` API.

    This function is also defined in earlier cells of this notebook; this
    definition replaces those when the cell runs. The loop that follows in
    this cell calls ``get_label_from_qid`` rather than this function.

    Args:
        item_id: Entity ID to look up.

    Returns:
        dict | None: The ``claims`` object of the entity as returned by the
        API, or ``None`` when the response has no entry for ``item_id`` or
        that entry carries no ``claims`` key.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or a non-success HTTP status.
    """
    base_url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": item_id,
        "format": "json",
        "props": "claims"
    }

    # A timeout keeps one stalled request from hanging the calling loop.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Use .get() throughout so that a response without "entities", or an
    # entry without "claims", yields None instead of a KeyError.
    entity = data.get("entities", {}).get(item_id)
    if entity is None:
        return None
    return entity.get("claims")

# Map each depositar QID to the whitespace-separated tokens of its label.
# QIDs for which no label is returned are left out.
d = {}
for position, item_id in enumerate(df_depositar["ID"]):
    # Progress: zero-based position of the item being fetched
    print(position)
    label_text = get_label_from_qid(item_id)
    if label_text is None:
        continue
    d[item_id] = label_text.split()
print(d)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [11]:
# Universe of label tokens seen across all QIDs; it defines the vector components.
all_properties = {token for tokens in d.values() for token in tokens}

# Score every ordered pair of distinct QIDs.
results = []
wait = len(d)

for qid1, props1 in d.items():
    # Countdown of QIDs still to process
    wait -= 1
    print(wait)

    results.extend(
        [qid1, qid2, calculate_cosine_similarity(props1, props2, all_properties)]
        for qid2, props2 in d.items()
        if qid2 != qid1
    )

# One row per ordered pair: QID1, QID2 and their label similarity
df_label = pd.DataFrame(results, columns=["QID1", "QID2", "L_score"])

# Show the resulting table
print(df_label)

448
447
446
445
444
443
442
441
440
439
438
437
436
435
434
433
432
431
430
429
428
427
426
425
424
423
422
421
420
419
418
417
416
415
414
413
412
411
410
409
408
407
406
405
404
403
402
401
400
399
398
397
396
395
394
393
392
391
390
389
388
387
386
385
384
383
382
381
380
379
378
377
376
375
374
373
372
371
370
369
368
367
366
365
364
363
362
361
360
359
358
357
356
355
354
353
352
351
350
349
348
347
346
345
344
343
342
341
340
339
338
337
336
335
334
333
332
331
330
329
328
327
326
325
324
323
322
321
320
319
318
317
316
315
314
313
312
311
310
309
308
307
306
305
304
303
302
301
300
299
298
297
296
295
294
293
292
291
290
289
288
287
286
285
284
283
282
281
280
279
278
277
276
275
274
273
272
271
270
269
268
267
266
265
264
263
262
261
260
259
258
257
256
255
254
253
252
251
250
249
248
247
246
245
244
243
242
241
240
239
238
237
236
235
234
233
232
231
230
229
228
227
226
225
224
223
222
221
220
219
218
217
216
215
214
213
212
211
210
209
208
207
206
205
204
203
202
201
200
199


## Merge the Q_score, P_score and L_score DataFrames on the QID1 and QID2 columns

In [12]:
# Display the pairwise property-based similarity table (Pid_score).
df_pid

Unnamed: 0,QID1,QID2,Pid_score
0,Q9578202,Q11070045,0.348155
1,Q9578202,Q7481418,0.000000
2,Q9578202,Q484000,0.070014
3,Q9578202,Q865,0.076640
4,Q9578202,Q29019698,0.612372
...,...,...,...
208387,Q98692446,Q956,0.156670
208388,Q98692446,Q96977107,0.074536
208389,Q98692446,Q96977148,0.074536
208390,Q98692446,Q97173495,0.200000


In [13]:
# Display the pairwise similarity table built from claim-value Qids (Qid_score).
df_qid

Unnamed: 0,QID1,QID2,Qid_score
0,Q9578202,Q11070045,0.267261
1,Q9578202,Q484000,0.000000
2,Q9578202,Q865,0.036322
3,Q9578202,Q29019698,0.353553
4,Q9578202,Q10908558,0.000000
...,...,...,...
207475,Q98692446,Q956,0.000000
207476,Q98692446,Q96977107,0.000000
207477,Q98692446,Q96977148,0.000000
207478,Q98692446,Q97173495,0.000000


In [14]:
# Display the pairwise label-token similarity table (L_score).
df_label

Unnamed: 0,QID1,QID2,L_score
0,Q9578202,Q11070045,0.0
1,Q9578202,Q7481418,0.0
2,Q9578202,Q484000,0.0
3,Q9578202,Q865,0.0
4,Q9578202,Q29019698,0.0
...,...,...,...
201147,Q98,Q948732,0.0
201148,Q98,Q956,0.0
201149,Q98,Q96977107,0.0
201150,Q98,Q96977148,0.0


In [15]:
# Join the three pairwise score tables on the (QID1, QID2) pair. Left joins
# starting from df_qid keep exactly the pairs present in df_qid; pairs that
# appear only in df_pid or df_label are not carried over.
merged_df = (
    df_qid
    .merge(df_pid, on=['QID1', 'QID2'], how='left')
    .merge(df_label, on=['QID1', 'QID2'], how='left')
    .rename(columns={'Qid_score': 'Qid_score_new', 'Pid_score': 'Pid_score_new'})
)

# A pair missing from one of the joined tables has no score there; use 0.
# The filled columns are assigned back rather than calling
# `merged_df[col].fillna(0, inplace=True)`: that form is chained assignment,
# which recent pandas warns about and which leaves merged_df unchanged once
# copy-on-write is active.
score_columns = ['Qid_score_new', 'Pid_score_new', 'L_score']
merged_df[score_columns] = merged_df[score_columns].fillna(0)

# Print the combined DataFrame
print(merged_df)

             QID1       QID2  Qid_score_new  Pid_score_new  L_score
0        Q9578202  Q11070045       0.267261       0.348155      0.0
1        Q9578202    Q484000       0.000000       0.070014      0.0
2        Q9578202       Q865       0.036322       0.076640      0.0
3        Q9578202  Q29019698       0.353553       0.612372      0.0
4        Q9578202  Q10908558       0.000000       0.258199      0.0
...           ...        ...            ...            ...      ...
207475  Q98692446       Q956       0.000000       0.156670      0.0
207476  Q98692446  Q96977107       0.000000       0.074536      0.0
207477  Q98692446  Q96977148       0.000000       0.074536      0.0
207478  Q98692446  Q97173495       0.000000       0.200000      0.0
207479  Q98692446        Q98       0.000000       0.229366      0.0

[207480 rows x 5 columns]


In [16]:
# Pull out the three component scores once for readability.
qid_part = merged_df['Qid_score_new']
pid_part = merged_df['Pid_score_new']
label_part = merged_df['L_score']

# Unweighted sum of the three scores
merged_df['TotalScore'] = qid_part + pid_part + label_part
# Weighted sum: the Qid and label scores count twice, the Pid score once
merged_df['W_TotalScore'] = qid_part * 2 + pid_part + label_part * 2

# Show the table with the two new columns
print(merged_df)

             QID1       QID2  Qid_score_new  Pid_score_new  L_score  \
0        Q9578202  Q11070045       0.267261       0.348155      0.0   
1        Q9578202    Q484000       0.000000       0.070014      0.0   
2        Q9578202       Q865       0.036322       0.076640      0.0   
3        Q9578202  Q29019698       0.353553       0.612372      0.0   
4        Q9578202  Q10908558       0.000000       0.258199      0.0   
...           ...        ...            ...            ...      ...   
207475  Q98692446       Q956       0.000000       0.156670      0.0   
207476  Q98692446  Q96977107       0.000000       0.074536      0.0   
207477  Q98692446  Q96977148       0.000000       0.074536      0.0   
207478  Q98692446  Q97173495       0.000000       0.200000      0.0   
207479  Q98692446        Q98       0.000000       0.229366      0.0   

        TotalScore  W_TotalScore  
0         0.615417      0.882678  
1         0.070014      0.070014  
2         0.112962      0.149283  
3      

### Save the total scores to an Excel file

In [17]:
# Name of the Excel workbook to write
file_name = 'Total_score.xlsx'

# Write the merged scores; index=False leaves the DataFrame index out of the
# sheet. No `encoding` argument is passed: `DataFrame.to_excel` deprecated
# it in pandas 1.5 and removed it in 2.0, where passing it raises TypeError.
merged_df.to_excel(file_name, index=False)

print('Total_score record successfully exported into Excel File')

Total_score record successfully exported into Excel File


## Search a keyword to get its top-5 similar keywords in Wikidata

### Query Wikidata for the label of the Qid to search

In [18]:
from time import sleep
def get_label_from_qid_user(qid):
    """Look up the ``zh`` (Chinese) label of a Wikidata entity via SPARQL.

    Args:
        qid: Entity ID; it is interpolated into the query as ``wd:<qid>``.

    Returns:
        str | None: The first label returned by the query, or ``None`` when
        the query yields no binding, the request fails, or the response
        cannot be interpreted (a message is printed in the failure cases).
    """
    url = "https://query.wikidata.org/sparql"

    # SPARQL query selecting the label of the entity, filtered to language
    # "zh". The text after '#' inside the string is a SPARQL comment sent
    # with the query; it still says "only the English label is taken",
    # although the filter used here is "zh".
    query = f"""
    SELECT ?label WHERE {{
        wd:{qid} rdfs:label ?label.
        FILTER (lang(?label) = "zh")  # 只取英文Label，你也可以更改為其他語言代碼
    }}
    """

    # Ask for a JSON response and declare the body as a UTF-8 SPARQL query.
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/sparql-query; charset=utf-8"
    }

    try:
        # POST the UTF-8 encoded query; the timeout prevents a stalled
        # request from hanging the notebook.
        response = requests.post(url, data=query.encode("utf-8"), headers=headers, timeout=30)

        # Pause one second after each call to avoid overwhelming the server.
        sleep(1)

        # Surface HTTP errors explicitly instead of trying to parse the body.
        response.raise_for_status()

        # Take the first binding's label, if there is one.
        bindings = response.json()["results"]["bindings"]
        return bindings[0]["label"]["value"] if bindings else None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    except (ValueError, KeyError) as e:
        # The body was not JSON, or lacked the expected result structure.
        print(f"An error occurred: {e}")
        return None


### Input the keyword of Tosearch and display the results 

In [21]:
# A candidate is listed only if its weighted score exceeds this threshold,
# and at most TOP_N candidates are shown.
SCORE_THRESHOLD = 0.775
TOP_N = 5

# Read the QID to search for; surrounding whitespace is stripped so that a
# stray space does not cause a false "not found".
ToSearch = input("請輸入您想要檢索的Qid：\n").strip()

# Rows whose QID1 is the requested QID, highest W_TotalScore first. This is
# computed unconditionally so that `filtered_data` is always defined; it is
# displayed by a separate cell of this notebook.
filtered_data = merged_df[merged_df["QID1"] == ToSearch].sort_values(by="W_TotalScore", ascending=False)

if filtered_data.empty:
    # The QID does not occur in the 'QID1' column.
    print(f"{ToSearch} not found in the Depositar.")
else:
    # QID2 values of the best-scoring rows above the threshold
    top_five_qid2 = filtered_data.loc[filtered_data["W_TotalScore"] > SCORE_THRESHOLD, "QID2"].head(TOP_N).tolist()

    label = get_label_from_qid_user(ToSearch)
    print("=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+")
    print(f"Qid {ToSearch} 的Label是：{label}")
    print(f"以下是Depositar資料庫內，與「{ToSearch}:{label}」相關的關鍵字Top5:")
    print("=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+")

    # Print each related QID together with its zh label.
    for index, qid in enumerate(top_five_qid2):
        label = get_label_from_qid_user(qid)
        print(f"{index+1}:Qid {qid} 的Label是：{label}")
        print("========")



請輸入您想要檢索的Qid：
 Q11070045


=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Qid Q11070045 的Label是：彰化縣政府
以下是Depositar資料庫內，與「Q11070045:彰化縣政府」相關的關鍵字Top5:
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
1:Qid Q15909984 的Label是：苗栗縣政府
2:Qid Q133865 的Label是：彰化縣
3:Qid Q11083998 的Label是：新竹市政府
4:Qid Q5972740 的Label是：行政院公共工程委員會
5:Qid Q153221 的Label是：雲林縣


In [20]:
# Display the rows for the searched QID (`filtered_data` is assigned in the search cell).
filtered_data

Unnamed: 0,QID1,QID2,Qid_score_new,Pid_score_new,L_score,TotalScore,W_TotalScore
687,Q11070045,Q15909984,0.285714,1.000000,0.666667,1.952381,2.904762
567,Q11070045,Q11083998,0.169031,0.858116,0.333333,1.360481,1.862845
525,Q11070045,Q133865,0.202031,0.321860,0.816497,1.340387,2.358915
609,Q11070045,Q5972740,0.308607,0.783349,0.000000,1.091956,1.400563
614,Q11070045,Q709073,0.267261,0.658145,0.000000,0.925406,1.192668
...,...,...,...,...,...,...,...
611,Q11070045,Q66309937,0.000000,0.000000,0.000000,0.000000,0.000000
770,Q11070045,Q3588927,0.000000,0.000000,0.000000,0.000000,0.000000
761,Q11070045,Q3267041,0.000000,0.000000,0.000000,0.000000,0.000000
752,Q11070045,Q27956049,0.000000,0.000000,0.000000,0.000000,0.000000
