# Programmable Web需求关键词视角下的历史调用记录分析
## 1. 统计 Mashup 中包含的 Web API 个数、Web API 被使用的次数和 Web API 提供商发布的 Web API 个数

In [13]:
import pandas as pd
import numpy as np
import sys
import openpyxl as pxl
from matplotlib import pyplot as plt
import json
from pyecharts.charts import Pie
from pyecharts import options as opts
from collections import Counter

In [37]:
# Load the tab-separated node table and print two summary statistics.
data = pd.read_csv('mashup_nodes_estimator.csv', sep='\t')
# Number of non-null entries in the "name" column.
# NOTE(review): the file name suggests each row is a mashup, while the printed
# label calls this a Web API count -- confirm which entity is being counted.
apiSum = data["name"].count()
print("Mashup中包含Web API个数：", apiSum)
# Number of rows for each distinct value of the "url" column.
companyProvide = data.groupby("url")["url"].count()
print(companyProvide)

Mashup中包含Web API个数： 7766
url
Http://www.freakstreets.com                                          1
Http://www.mashupstation.com/station/users/admin/dapptest.html       1
Undefined                                                         1519
http://1000songs.ebotunes.com                                        1
http://100milediet.org/get-started/map                               1
                                                                  ... 
https://www.xtracked.com/                                            1
https://www.youcallmd.com/                                           1
https://www.yuntechnologies.com/documentsigner.html                  1
https://xen.do/                                                      1
https://zilyo.com/                                                   1
Name: url, Length: 6212, dtype: int64


In [38]:
# Load the tab-separated edge table. This rebinds the notebook-global `data`.
data = pd.read_csv('m-a_edges.csv', sep='\t')
# Number of rows for each distinct "target" value; the printed label describes
# this as the per-API usage count.
apiSum = data.groupby("target")["target"].count()
print("每个API使用次数：", apiSum)

每个API使用次数： target
/api/123-shop-pro      1
/api/123contactform    1
/api/12secondstv       1
/api/140-proof         1
/api/18amail           1
                      ..
/api/zooomr            4
/api/zoopla            2
/api/zootool           1
/api/zotero            2
/api/zvents            7
Name: target, Length: 1508, dtype: int64


In [None]:
# Read the two JSON dumps of mashup records, dropping null entries from each.
with open('graph_data_backup/data0906-1.txt', 'r', encoding='utf-8') as act_file:
    act_mashups = [entry for entry in json.load(act_file) if entry is not None]

with open('graph_data_backup/deadpool_mashups0917.txt', 'r', encoding='utf-8') as dead_file:
    dead_mashups = [entry for entry in json.load(dead_file) if entry is not None]

all_mashups = act_mashups + dead_mashups

# Tally how many times each API url appears across the 'related_apis' lists
# of all mashups; null references inside a list are skipped.
api_usage_counter = Counter(
    api['url']
    for record in all_mashups
    for api in record['related_apis']
    if api is not None
)

print("Web API使用次数", len(api_usage_counter))

In [None]:
def get_usage(url):
    """Return the count stored for ``url`` in the module-level ``api_usage_counter``."""
    return api_usage_counter[url]

# Look up the usage count recorded for '/api/google-maps'.
get_usage('/api/google-maps')

# Invert the pair -> mashups mapping: for every mashup, gather the rows of
# `data` whose first three fields form a pair that the mashup is listed under.
# NOTE(review): each element of `data` is indexed as pl[0..2], so this
# presumably expects a list of pair rows rather than the DataFrame that other
# cells bind to `data` -- confirm the intended cell execution order.
mashup_count_pairs = {}
for pl in data:
    for mashup in api_pair_mashups[(pl[0], pl[1], pl[2])]:
        mashup_count_pairs.setdefault(mashup, []).append(pl)

list(mashup_count_pairs.items())[:3]

In [None]:
def is_accessible(title):
    """Look up ``title`` in the module-level ``acc_dict``; return ``False`` when it is absent."""
    return acc_dict.get(title, False)

# Build three lookup tables from the combined mashup list:
#   mashups_name_dict: mashup title -> full mashup record
#   api_pairs:         mashup title -> pairs returned by produce_pair()
#   api_pair_mashups:  pair -> titles of the mashups that produced that pair
api_pairs = {}
api_pair_mashups = {}
mashups_name_dict = {}
for dd in all_mashups:
    # The None guard must run before the first subscript on `dd`; placed after
    # it, a None entry would raise TypeError instead of being skipped.
    if dd is None:
        continue
    mashups_name_dict[dd['title']] = dd
    # Urls of the related APIs, ignoring null references.
    ra_links = [ra['url'] for ra in dd['related_apis'] if ra is not None]
    pairs = produce_pair(ra_links, False, True)
    api_pairs[dd['title']] = pairs
    for pair in pairs:
        api_pair_mashups.setdefault(pair, []).append(dd['title'])

len(api_pairs), len(api_pair_mashups)


# Count, for every pair, how many mashups use it overall and how many of
# those mashups are reported accessible by is_accessible().
api_pairs_counter_all = Counter()
api_pairs_counter_act = Counter()
for mashup_name, pairs in api_pairs.items():
    for pair in pairs:
        api_pairs_counter_all[pair] += 1
        if is_accessible(mashup_name):
            api_pairs_counter_act[pair] += 1

# Assemble one row per pair: (pair[0], pair[1], pair[2], accessible count,
# total count, 0.). A pair missing from the accessible counter reads as 0,
# meaning no accessible mashup references it.
api_pairs_list = [
    (pair[0], pair[1], pair[2], api_pairs_counter_act[pair], api_pairs_counter_all[pair], 0.)
    for pair in api_pairs_counter_all
]
# Order rows by total count, highest first.
api_pairs_list.sort(key=lambda row: row[4], reverse=True)
api_pairs_list = remove_symmetry(api_pairs_list)
api_pairs_list[:10]
# Average mashup lifetime statistics.
# The lifetime used here is the "pw" one (presumably the ProgrammableWeb record -- confirm).
# Wrapped in a function so that several variants can be computed conveniently.
def get_table_data(api_pairs_list, dead_time='oet'):
    """Summarise usage and lifetime of the mashups behind each pair.

    For every row of ``api_pairs_list`` the first three fields are used as a
    key into the module-level ``api_pair_mashups`` to obtain mashup names.
    Each name is looked up in the module-level ``submit_dead_time_dict``,
    which is unpacked as ``(st, et, oet)``; names without an entry are
    ignored. The values are accessed through ``.dt.year.values[0]``, so they
    are presumably single-element pandas datetime Series -- confirm.

    Args:
        api_pairs_list: rows whose first three fields identify a pair.
        dead_time: ``'oet'`` or ``'et'``; selects which end timestamp is used.
            A mashup whose selected timestamp has a year above 2200 is
            counted as still alive.

    Returns:
        A list of tuples ``(pair[0], pair[1], pair[2], alive count,
        known count, alive ratio, mean lifetime in days of the dead
        mashups)``; the last field is ``-1`` when no mashup is dead. Pairs
        with no mashup names, or with no mashup found in
        ``submit_dead_time_dict``, are printed and left out. Returns ``None``
        (after printing a hint) as soon as a known mashup is processed with
        an unsupported ``dead_time``.
    """
    one_day = np.timedelta64(1, 'D')
    rows = []
    for entry in api_pairs_list:
        key = (entry[0], entry[1], entry[2])
        names = api_pair_mashups[key]  # names only; details are looked up below
        if len(names) == 0:
            print(key)
            continue

        alive_total = 0
        known_total = 0
        lifetimes = []
        for name in names:
            start, end_et, end_oet = submit_dead_time_dict.get(name, (None, None, None))
            if start is None:
                # Mashup is absent from the time table: drop it.
                continue
            known_total += 1

            # Choose the timestamp that decides whether the mashup is alive.
            if dead_time == 'oet':
                end = end_oet
            elif dead_time == 'et':
                end = end_et
            else:
                print('Use et or oet')
                return None

            if end.dt.year.values[0] > 2200:
                # Far-future year marks a mashup that is still alive.
                alive_total += 1
                continue
            # Dead mashup: record its lifetime in days.
            lifetimes.append((end - start).values[0] / one_day)

        if known_total == 0:
            print(key)
            continue

        dead_total = known_total - alive_total
        mean_lifetime = sum(lifetimes) / dead_total if dead_total else -1
        rows.append(key + (alive_total, known_total, alive_total / float(known_total), mean_lifetime))

    return rows


# Build the table rows, using the 'et' timestamp to decide whether a mashup is dead.
table_data = get_table_data(api_pairs_list, dead_time='et')