<a id=top></a>

# **目次**

<b>
    <details>
        <summary>
            <a href="#modules" style="font-size: xx-large">1. モジュールインポート</a>
            <ul>※サードパーティライブラリ>>>自作モジュール>>>（ここまで本ipynb外）>>>自作関数（本ipynb内）</ul>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#data" style="font-size: xx-large">2. オリジナルデータインポート</a>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#patentcount" style="font-size: xx-large">3. 特許数</a>
        </summary>
        <table></table>
    </details>
    <details>
        <summary>
            <a href="#calculateindicator" style="font-size: xx-large">4. 各指標</a>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#output" style="font-size: xx-large">5. ファイルに出力</a>
        </summary>
    </details>
</b>


---


<a id=modules></a>

## **1. モジュールインポート**


In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../../src')
from ecomplexity import ecomplexity

# Show floats with 3 digits after the decimal point in pandas output
pd.options.display.float_format = '{:.3f}'.format


In [2]:
import initial_condition


In [3]:
# Directory locations used by the cells below (relative paths).
# No `global` statement is needed here: names assigned at the top level of a
# cell are already module-level globals, so the declaration was a no-op.
data_dir = '../../data/interim/internal/filtered_after_agg/'  # input CSV read in the data-import section
output_dir = '../../data/processed/internal/'  # base directory for the CSV files written at the end
ex_dir = '../../data/processed/external/schmoch/'  # location of the 35.csv field-name table


In [4]:
# Initial conditions: copy the run settings from the initial_condition module
# into notebook-level names.
# ar and year_style are combined below as f'{ar}_{year_style}' to build file
# names and the period column name.
ar = initial_condition.AR
year_style = initial_condition.YEAR_STYLE

# year_start / year_end are used below to build a '<start>-<end>' period label.
year_start = initial_condition.YEAR_START
year_end = initial_condition.YEAR_END
year_range = initial_condition.YEAR_RANGE

# classification is used below as a DataFrame column name.
classification = initial_condition.CLASSIFICATION
class_weight = initial_condition.CLASS_WEIGHT
applicant_weight = initial_condition.APPLICANT_WEIGHT

# extract_population appears in the input file name; top_p_or_num is indexed
# as [0] and [1] in both the input and the output file names.
extract_population = initial_condition.EXTRACT_POPULATION
top_p_or_num = initial_condition.TOP_P_OR_NUM


In [5]:
# Display the selection setting whose two items are embedded in the file names
top_p_or_num

('p', 3)

In [6]:
def kh_ki(c_df, classification, n=19):
    """Append iterated neighbour averages kh_1..kh_{n+1} and ki_1..ki_{n+1}.

    Every aggregation only counts rows where ``mcp == 1``.

    * ``kh_1``: per ``right_person_name``, the sum of ``ubiquity`` divided by
      the row's ``diversity``.
    * ``ki_1``: per ``classification`` value, the sum of ``diversity`` divided
      by the row's ``ubiquity``.
    * For k >= 1, ``kh_{k+1}`` is the per-``right_person_name`` sum of
      ``ki_k`` divided by ``diversity``, and ``ki_{k+1}`` is the
      per-``classification`` sum of ``kh_k`` divided by ``ubiquity``.

    Groups without any ``mcp == 1`` row receive NaN, because the group totals
    are attached with a left join.

    Args:
        c_df: DataFrame with columns ``right_person_name``, the column named
            by ``classification``, ``mcp``, ``diversity`` and ``ubiquity``.
        classification: name of the classification column in ``c_df``.
        n: number of iterations after the first order; the highest order
            produced is ``n + 1``.

    Returns:
        A new DataFrame holding the columns of ``c_df`` followed by
        ``kh_1, ki_1, kh_2, ki_2, ...`` in that order.
    """
    firm_col = 'right_person_name'

    def _add_group_sum(frame, source, group_col, value_col, new_col):
        # Total `value_col` over the mcp == 1 rows of `source` within each
        # `group_col` value, then left-join the totals onto `frame` as `new_col`.
        totals = source.loc[source['mcp'] == 1].groupby([group_col])[[value_col]].sum()
        totals = totals.reset_index(drop=False).rename(columns={value_col: new_col})
        return pd.merge(frame, totals, on=[group_col], how='left')

    # First order: built directly from ubiquity / diversity.
    result = _add_group_sum(c_df.copy(), c_df, firm_col, 'ubiquity', 'kh_1')
    result = _add_group_sum(result, c_df, classification, 'diversity', 'ki_1')
    result['kh_1'] = result['kh_1'] / result['diversity']
    result['ki_1'] = result['ki_1'] / result['ubiquity']

    # Higher orders: each side is averaged from the other side's previous order.
    for order in range(2, n + 2):
        previous = order - 1
        kh_now = f'kh_{order}'
        ki_now = f'ki_{order}'
        result = _add_group_sum(result, result, firm_col, f'ki_{previous}', kh_now)
        result = _add_group_sum(result, result, classification, f'kh_{previous}', ki_now)
        result[kh_now] = result[kh_now] / result['diversity']
        result[ki_now] = result[ki_now] / result['ubiquity']
    return result


---


<a id=data></a>

## **2. オリジナルデータインポート**


In [7]:
# Load the input table; the file name is assembled from the run settings.
reg_num_top_path = f'{data_dir}{ar}_{year_style}_{extract_population}_{top_p_or_num[0]}_{top_p_or_num[1]}.csv'
reg_num_top_df = pd.read_csv(reg_num_top_path, sep=',', encoding='utf-8')
reg_num_top_df

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num
0,1981-2010,キヤノン株式会社,9,23723.000
1,1981-2010,キヤノン株式会社,28,9996.333
2,1981-2010,キヤノン株式会社,2,9525.167
3,1981-2010,キヤノン株式会社,6,7450.333
4,1981-2010,キヤノン株式会社,3,6352.500
...,...,...,...,...
105694,2001-2010,東北電力株式会社,9,0.071
105695,2001-2010,北海道電力株式会社,31,0.071
105696,2001-2010,北陸電力株式会社,9,0.071
105697,2001-2010,九州電力株式会社,31,0.071


In [8]:
# Number of distinct right holders in the input table
reg_num_top_df['right_person_name'].nunique()

1937

<a href=#top>先頭に戻る</a>

---


<a id=calculateindicator></a>

## **4. 各指標**


In [9]:
# Settings for the ecomplexity() call in the next cell:
#   trade_cols      - which input columns play the time / loc / prod / val roles
#   rename_col_dict - eci/pci are renamed to kci/tci
#   col_order_list  - columns kept afterwards, in this order
period_col = f'{ar}_{year_style}_period'
trade_cols = dict(time=period_col, loc='right_person_name', prod=classification, val='reg_num')
rename_col_dict = dict(eci='kci', pci='tci')
col_order_list = [
    period_col, 'right_person_name', classification, 'reg_num',
    'rca', 'mcp', 'diversity', 'ubiquity', 'kci', 'tci',
]


In [10]:
# Run ecomplexity() on the input table with the column mapping in trade_cols
# and an RCA threshold of 1 for mcp.
period_col = f'{ar}_{year_style}_period'
c_df = ecomplexity(reg_num_top_df, cols_input=trade_cols, rca_mcp_threshold=1)

# Keep only rows with a positive reg_num, rename eci/pci to kci/tci and
# reduce to the columns listed in col_order_list.
has_registrations = c_df['reg_num'] > 0
c_df = c_df[has_registrations].rename(columns=rename_col_dict)[col_order_list]

# Add the kh_* / ki_* columns separately for each period, then stack the
# per-period results into one frame with a fresh index.
c_df = pd.concat(
    [kh_ki(c_df[c_df[period_col] == period], classification)
     for period in c_df[period_col].unique()],
    axis='index',
    ignore_index=True,
)

# for segment in c_df[f'{ar}_{year_style}_period'].unique():
#     display(c_df[c_df[f'{ar}_{year_style}_period'] == segment].head())
#     display(c_df[c_df[f'{ar}_{year_style}_period'] == segment].describe())
#     print(c_df[c_df[f'{ar}_{year_style}_period'] == segment].info())
#     print('\n')


1981-2010
1981-1990
1991-2000
2001-2010


In [11]:
# View rows ordered by period (ascending), then by kci (descending) within each period
c_df.sort_values(by=[f'{ar}_{year_style}_period', 'kci'], ascending=[True, False])

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
46316,1981-1990,杏林製薬株式会社,14,39.000,31.907,1,2,271,2.317,2.286,...,6.598,328.132,328.137,6.591,6.591,327.896,327.900,6.587,6.587,327.749
46317,1981-1990,杏林製薬株式会社,16,10.000,38.989,1,2,223,2.317,2.349,...,6.598,328.143,328.137,6.591,6.591,327.904,327.900,6.587,6.587,327.754
46318,1981-1990,杏林製薬株式会社,25,1.000,0.660,0,2,481,2.317,-0.033,...,6.598,327.535,328.137,6.582,6.591,327.526,327.900,6.582,6.587,327.524
41700,1981-1990,国立大学法人九州工業大学,14,4.000,40.906,1,1,271,2.286,2.286,...,6.598,328.132,328.132,6.591,6.591,327.896,327.896,6.587,6.587,327.749
49451,1981-1990,株式会社マンダム,14,1.000,40.906,1,1,271,2.286,2.286,...,6.598,328.132,328.132,6.591,6.591,327.896,327.896,6.587,6.587,327.749
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98046,2001-2010,株式会社ナカヨ,7,1.000,0.622,0,3,229,-3.606,-1.879,...,7.104,335.657,335.170,7.109,7.108,335.915,335.656,7.110,7.109,336.052
98047,2001-2010,株式会社ナカヨ,10,1.000,0.070,0,3,456,-3.606,-0.441,...,7.104,336.076,335.170,7.111,7.108,336.136,335.656,7.111,7.109,336.168
98048,2001-2010,株式会社ナカヨ,12,1.000,0.191,0,3,307,-3.606,-1.724,...,7.104,335.736,335.170,7.109,7.108,335.954,335.656,7.110,7.109,336.071
98049,2001-2010,株式会社ナカヨ,34,0.333,0.081,0,3,260,-3.606,0.180,...,7.104,336.237,335.170,7.111,7.108,336.224,335.656,7.112,7.109,336.216


In [12]:
# Spot check: rows with classification code 22 in the '1981-1990' period
c_df[(c_df[classification]==22)&(c_df[f'{ar}_{year_style}_period']=='1981-1990')]

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
46302,1981-1990,本田技研工業株式会社,22,0.5,53.532,1,8,2,0.011,0.92,...,6.581,327.86,327.589,6.585,6.582,327.714,327.552,6.584,6.582,327.633
47978,1981-1990,株式会社クラレ,22,0.5,170.182,1,13,2,1.124,0.92,...,6.589,327.86,327.84,6.585,6.586,327.714,327.714,6.584,6.585,327.633


<a href="#top">先頭に戻る</a>

---


<a id=output></a>

## **5. ファイルに出力**

<a id=rightperson></a>

### **5.1. 特許権者**


In [13]:

# Firm-level table: one group per (period, right holder).
firm_keys = [f'{ar}_{year_style}_period', 'right_person_name']
firm_groups = c_df.groupby(firm_keys)

# Total reg_num and the number of distinct classification values per group;
# the count keeps the classification column's name.
firm_reg_num = firm_groups[['reg_num']].sum().reset_index(drop=False)
firm_class_count = firm_groups[[classification]].nunique().reset_index(drop=False)
right_person_df = pd.merge(firm_reg_num, firm_class_count, on=firm_keys, how='inner')

# Attach diversity, kci and kh_1..kh_20, de-duplicated from c_df.
firm_indicator_cols = firm_keys + ['diversity', 'kci'] + [f'kh_{i}' for i in range(1, 20+1)]
firm_indicators = c_df[firm_indicator_cols].drop_duplicates(keep='first')
right_person_df = pd.merge(right_person_df, firm_indicators, on=firm_keys, how='inner')
# for period in right_person_df[f'{ar}_{year_style}_period'].unique():
#     for i in range(1, 20+1):
#         value = right_person_df[right_person_df[f'{ar}_{year_style}_period']==period]
#         right_person_df[right_person_df[f'{ar}_{year_style}_period']==period][f'kh_{i}'] = (value[f'kh_{i}'] - value[f'kh_{i}'].mean()) / value[f'kh_{i}'].std()
#     display(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].head())
#     display(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].describe())
#     print(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].info())
#     print('\n')
# right_person_df['reg_num'] = right_person_df['reg_num'].astype(np.int64)

In [14]:
# Write the firm-level table under '<output_dir>firms/' without the index column.
firms_csv_path = f'{output_dir}firms/{ar}_{year_style}_{top_p_or_num[0]}_{top_p_or_num[1]}.csv'
right_person_df.to_csv(firms_csv_path, sep=',', encoding='utf-8', index=False)


<a href=#top>先頭に戻る</a>

---


<a id=ipc></a>

### **5.2. IPC**


In [15]:
# Per period: classification-level table, one group per (period, classification).
class_keys = [f'{ar}_{year_style}_period', classification]
class_groups = c_df.groupby(class_keys)

# Total reg_num and the number of distinct right holders per group; the count
# keeps the column name 'right_person_name'.
class_reg_num = class_groups[['reg_num']].sum().reset_index(drop=False)
class_firm_count = class_groups[['right_person_name']].nunique().reset_index(drop=False)
classification_df = pd.merge(class_reg_num, class_firm_count, on=class_keys, how='inner')

# Attach ubiquity, tci and ki_1..ki_20, de-duplicated from c_df.
class_indicator_cols = class_keys + ['ubiquity', 'tci'] + [f'ki_{i}' for i in range(1, 20+1)]
class_indicators = c_df[class_indicator_cols].drop_duplicates(keep='first')
classification_df = pd.merge(classification_df, class_indicators, on=class_keys, how='inner')
# classification_df['reg_num'] = classification_df['reg_num'].astype(np.int64)
display(classification_df)


Unnamed: 0,app_nendo_period,schmoch35,reg_num,right_person_name,ubiquity,tci,ki_1,ki_2,ki_3,ki_4,...,ki_11,ki_12,ki_13,ki_14,ki_15,ki_16,ki_17,ki_18,ki_19,ki_20
0,1981-1990,1,53259.148,917,306,-1.355,5.843,305.401,6.283,314.224,...,6.555,326.062,6.567,326.679,6.573,327.037,6.577,327.244,6.579,327.364
1,1981-1990,2,52583.581,676,169,-2.368,5.935,251.080,6.276,288.586,...,6.543,324.312,6.559,325.755,6.568,326.543,6.574,326.977,6.577,327.218
2,1981-1990,3,24203.537,430,146,-3.020,6.192,233.692,6.303,278.397,...,6.533,323.470,6.553,325.293,6.565,326.286,6.572,326.833,6.576,327.137
3,1981-1990,4,4189.387,189,101,-3.304,6.703,219.071,6.353,272.135,...,6.529,323.051,6.550,325.067,6.563,326.163,6.571,326.765,6.576,327.099
4,1981-1990,5,12301.667,331,158,-3.065,6.089,230.525,6.290,276.443,...,6.533,323.357,6.553,325.236,6.565,326.257,6.572,326.818,6.576,327.129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,2001-2010,31,32532.082,989,351,0.194,6.031,352.119,6.592,346.104,...,7.088,336.764,7.101,336.469,7.107,336.330,7.110,336.265,7.111,336.235
136,2001-2010,32,59563.310,682,229,-0.216,5.533,325.509,6.417,338.377,...,7.080,336.424,7.097,336.287,7.105,336.232,7.108,336.212,7.110,336.206
137,2001-2010,33,57650.417,679,183,-0.444,4.399,275.439,5.861,313.865,...,7.060,335.724,7.088,335.995,7.101,336.107,7.107,336.157,7.110,336.181
138,2001-2010,34,17092.000,704,260,0.180,6.277,317.767,6.768,331.479,...,7.103,336.267,7.108,336.253,7.110,336.237,7.111,336.224,7.112,336.216


In [16]:
# Write the classification-level table under '<output_dir>technology/' without the index column.
technology_csv_path = f'{output_dir}technology/{ar}_{year_style}_{top_p_or_num[0]}_{top_p_or_num[1]}.csv'
classification_df.to_csv(technology_csv_path, sep=',', encoding='utf-8', index=False)


In [17]:
# Load the field number -> English field name table from 35.csv, keeping only
# those two columns, then drop repeated (number, name) pairs.
schmoch_df = pd.read_csv(
    f'{ex_dir}35.csv',
    usecols=['Field_number', 'Field_en'],
    sep=',',
    encoding='utf-8',
)
schmoch_df = schmoch_df.drop_duplicates()

In [18]:
# Display the field number / field name lookup table
schmoch_df

Unnamed: 0,Field_number,Field_en
0,1,"Electrical machinery, apparatus, energy"
30,2,Audio-visual technology
48,3,Telecommunications
58,4,Digital communication
61,5,Basic communication processes
71,6,Computer technology
88,7,IT methods for management
89,8,Semiconductors
91,9,Optics
101,10,Measurement


<a href=#top>先頭に戻る</a>

---


<a id=network></a>

### **5.3. 二部グラフ用**


In [19]:
# Select the rows of the '<year_start>-<year_end>' period whose right holder
# name contains 'ＥＮＥＯＳ' and whose mcp flag is 1.
in_full_period = c_df[f'{ar}_{year_style}_period'] == f'{year_start}-{year_end}'
is_eneos = c_df['right_person_name'].str.contains('ＥＮＥＯＳ')
has_mcp = c_df['mcp'] == 1
eneos_df = c_df[in_full_period & is_eneos & has_mcp].copy()

# Attach the English field name by joining on the schmoch35 code.
field_names = schmoch_df.rename(columns={'Field_number': 'schmoch35'}).drop_duplicates(keep='first')
eneos_df = pd.merge(eneos_df, field_names, on=['schmoch35'], how='inner')
eneos_df[['ubiquity', 'Field_en', 'ki_1']]

Unnamed: 0,ubiquity,Field_en,ki_1
0,356,"Electrical machinery, apparatus, energy",6.469
1,356,"Electrical machinery, apparatus, energy",6.469
2,339,Organic fine chemistry,8.283
3,339,Organic fine chemistry,8.283
4,339,Organic fine chemistry,8.283
5,302,"Macromolecular chemistry, polymers",8.272
6,302,"Macromolecular chemistry, polymers",8.272
7,302,"Macromolecular chemistry, polymers",8.272
8,474,Basic materials chemistry,8.086
9,474,Basic materials chemistry,8.086


In [20]:
# c_df[c_df['']]

In [21]:
# graph_df = pd.concat([c_df, c_df], axis='index')
# graph_df = graph_df[graph_df['mcp']==1][[f'{ar}_{year_style}', 'right_person_name', 'ipc_class', 'mcp']]
# graph_df

In [22]:
# graph_df.to_csv(f'../Data/0_Graph/{ar}_{year_start}_{year_end}.csv', 
#                 encoding='utf-8', 
#                 sep=',', 
#                 index=False)
# graph_df
