<a id=top></a>

# **目次**

<b>
    <details>
        <summary>
            <a href="#modules" style="font-size: xx-large">1. モジュールインポート</a>
            <ul>※サードパーティライブラリ>>>自作モジュール>>>（ここまで本ipynb外）>>>自作関数（本ipynb内）</ul>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#data" style="font-size: xx-large">2. オリジナルデータインポート</a>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#patentcount" style="font-size: xx-large">3. 特許数</a>
        </summary>
        <table></table>
    </details>
    <details>
        <summary>
            <a href="#calculateindicator" style="font-size: xx-large">4. 各指標</a>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#output" style="font-size: xx-large">5. ファイルに出力</a>
        </summary>
    </details>
</b>


---


<a id=modules></a>

## **1. モジュールインポート**


In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../../src')
from ecomplexity import ecomplexity

# Display floats with 3 digits after the decimal point in pandas output
pd.options.display.float_format = '{:.3f}'.format


In [2]:
import initial_condition


In [3]:
# Directory prefixes (relative to this notebook) used to build file paths in later cells.
# No `global` declaration is needed here: names assigned at the top level of a
# cell are already module-level, so the statement had no effect.
data_dir = '../../data/interim/internal/filtered_after_agg/'  # input table read in section 2
output_dir = '../../data/processed/internal/'                 # CSV outputs written in section 5
ex_dir = '../../data/processed/external/schmoch/'             # external Schmoch table (35.csv)


In [4]:
# Initial conditions, copied from the project-local `initial_condition` module
# so later cells can refer to them by short names.

# `ar` and `year_style` are combined into the period column name
# f'{ar}_{year_style}_period' and into input/output file names.
ar = initial_condition.AR
year_style = initial_condition.YEAR_STYLE

# `year_start` / `year_end` form the f'{year_start}-{year_end}' period label
# used when filtering in section 5.3; `year_range` is not used in this notebook.
year_start = initial_condition.YEAR_START
year_end = initial_condition.YEAR_END
year_range = initial_condition.YEAR_RANGE

# `classification` is the name of the technology-class column.
# `class_weight` / `applicant_weight` are not used in this notebook.
classification = initial_condition.CLASSIFICATION
class_weight = initial_condition.CLASS_WEIGHT
applicant_weight = initial_condition.APPLICANT_WEIGHT

# Both values are embedded in the input file name; `top_p_or_num` is indexed
# as a 2-element sequence ([0] and [1]).
extract_population = initial_condition.EXTRACT_POPULATION
top_p_or_num = initial_condition.TOP_P_OR_NUM


In [5]:
top_p_or_num

('p', 3)

In [6]:
def kh_ki(c_df, classification, n=19):
    """Append iterated holder/class averages ``kh_*`` and ``ki_*`` to ``c_df``.

    The recursion follows the method-of-reflections pattern: every new column
    is the sum of the previous-order column of the *other* side over the rows
    with ``mcp == 1``, divided by the row's own degree.

    * ``kh_1`` = sum of ``ubiquity`` per ``right_person_name`` / ``diversity``
    * ``ki_1`` = sum of ``diversity`` per ``classification`` / ``ubiquity``
    * ``kh_{k+1}`` = sum of ``ki_k`` per ``right_person_name`` / ``diversity``
    * ``ki_{k+1}`` = sum of ``kh_k`` per ``classification`` / ``ubiquity``

    Parameters
    ----------
    c_df : pandas.DataFrame
        Must contain the columns ``right_person_name``, ``mcp``,
        ``diversity``, ``ubiquity`` and the column named by
        ``classification``. It is not modified.
    classification : str
        Name of the technology-class column.
    n : int, default 19
        Number of additional iterations after order 1, so the result holds
        ``kh_1 .. kh_{n+1}`` and ``ki_1 .. ki_{n+1}``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns followed by the ``kh_*`` /
        ``ki_*`` columns (in the order kh_1, ki_1, kh_2, ki_2, ...). Row order
        is preserved and the index is a fresh RangeIndex (a consequence of
        ``pd.merge``). Groups with no ``mcp == 1`` row receive NaN.
    """
    holder_key = 'right_person_name'

    def _reflect(frame, key, source, target, degree):
        # Sum `source` over the mcp==1 rows of each `key` group, attach the
        # total to every row of that group as `target`, then normalise it by
        # the row's `degree` column.
        totals = (
            frame.loc[frame['mcp'] == 1]
                 .groupby([key])[[source]]
                 .sum()
                 .reset_index(drop=False)
                 .rename(columns={source: target})
        )
        merged = pd.merge(frame, totals, on=[key], how='left')
        merged[target] = merged[target] / merged[degree]
        return merged

    # Order 1 is seeded from the degree columns themselves.
    result = _reflect(c_df.copy(), holder_key, 'ubiquity', 'kh_1', 'diversity')
    result = _reflect(result, classification, 'diversity', 'ki_1', 'ubiquity')

    # Each further order is built only from the previous order's columns.
    for order in range(1, n + 1):
        result = _reflect(result, holder_key, f'ki_{order}', f'kh_{order + 1}', 'diversity')
        result = _reflect(result, classification, f'kh_{order}', f'ki_{order + 1}', 'ubiquity')
    return result


---


<a id=data></a>

## **2. オリジナルデータインポート**


In [7]:
# Load the input table; the file name is assembled from the initial conditions.
# The displayed result has one row per (period, right holder, technology class)
# with the registration count in `reg_num`.
reg_num_top_df = pd.read_csv(f'{data_dir}{ar}_{year_style}_{extract_population}_{top_p_or_num[0]}_{top_p_or_num[1]}.csv', 
                             encoding='utf-8',
                             sep=',')
reg_num_top_df

Unnamed: 0,app_year_period,right_person_name,schmoch35,reg_num
0,1981-2010,キヤノン株式会社,9,23609.000
1,1981-2010,キヤノン株式会社,28,9938.833
2,1981-2010,キヤノン株式会社,2,9424.667
3,1981-2010,キヤノン株式会社,6,7389.333
4,1981-2010,キヤノン株式会社,3,6325.500
...,...,...,...,...
105079,2001-2010,四国電力株式会社,9,0.071
105080,2001-2010,五洋建設株式会社,31,0.071
105081,2001-2010,九州電力株式会社,31,0.071
105082,2001-2010,日本原子力発電株式会社,9,0.071


In [8]:
reg_num_top_df['right_person_name'].nunique()

1929

<a href=#top>先頭に戻る</a>

---


<a id=calculateindicator></a>

## **4. 各指標**


In [9]:
# Maps the generic column roles passed to ecomplexity via `cols_input`
# (time / loc / prod / val) to this dataset's column names.
trade_cols = {'time':f'{ar}_{year_style}_period', 'loc':'right_person_name', 'prod':classification, 'val':'reg_num'}
# Renames the `eci` / `pci` output columns to `kci` / `tci`.
rename_col_dict = {'eci':'kci', 'pci':'tci'}
# Columns kept (and their order) after the ecomplexity call.
col_order_list = [f'{ar}_{year_style}_period', 'right_person_name', classification, 'reg_num', 'rca', 'mcp', 'diversity', 'ubiquity', 'kci', 'tci']


In [10]:
# Compute the complexity indicators with the ecomplexity package.
# NOTE(review): rca_mcp_threshold = 1 is presumably the RCA cut-off at which
# `mcp` becomes 1 — confirm against the ecomplexity documentation.
c_df = ecomplexity(reg_num_top_df,
                   cols_input = trade_cols, 
                   rca_mcp_threshold = 1)
# Keep only rows with a positive registration count, rename eci/pci to
# kci/tci, and restrict/reorder the columns to `col_order_list`.
c_df = c_df[c_df['reg_num'] > 0]\
           .rename(columns=rename_col_dict)\
           [col_order_list]
# Run kh_ki separately on each period's rows (kh_ki itself does not group by
# period) and stack the per-period results with a fresh index.
c_df = pd.concat([kh_ki(c_df[c_df[f'{ar}_{year_style}_period'] == period], classification) for period in c_df[f'{ar}_{year_style}_period'].unique()], 
                 axis='index', 
                 ignore_index=True)

# Disabled per-period inspection of the result:
# for segment in c_df[f'{ar}_{year_style}_period'].unique():
#     display(c_df[c_df[f'{ar}_{year_style}_period'] == segment].head())
#     display(c_df[c_df[f'{ar}_{year_style}_period'] == segment].describe())
#     print(c_df[c_df[f'{ar}_{year_style}_period'] == segment].info())
#     print('\n')


1981-2010
1981-1990
1991-2000


2001-2010


In [11]:
c_df.sort_values(by=[f'{ar}_{year_style}_period', 'kci'], ascending=[True, False])

Unnamed: 0,app_year_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
53008,1981-1990,第一三共ヘルスケア株式会社,16,3.000,198.810,1,1,218,2.359,2.359,...,6.599,325.454,325.454,6.591,6.591,325.220,325.220,6.588,6.588,325.075
46030,1981-1990,杏林製薬株式会社,14,41.000,32.905,1,2,268,2.324,2.288,...,6.598,325.441,325.447,6.591,6.591,325.212,325.216,6.587,6.587,325.069
46031,1981-1990,杏林製薬株式会社,16,8.000,31.810,1,2,218,2.324,2.359,...,6.598,325.454,325.447,6.591,6.591,325.220,325.216,6.588,6.587,325.075
46032,1981-1990,杏林製薬株式会社,25,1.000,0.655,0,2,476,2.324,-0.033,...,6.598,324.864,325.447,6.582,6.591,324.854,325.216,6.582,6.587,324.851
41442,1981-1990,国立大学法人九州工業大学,14,4.000,40.128,1,1,268,2.288,2.288,...,6.598,325.441,325.441,6.591,6.591,325.212,325.212,6.587,6.587,325.069
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97436,2001-2010,株式会社ナカヨ,10,1.000,0.073,0,3,451,-3.592,-0.392,...,7.061,334.158,333.148,7.069,7.065,334.216,333.675,7.070,7.067,334.248
97437,2001-2010,株式会社ナカヨ,12,1.000,0.201,0,3,303,-3.592,-1.755,...,7.061,333.759,333.148,7.067,7.065,334.000,333.675,7.068,7.067,334.131
97438,2001-2010,株式会社ナカヨ,34,0.333,0.084,0,3,260,-3.592,0.154,...,7.061,334.315,333.148,7.070,7.065,334.303,333.675,7.070,7.067,334.296
97439,2001-2010,株式会社ナカヨ,35,1.000,0.113,0,3,420,-3.592,0.383,...,7.061,334.455,333.148,7.069,7.065,334.372,333.675,7.070,7.067,334.330


In [12]:
c_df[(c_df[classification]==22)&(c_df[f'{ar}_{year_style}_period']=='1981-1990')]

Unnamed: 0,app_year_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
47689,1981-1990,株式会社クラレ,22,0.5,341.378,1,13,1,1.205,2.03,...,6.59,325.378,325.172,6.59,6.587,325.172,325.045,6.587,6.585,325.045


<a href="#top">先頭に戻る</a>

---


<a id=output></a>

## **5. ファイルに出力**

<a id=rightperson></a>

### **5.1. 特許権者**


In [13]:

# Per (period, right holder): total `reg_num` joined with the number of
# distinct technology classes. After this merge the column named by
# `classification` holds that distinct-class count, not a class code.
right_person_df = pd.merge(c_df.groupby([f'{ar}_{year_style}_period', 'right_person_name'])[['reg_num']].sum().reset_index(drop=False), 
                           c_df.groupby([f'{ar}_{year_style}_period', 'right_person_name'])[[classification]].nunique().reset_index(drop=False), 
                           on=[f'{ar}_{year_style}_period', 'right_person_name'], 
                           how='inner')
# Attach the holder-level indicators (diversity, kci, kh_1..kh_20), collapsed
# to distinct rows with drop_duplicates. The upper bound 20 matches the
# columns produced by kh_ki with its default n=19 (kh_1 .. kh_{n+1}); keep the
# two in sync if n is changed.
right_person_df = pd.merge(right_person_df, 
                           c_df[[f'{ar}_{year_style}_period', 'right_person_name', 'diversity', 'kci']\
                               +[f'kh_{i}' for i in range(1, 20+1)]]\
                               .drop_duplicates(keep='first'), 
                           on=[f'{ar}_{year_style}_period', 'right_person_name'], 
                           how='inner')
# Disabled: per-period standardisation of the kh_* columns and inspection output.
# NOTE(review): the assignment below uses chained indexing, so it would not
# write back into right_person_df if re-enabled as is.
# for period in right_person_df[f'{ar}_{year_style}_period'].unique():
#     for i in range(1, 20+1):
#         value = right_person_df[right_person_df[f'{ar}_{year_style}_period']==period]
#         right_person_df[right_person_df[f'{ar}_{year_style}_period']==period][f'kh_{i}'] = (value[f'kh_{i}'] - value[f'kh_{i}'].mean()) / value[f'kh_{i}'].std()
#     display(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].head())
#     display(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].describe())
#     print(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].info())
#     print('\n')
# right_person_df['reg_num'] = right_person_df['reg_num'].astype(np.int64)

In [14]:
# Write the right-holder table under `{output_dir}firms/` as UTF-8 CSV,
# without the DataFrame index.
right_person_df.to_csv(f'{output_dir}firms/{ar}_{year_style}_{top_p_or_num[0]}_{top_p_or_num[1]}.csv', 
                       encoding='utf-8', 
                       sep=',', 
                       index=False)


<a href=#top>先頭に戻る</a>

---


<a id=ipc></a>

### **5.2. IPC**


In [15]:
# For each period
# Per (period, technology class): total `reg_num` joined with the number of
# distinct right holders. After this merge the `right_person_name` column
# holds that distinct-holder count, not a name.
classification_df = pd.merge(c_df.groupby([f'{ar}_{year_style}_period', classification])[['reg_num']].sum().reset_index(drop=False), 
                        c_df.groupby([f'{ar}_{year_style}_period', classification])[['right_person_name']].nunique().reset_index(drop=False), 
                        on=[f'{ar}_{year_style}_period', classification], 
                        how='inner')
# Attach the class-level indicators (ubiquity, tci, ki_1..ki_20), collapsed to
# distinct rows with drop_duplicates. The upper bound 20 matches the columns
# produced by kh_ki with its default n=19 (ki_1 .. ki_{n+1}).
classification_df = pd.merge(classification_df, 
                      c_df[[f'{ar}_{year_style}_period', classification, 'ubiquity', 'tci']\
                          +[f'ki_{i}' for i in range(1, 20+1)]]\
                          .drop_duplicates(keep='first'), 
                      on=[f'{ar}_{year_style}_period', classification], 
                      how='inner')
# classification_df['reg_num'] = classification_df['reg_num'].astype(np.int64)
display(classification_df)


Unnamed: 0,app_year_period,schmoch35,reg_num,right_person_name,ubiquity,tci,ki_1,ki_2,ki_3,ki_4,...,ki_11,ki_12,ki_13,ki_14,ki_15,ki_16,ki_17,ki_18,ki_19,ki_20
0,1981-1990,1,53234.148,912,300,-1.356,5.800,302.377,6.272,311.493,...,6.555,323.399,6.567,324.015,6.573,324.370,6.577,324.576,6.579,324.694
1,1981-1990,2,52234.681,676,165,-2.402,5.830,246.742,6.238,285.156,...,6.542,321.623,6.559,323.079,6.568,323.871,6.574,324.306,6.578,324.547
2,1981-1990,3,23838.704,422,144,-3.024,6.243,230.501,6.302,275.708,...,6.532,320.856,6.552,322.658,6.565,323.637,6.572,324.175,6.576,324.473
3,1981-1990,4,4176.887,187,100,-3.299,6.690,217.850,6.339,270.287,...,6.528,320.476,6.550,322.452,6.563,323.524,6.571,324.112,6.576,324.438
4,1981-1990,5,12226.667,327,157,-3.086,6.057,227.268,6.274,273.558,...,6.532,320.718,6.552,322.587,6.564,323.600,6.572,324.156,6.576,324.463
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,2001-2010,31,32259.182,992,353,0.213,6.062,351.410,6.580,344.874,...,7.048,334.903,7.060,334.580,7.065,334.427,7.068,334.354,7.069,334.320
136,2001-2010,32,58969.810,685,234,-0.232,5.534,324.329,6.386,336.496,...,7.038,334.498,7.055,334.361,7.063,334.308,7.067,334.289,7.069,334.284
137,2001-2010,33,56681.917,685,185,-0.402,4.427,278.482,5.849,314.587,...,7.020,333.896,7.047,334.113,7.060,334.202,7.065,334.243,7.068,334.263
138,2001-2010,34,17090.667,708,260,0.154,6.196,315.534,6.718,329.397,...,7.061,334.339,7.067,334.329,7.069,334.315,7.070,334.303,7.070,334.296


In [16]:
# Write the technology-class table under `{output_dir}technology/` as UTF-8
# CSV, without the DataFrame index.
classification_df.to_csv(f'{output_dir}technology/{ar}_{year_style}_{top_p_or_num[0]}_{top_p_or_num[1]}.csv', 
                        encoding='utf-8', 
                        sep=',', 
                        index=False)


In [17]:
# Load only the field number and English field name from the Schmoch table,
# keeping one row per distinct (Field_number, Field_en) pair.
schmoch_df = pd.read_csv(f'{ex_dir}35.csv', 
                         encoding='utf-8', 
                         sep=',', 
                         usecols=['Field_number', 'Field_en']
                         ).drop_duplicates()

In [18]:
schmoch_df

Unnamed: 0,Field_number,Field_en
0,1,"Electrical machinery, apparatus, energy"
30,2,Audio-visual technology
48,3,Telecommunications
58,4,Digital communication
61,5,Basic communication processes
71,6,Computer technology
88,7,IT methods for management
89,8,Semiconductors
91,9,Optics
101,10,Measurement


<a href=#top>先頭に戻る</a>

---


<a id=network></a>

### **5.3. 二部グラフ用**


In [19]:
# Select the rows of the f'{year_start}-{year_end}' period whose right-holder
# name contains the full-width string 'ＥＮＥＯＳ' and whose mcp flag is 1.
eneos_df = c_df[(c_df[f'{ar}_{year_style}_period']==f'{year_start}-{year_end}')&(c_df['right_person_name'].str.contains('ＥＮＥＯＳ'))\
                &(c_df['mcp']==1)].copy()#[['right_person_name', 'reg_num', 'schmoch35']].copy()
# Attach the English field name for each class number.
# NOTE(review): the join key is hard-coded as 'schmoch35' while other cells use
# `classification`; this only works when classification == 'schmoch35' —
# confirm that is intended.
eneos_df = pd.merge(eneos_df, 
                    schmoch_df.rename(columns={'Field_number':'schmoch35'})\
                              .drop_duplicates(keep='first'), 
                    on=['schmoch35'], 
                    how='inner')
eneos_df[['ubiquity', 'Field_en', 'ki_1']]

Unnamed: 0,ubiquity,Field_en,ki_1
0,352,"Electrical machinery, apparatus, energy",6.438
1,352,"Electrical machinery, apparatus, energy",6.438
2,333,Organic fine chemistry,8.228
3,333,Organic fine chemistry,8.228
4,297,"Macromolecular chemistry, polymers",8.236
5,297,"Macromolecular chemistry, polymers",8.236
6,297,"Macromolecular chemistry, polymers",8.236
7,470,Basic materials chemistry,8.021
8,470,Basic materials chemistry,8.021
9,470,Basic materials chemistry,8.021


In [21]:
# c_df[c_df['']]

In [None]:
# graph_df = pd.concat([c_df, c_df], axis='index')
# graph_df = graph_df[graph_df['mcp']==1][[f'{ar}_{year_style}', 'right_person_name', 'ipc_class', 'mcp']]
# graph_df

In [None]:
# graph_df.to_csv(f'../Data/0_Graph/{ar}_{year_start}_{year_end}.csv', 
#                 encoding='utf-8', 
#                 sep=',', 
#                 index=False)
# graph_df
