<a id=top></a>

# **目次**

<b>
    <details>
        <summary>
            <a href="#modules" style="font-size: xx-large">1. モジュールインポート</a>
            <ul>※サードパーティライブラリ>>>自作モジュール>>>（ここまで本ipynb外）>>>自作関数（本ipynb内）</ul>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#data" style="font-size: xx-large">2. オリジナルデータインポート</a>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#patentcount" style="font-size: xx-large">3. 特許数</a>
        </summary>
        <table></table>
    </details>
    <details>
        <summary>
            <a href="#calculateindicator" style="font-size: xx-large">4. 各指標</a>
        </summary>
    </details>
    <details>
        <summary>
            <a href="#output" style="font-size: xx-large">5. ファイルに出力</a>
        </summary>
    </details>
</b>


---


<a id=modules></a>

## **1. モジュールインポート**


In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../../src')
from ecomplexity import ecomplexity

# Render floats with three decimal places in DataFrame/Series output.
pd.set_option('display.float_format', '{:.3f}'.format)


In [2]:
import initial_condition


In [3]:
# Directory layout, relative to the notebook's working directory.
# No `global` statement is needed: names assigned at cell level are already
# module-level globals, so the declaration had no effect.
data_dir = '../../data/interim/internal/filtered_after_agg/'  # input CSV read in section 2
output_dir = '../../data/processed/internal/'                 # root of the CSVs written in section 5
ex_dir = '../../data/processed/external/schmoch/'             # location of the Schmoch table (35.csv)


In [4]:
# Initial conditions: every setting below is read from the shared
# `initial_condition` module so that all notebooks use the same values.
ar = initial_condition.AR
year_style = initial_condition.YEAR_STYLE
year_start = initial_condition.YEAR_START
year_end = initial_condition.YEAR_END
year_range = initial_condition.YEAR_RANGE

extract_population = initial_condition.EXTRACT_POPULATION
top_p_or_num = initial_condition.TOP_P_OR_NUM
# top_p_or_num = ('p', 100)
region_corporation = initial_condition.REGION_CORPORATION
# region_corporation = 'right_person_addr'
applicant_weight = initial_condition.APPLICANT_WEIGHT

classification = initial_condition.CLASSIFICATION
class_weight = initial_condition.CLASS_WEIGHT

# File-name stem that encodes all of the settings above. The input file and
# the output files currently share the same stem.
input_condition = f'{ar}_{year_style}_{extract_population}_{top_p_or_num[0]}_{top_p_or_num[1]}_{region_corporation}_{applicant_weight}_{classification}_{class_weight}'
output_condition = input_condition


In [5]:
# Show the condition string that is used as the input file name stem.
input_condition

'app_nendo_all_p_3_right_person_name_fraction_schmoch35_fraction'

In [6]:
def kh_ki(c_df, classification, n=19, region=None):
    """Add iterated average indicators kh_1..kh_{n+1} and ki_1..ki_{n+1} to ``c_df``.

    The iteration follows the method-of-reflections pattern, restricted to
    rows with ``mcp == 1``:

    * ``kh_1`` = (sum of ``ubiquity`` within each ``region`` group) / ``diversity``
    * ``ki_1`` = (sum of ``diversity`` within each ``classification`` group) / ``ubiquity``
    * ``kh_{k+1}`` = (sum of ``ki_k`` within each ``region`` group) / ``diversity``
    * ``ki_{k+1}`` = (sum of ``kh_k`` within each ``classification`` group) / ``ubiquity``

    Parameters
    ----------
    c_df : pandas.DataFrame
        Must contain the columns ``mcp``, ``diversity``, ``ubiquity``, the
        ``classification`` column and the ``region`` column. It is not modified.
    classification : str
        Name of the column identifying the technology class.
    n : int, default 19
        Number of additional iterations after level 1, so the highest level
        produced is ``n + 1``.
    region : str, optional
        Name of the column identifying the region/corporation. When omitted,
        the notebook-level ``region_corporation`` setting is used, which keeps
        existing two-argument calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns followed by
        ``kh_1, ki_1, kh_2, ki_2, ...``. Rows keep their input order and the
        index is reset by the merges. Groups without any ``mcp == 1`` row get
        NaN in the added columns.
    """
    if region is None:
        # Backward-compatible fallback to the notebook-level setting.
        region = region_corporation

    def _with_group_sum(frame, key, src, dst):
        """Left-join the per-``key`` sum of ``src`` (mcp == 1 rows only) as column ``dst``."""
        sums = (
            frame.loc[frame['mcp'] == 1]
                 .groupby([key])[[src]].sum()
                 .reset_index(drop=False)
                 .rename(columns={src: dst})
        )
        return pd.merge(frame, sums, on=[key], how='left')

    # Level 1: group sums of the raw ubiquity/diversity, then normalise.
    out = _with_group_sum(c_df.copy(), region, 'ubiquity', 'kh_1')
    out = _with_group_sum(out, classification, 'diversity', 'ki_1')
    out['kh_1'] = out['kh_1'] / out['diversity']
    out['ki_1'] = out['ki_1'] / out['ubiquity']

    # Levels 2..n+1: each level is built from the *other* indicator of the
    # previous level. Both sums are attached before either is normalised, so
    # the normalisation of kh_{level+1} cannot leak into ki_{level+1}.
    for level in range(1, n + 1):
        out = _with_group_sum(out, region, f'ki_{level}', f'kh_{level + 1}')
        out = _with_group_sum(out, classification, f'kh_{level}', f'ki_{level + 1}')
        out[f'kh_{level + 1}'] = out[f'kh_{level + 1}'] / out['diversity']
        out[f'ki_{level + 1}'] = out[f'ki_{level + 1}'] / out['ubiquity']
    return out


---


<a id=data></a>

## **2. オリジナルデータインポート**


In [7]:
# Lookup table from Schmoch field number to English field name;
# repeated (number, name) pairs in the file are collapsed to one row.
schmoch_df = (
    pd.read_csv(
        f'{ex_dir}35.csv',
        usecols=['Field_number', 'Field_en'],
        sep=',',
        encoding='utf-8',
    )
    .drop_duplicates()
)

In [8]:
# Load the input table whose file name is given by `input_condition`, then preview it.
reg_num_top_df = pd.read_csv(
    f'{data_dir}{input_condition}.csv',
    sep=',',
    encoding='utf-8',
)
reg_num_top_df

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num
0,1981-2010,キヤノン株式会社,9,23723.000
1,1981-2010,キヤノン株式会社,28,9996.333
2,1981-2010,キヤノン株式会社,2,9525.167
3,1981-2010,キヤノン株式会社,6,7450.333
4,1981-2010,キヤノン株式会社,3,6352.500
...,...,...,...,...
105713,2001-2010,五洋建設株式会社,31,0.071
105714,2001-2010,四国電力株式会社,9,0.071
105715,2001-2010,北陸電力株式会社,9,0.071
105716,2001-2010,九州電力株式会社,31,0.071


In [9]:
# Number of distinct values in the region/corporation column of the input table.
reg_num_top_df[region_corporation].nunique()

1938

<a href=#top>先頭に戻る</a>

---


<a id=calculateindicator></a>

## **4. 各指標**


In [10]:
# Column roles passed to `ecomplexity` through `cols_input`.
trade_cols = dict(
    time=f'{ar}_{year_style}_period',
    loc=region_corporation,
    prod=classification,
    val='reg_num',
)

# The eci/pci columns of the result are renamed to kci/tci.
rename_col_dict = dict(eci='kci', pci='tci')

# Columns kept from the result, in display order.
col_order_list = [
    trade_cols['time'],
    trade_cols['loc'],
    trade_cols['prod'],
    trade_cols['val'],
    'rca',
    'mcp',
    'diversity',
    'ubiquity',
    'kci',
    'tci',
]


In [11]:
# Run `ecomplexity` on the input table with rca_mcp_threshold = 1.
c_df = ecomplexity(reg_num_top_df, cols_input=trade_cols, rca_mcp_threshold=1)

# Keep rows with a positive reg_num, rename eci/pci, and fix the column order.
c_df = (
    c_df.loc[c_df['reg_num'] > 0]
        .rename(columns=rename_col_dict)
        .loc[:, col_order_list]
)

# Apply kh_ki to each period separately and stack the per-period results.
c_df = pd.concat(
    [
        kh_ki(c_df.loc[c_df[f'{ar}_{year_style}_period'] == period], classification)
        for period in c_df[f'{ar}_{year_style}_period'].unique()
    ],
    axis='index',
    ignore_index=True,
)

# for segment in c_df[f'{ar}_{year_style}_period'].unique():
#     display(c_df[c_df[f'{ar}_{year_style}_period'] == segment].head())
#     display(c_df[c_df[f'{ar}_{year_style}_period'] == segment].describe())
#     print(c_df[c_df[f'{ar}_{year_style}_period'] == segment].info())
#     print('\n')


1981-2010
1981-1990
1991-2000
2001-2010


In [12]:
# Preview rows ordered by period (ascending) and, within a period, by kci (descending).
c_df.sort_values(by=[f'{ar}_{year_style}_period', 'kci'], ascending=[True, False])

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
46325,1981-1990,杏林製薬株式会社,14,39.000,31.908,1,2,271,2.318,2.287,...,6.597,328.190,328.196,6.590,6.590,327.955,327.959,6.586,6.586,327.808
46326,1981-1990,杏林製薬株式会社,16,10.000,38.989,1,2,223,2.318,2.350,...,6.597,328.201,328.196,6.590,6.590,327.963,327.959,6.586,6.586,327.813
46327,1981-1990,杏林製薬株式会社,25,1.000,0.660,0,2,481,2.318,-0.033,...,6.597,327.594,328.196,6.581,6.590,327.585,327.959,6.581,6.586,327.583
41709,1981-1990,国立大学法人九州工業大学,14,4.000,40.907,1,1,271,2.287,2.287,...,6.597,328.190,328.190,6.590,6.590,327.955,327.955,6.586,6.586,327.808
49460,1981-1990,株式会社マンダム,14,1.000,40.907,1,1,271,2.287,2.287,...,6.597,328.190,328.190,6.590,6.590,327.955,327.955,6.586,6.586,327.808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98065,2001-2010,株式会社ナカヨ,7,1.000,0.622,0,3,229,-3.617,-1.888,...,7.104,335.786,335.294,7.109,7.108,336.047,335.785,7.111,7.110,336.185
98066,2001-2010,株式会社ナカヨ,10,1.000,0.070,0,3,457,-3.617,-0.439,...,7.104,336.211,335.294,7.111,7.108,336.271,335.785,7.112,7.110,336.303
98067,2001-2010,株式会社ナカヨ,12,1.000,0.191,0,3,307,-3.617,-1.719,...,7.104,335.868,335.294,7.109,7.108,336.088,335.785,7.110,7.110,336.205
98068,2001-2010,株式会社ナカヨ,34,0.333,0.081,0,3,260,-3.617,0.181,...,7.104,336.372,335.294,7.111,7.108,336.360,335.785,7.112,7.110,336.352


In [13]:
# Spot check: rows of classification 22 in the '1981-1990' period.
c_df[(c_df[classification]==22)&(c_df[f'{ar}_{year_style}_period']=='1981-1990')]

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
46311,1981-1990,本田技研工業株式会社,22,0.5,53.533,1,8,2,0.011,0.92,...,6.58,327.919,327.648,6.584,6.581,327.773,327.611,6.583,6.581,327.692
47987,1981-1990,株式会社クラレ,22,0.5,170.184,1,13,2,1.125,0.92,...,6.588,327.919,327.899,6.584,6.585,327.773,327.773,6.583,6.584,327.692


<a href="#top">先頭に戻る</a>

---


<a id=output></a>

## **5. ファイルに出力**

<a id=rightperson></a>

### **5.1. 特許権者**


In [14]:

# One row per (period, holder): total reg_num, number of distinct classes,
# and the holder-level indicators (diversity, kci, kh_1..kh_20).
holder_keys = [f'{ar}_{year_style}_period', region_corporation]

right_person_df = (
    c_df.groupby(holder_keys)
        .agg(**{
            'reg_num': ('reg_num', 'sum'),
            classification: (classification, 'nunique'),
        })
        .reset_index(drop=False)
        .merge(
            # kh_1..kh_20 are the levels produced by kh_ki with its default n=19.
            # Dropping duplicates collapses the per-class rows of each holder.
            c_df[holder_keys + ['diversity', 'kci'] + [f'kh_{i}' for i in range(1, 20 + 1)]]
                .drop_duplicates(keep='first'),
            on=holder_keys,
            how='inner',
        )
)
# for period in right_person_df[f'{ar}_{year_style}_period'].unique():
#     right_person_df

# for period in right_person_df[f'{ar}_{year_style}_period'].unique():
#     for i in range(1, 20+1):
#         value = right_person_df[right_person_df[f'{ar}_{year_style}_period']==period]
#         right_person_df[right_person_df[f'{ar}_{year_style}_period']==period][f'kh_{i}'] = (value[f'kh_{i}'] - value[f'kh_{i}'].mean()) / value[f'kh_{i}'].std()
#     display(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].head())
#     display(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].describe())
#     print(right_person_df[right_person_df[f'{ar}_{year_style}_period'] == period].info())
#     print('\n')
# right_person_df['reg_num'] = right_person_df['reg_num'].astype(np.int64)

In [15]:
# Show the field-number / field-name lookup table.
schmoch_df

Unnamed: 0,Field_number,Field_en
0,1,"Electrical machinery, apparatus, energy"
30,2,Audio-visual technology
48,3,Telecommunications
58,4,Digital communication
61,5,Basic communication processes
71,6,Computer technology
88,7,IT methods for management
89,8,Semiconductors
91,9,Optics
101,10,Measurement


In [16]:
# Attach the English field names, then inspect the mcp == 1 rows of
# 'Digital communication' for the f'{year_start}-{year_end}' period.
sample = c_df.merge(
    schmoch_df,
    left_on=classification,
    right_on='Field_number',
    how='left',
)
sample.loc[
    (sample[f'{ar}_{year_style}_period'] == f'{year_start}-{year_end}')
    & (sample['Field_en'] == 'Digital communication')
    & (sample['mcp'] == 1)
]

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20,Field_number,Field_en
113,1981-2010,アイコム株式会社,4,47.500,8.862,1,6,150,-2.282,-3.771,...,358.209,7.310,7.313,358.289,358.706,7.314,7.316,358.752,4,Digital communication
219,1981-2010,アイホン株式会社,4,13.000,1.483,1,5,150,-2.253,-3.771,...,358.206,7.310,7.314,358.289,358.706,7.314,7.316,358.752,4,Digital communication
424,1981-2010,アズビル株式会社,4,70.000,2.032,1,9,150,-1.412,-3.771,...,358.628,7.310,7.315,358.289,358.941,7.314,7.317,358.752,4,Digital communication
563,1981-2010,アラクサラネツトワークス株式会社,4,235.500,70.758,1,1,150,-3.771,-3.771,...,357.461,7.310,7.310,358.289,358.289,7.314,7.314,358.752,4,Digital communication
681,1981-2010,アンリツ株式会社,4,208.250,6.596,1,7,150,-2.047,-3.771,...,358.308,7.310,7.314,358.289,358.763,7.314,7.316,358.752,4,Digital communication
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34229,1981-2010,ＳＣＳＫ株式会社,4,3.000,1.449,1,7,150,-1.825,-3.771,...,358.418,7.310,7.315,358.289,358.825,7.314,7.317,358.752,4,Digital communication
34291,1981-2010,Ｓｋｙ株式会社,4,4.000,2.285,1,4,150,-2.916,-3.771,...,357.881,7.310,7.312,358.289,358.524,7.314,7.315,358.752,4,Digital communication
34403,1981-2010,ＴＯＡ株式会社,4,24.000,5.063,1,7,150,-2.032,-3.771,...,358.322,7.310,7.314,358.289,358.770,7.314,7.316,358.752,4,Digital communication
34536,1981-2010,ＴＶＳＲＥＧＺＡ株式会社,4,21.750,4.498,1,4,150,-3.298,-3.771,...,357.694,7.310,7.311,358.289,358.419,7.314,7.315,358.752,4,Digital communication


In [17]:
# Holder-level rows for the f'{year_start}-{year_end}' period, highest kci first.
right_person_df[right_person_df[f'{ar}_{year_style}_period']==f'{year_start}-{year_end}'].sort_values('kci', ascending=False)

Unnamed: 0,app_nendo_period,right_person_name,reg_num,schmoch35,diversity,kci,kh_1,kh_2,kh_3,kh_4,...,kh_11,kh_12,kh_13,kh_14,kh_15,kh_16,kh_17,kh_18,kh_19,kh_20
3120,1981-2010,株式会社ノエビア,409.833,12,3,1.552,309.667,8.267,371.901,7.944,...,362.757,7.364,361.330,7.342,360.488,7.331,359.999,7.325,359.717,7.322
3573,1981-2010,財団法人微生物化学研究会,179.000,7,3,1.526,338.667,8.488,372.250,7.997,...,362.672,7.365,361.283,7.343,360.462,7.331,359.984,7.325,359.709,7.323
2250,1981-2010,ヴイアトリス製薬株式会社,149.167,10,3,1.526,338.667,8.488,372.250,7.997,...,362.672,7.365,361.283,7.343,360.462,7.331,359.984,7.325,359.709,7.323
3671,1981-2010,ＭＳＤ株式会社,165.500,6,3,1.526,338.667,8.488,372.250,7.997,...,362.672,7.365,361.283,7.343,360.462,7.331,359.984,7.325,359.709,7.323
2424,1981-2010,協和発酵バイオ株式会社,192.367,11,4,1.502,327.750,8.383,372.569,7.945,...,362.640,7.362,361.262,7.341,360.449,7.331,359.976,7.325,359.704,7.322
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3718,1981-2010,ＴＶＳＲＥＧＺＡ株式会社,378.450,10,4,-3.298,187.500,6.873,259.889,7.065,...,349.909,7.281,354.075,7.297,356.398,7.306,357.694,7.311,358.419,7.315
3634,1981-2010,Ｄｙｎａｂｏｏｋ株式会社,707.167,17,4,-3.298,187.500,6.873,259.889,7.065,...,349.909,7.281,354.075,7.297,356.398,7.306,357.694,7.311,358.419,7.315
3414,1981-2010,株式会社ＫＤＤＩ総合研究所,198.333,7,4,-3.298,187.500,6.873,259.889,7.065,...,349.909,7.281,354.075,7.297,356.398,7.306,357.694,7.311,358.419,7.315
2391,1981-2010,八重洲無線株式会社,266.500,14,3,-3.389,171.000,7.160,258.478,7.131,...,349.686,7.281,353.944,7.296,356.322,7.306,357.651,7.311,358.395,7.315


In [18]:
# Write the holder-level table under the `firms/` output folder (no index column).
right_person_df.to_csv(
    f'{output_dir}firms/{output_condition}.csv',
    sep=',',
    encoding='utf-8',
    index=False,
)


In [19]:
# Show the holder-level table.
right_person_df

Unnamed: 0,app_nendo_period,right_person_name,reg_num,schmoch35,diversity,kci,kh_1,kh_2,kh_3,kh_4,...,kh_11,kh_12,kh_13,kh_14,kh_15,kh_16,kh_17,kh_18,kh_19,kh_20
0,1981-1990,あすか製薬株式会社,92.500,8,6,1.811,255.500,7.260,314.233,6.995,...,329.324,6.620,328.767,6.603,328.346,6.593,328.060,6.588,327.875,6.585
1,1981-1990,いすゞ自動車株式会社,1375.143,23,4,-0.104,295.000,6.472,337.986,6.471,...,328.541,6.572,327.985,6.576,327.744,6.579,327.645,6.580,327.605,6.581
2,1981-1990,しげる工業株式会社,19.167,7,7,0.013,357.286,6.137,344.999,6.398,...,328.481,6.574,327.980,6.578,327.756,6.580,327.659,6.581,327.616,6.581
3,1981-1990,ぺんてる株式会社,587.000,23,9,0.614,339.111,6.789,326.213,6.712,...,327.993,6.594,327.888,6.588,327.792,6.585,327.719,6.583,327.670,6.582
4,1981-1990,みのる産業株式会社,38.000,2,2,0.089,365.000,6.263,351.260,6.434,...,328.786,6.575,328.132,6.579,327.834,6.580,327.699,6.581,327.638,6.581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7539,2001-2010,ＵＢＥ株式会社,1328.833,30,10,0.871,376.800,7.958,354.161,7.413,...,337.624,7.127,337.033,7.120,336.715,7.116,336.543,7.114,336.450,7.113
7540,2001-2010,ＵＤトラツクス株式会社,787.500,20,6,-0.270,313.833,6.466,337.249,6.785,...,336.389,7.094,336.314,7.103,336.302,7.108,336.310,7.110,336.319,7.111
7541,2001-2010,ＹＫＫ株式会社,372.083,19,2,0.128,247.500,6.474,319.938,6.884,...,336.228,7.109,336.315,7.111,336.340,7.112,336.346,7.112,336.346,7.112
7542,2001-2010,ＹＫＫＡＰ株式会社,704.833,15,1,0.427,413.000,5.729,372.865,6.601,...,337.727,7.093,337.014,7.103,336.674,7.108,336.508,7.110,336.426,7.111


<a href=#top>先頭に戻る</a>

---


<a id=ipc></a>

### **5.2. IPC**


In [20]:
# Lookup table from Schmoch field number to English field name.
# NOTE(review): this repeats the load done in section 2 with the same file and
# columns — presumably so this section can be run on its own; confirm, and
# otherwise consider removing the duplicate.
schmoch_df = (
    pd.read_csv(
        f'{ex_dir}35.csv',
        usecols=['Field_number', 'Field_en'],
        sep=',',
        encoding='utf-8',
    )
    .drop_duplicates()
)


In [21]:
# 各期間
classification_df = pd.merge(c_df.groupby([f'{ar}_{year_style}_period', classification])[['reg_num']].sum().reset_index(drop=False), 
                        c_df.groupby([f'{ar}_{year_style}_period', classification])[[region_corporation]].nunique().reset_index(drop=False), 
                        on=[f'{ar}_{year_style}_period', classification], 
                        how='inner')
classification_df = pd.merge(classification_df, 
                      c_df[[f'{ar}_{year_style}_period', classification, 'ubiquity', 'tci']\
                          +[f'ki_{i}' for i in range(1, 20+1)]]\
                          .drop_duplicates(keep='first'), 
                      on=[f'{ar}_{year_style}_period', classification], 
                      how='inner')
# classification_df['reg_num'] = classification_df['reg_num'].astype(np.int64)
classification_df = pd.merge(classification_df, 
                            schmoch_df.rename(columns={'Field_number':classification}), 
                            on=[classification], 
                            how='inner')\
                            .drop(columns=[classification])\
                            .rename(columns={'Field_en':classification})
display(classification_df)


Unnamed: 0,app_nendo_period,reg_num,right_person_name,ubiquity,tci,ki_1,ki_2,ki_3,ki_4,ki_5,...,ki_12,ki_13,ki_14,ki_15,ki_16,ki_17,ki_18,ki_19,ki_20,schmoch35
0,1981-1990,53258.648,917,306,-1.355,5.843,305.468,6.282,314.287,6.432,...,326.122,6.566,326.738,6.572,327.096,6.576,327.303,6.578,327.423,"Electrical machinery, apparatus, energy"
1,1981-2010,218723.591,1449,356,-0.971,6.469,341.390,7.049,346.891,7.196,...,357.870,7.308,358.500,7.313,358.863,7.316,359.070,7.317,359.188,"Electrical machinery, apparatus, energy"
2,1991-2000,64133.212,1068,341,-0.988,6.144,327.815,6.576,330.655,6.669,...,338.989,6.734,339.555,6.738,339.891,6.739,340.089,6.740,340.204,"Electrical machinery, apparatus, energy"
3,2001-2010,101331.731,1221,350,-0.797,6.331,331.016,6.906,331.301,7.032,...,335.682,7.106,335.970,7.109,336.135,7.110,336.228,7.111,336.279,"Electrical machinery, apparatus, energy"
4,1981-1990,52583.581,676,169,-2.368,5.935,251.112,6.276,288.631,6.407,...,324.371,6.558,325.814,6.567,326.602,6.573,327.036,6.576,327.277,Audio-visual technology
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,2001-2010,17092.000,704,260,0.181,6.281,317.877,6.770,331.605,6.979,...,336.403,7.108,336.390,7.111,336.372,7.111,336.360,7.112,336.352,Other consumer goods
136,1981-1990,27729.362,785,379,0.077,5.636,364.736,6.280,345.697,6.469,...,328.349,6.577,327.931,6.580,327.742,6.580,327.657,6.581,327.619,Civil engineering
137,1981-2010,112115.690,1235,412,0.459,5.684,389.048,6.720,372.875,7.072,...,360.199,7.310,359.798,7.315,359.587,7.318,359.474,7.319,359.414,Civil engineering
138,1991-2000,46222.454,988,414,0.381,5.556,375.517,6.324,356.237,6.578,...,341.296,6.737,340.848,6.740,340.618,6.741,340.499,6.741,340.436,Civil engineering


In [22]:
# Write the class-level table under the `technology/` output folder (no index column).
classification_df.to_csv(
    f'{output_dir}technology/{output_condition}.csv',
    sep=',',
    encoding='utf-8',
    index=False,
)


<a href=#top>先頭に戻る</a>

---


<a id=network></a>

### **5.3. 二部グラフ用**


In [24]:
# eneos_df = c_df[(c_df[f'{ar}_{year_style}_period']==f'{year_start}-{year_end}')&(c_df[region_corporation].str.contains('ＥＮＥＯＳ'))\
#                 &(c_df['mcp']==1)].copy()#[[region_corporation, 'reg_num', 'schmoch35']].copy()
# eneos_df = pd.merge(eneos_df, 
#                     schmoch_df.rename(columns={'Field_number':'schmoch35'})\
#                               .drop_duplicates(keep='first'), 
#                     on=['schmoch35'], 
#                     how='inner')
# eneos_df[['ubiquity', 'Field_en', 'ki_1']]

In [59]:
# Show the row-level indicator table.
c_df

Unnamed: 0,app_nendo_period,right_person_name,schmoch35,reg_num,rca,mcp,diversity,ubiquity,kci,tci,...,kh_16,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20
0,1981-2010,あすか製薬株式会社,7,0.333,0.647,0,5,249,1.418,-1.843,...,7.330,358.406,359.939,7.315,7.325,358.818,359.683,7.316,7.322,359.049
1,1981-2010,あすか製薬株式会社,10,3.500,0.374,0,5,481,1.418,-0.564,...,7.330,359.043,359.939,7.317,7.325,359.173,359.683,7.318,7.322,359.247
2,1981-2010,あすか製薬株式会社,11,15.000,36.464,1,5,372,1.418,1.081,...,7.330,359.790,359.939,7.323,7.325,359.598,359.683,7.321,7.322,359.487
3,1981-2010,あすか製薬株式会社,13,2.000,0.558,0,5,292,1.418,0.567,...,7.330,359.558,359.939,7.321,7.325,359.466,359.683,7.320,7.322,359.413
4,1981-2010,あすか製薬株式会社,14,70.000,23.694,1,5,339,1.418,1.586,...,7.330,360.011,359.939,7.326,7.325,359.724,359.683,7.323,7.322,359.559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105713,2001-2010,Ｚホールデイングス株式会社,29,0.500,0.039,0,4,499,-2.753,0.841,...,7.105,336.565,335.547,7.113,7.108,336.460,335.920,7.112,7.110,336.404
105714,2001-2010,Ｚホールデイングス株式会社,30,0.500,0.054,0,4,254,-2.753,0.302,...,7.105,336.456,335.547,7.111,7.108,336.399,335.920,7.111,7.110,336.371
105715,2001-2010,Ｚホールデイングス株式会社,32,0.500,0.021,0,4,229,-2.753,-0.195,...,7.105,336.372,335.547,7.109,7.108,336.350,335.920,7.110,7.110,336.342
105716,2001-2010,Ｚホールデイングス株式会社,33,2.500,0.110,0,4,183,-2.753,-0.426,...,7.105,336.245,335.547,7.107,7.108,336.294,335.920,7.110,7.110,336.317


In [23]:
# Row-level table for the bipartite graph: the numeric class code is replaced
# by its English field name (kept under the same column name).
graph_df = (
    c_df.merge(
        schmoch_df,
        left_on=classification,
        right_on='Field_number',
        how='left',
    )
    .drop(columns=['Field_number', classification])
    .rename(columns={'Field_en': classification})
)
graph_df

Unnamed: 0,app_nendo_period,right_person_name,reg_num,rca,mcp,diversity,ubiquity,kci,tci,kh_1,...,ki_16,kh_17,ki_17,kh_18,ki_18,kh_19,ki_19,kh_20,ki_20,schmoch35
0,1981-2010,あすか製薬株式会社,0.333,0.647,0,5,249,1.418,-1.843,336.600,...,358.406,359.939,7.315,7.325,358.818,359.683,7.316,7.322,359.049,IT methods for management
1,1981-2010,あすか製薬株式会社,3.500,0.374,0,5,481,1.418,-0.564,336.600,...,359.043,359.939,7.317,7.325,359.173,359.683,7.318,7.322,359.247,Measurement
2,1981-2010,あすか製薬株式会社,15.000,36.464,1,5,372,1.418,1.081,336.600,...,359.790,359.939,7.323,7.325,359.598,359.683,7.321,7.322,359.487,Analysis of biological materials
3,1981-2010,あすか製薬株式会社,2.000,0.558,0,5,292,1.418,0.567,336.600,...,359.558,359.939,7.321,7.325,359.466,359.683,7.320,7.322,359.413,Medical technology
4,1981-2010,あすか製薬株式会社,70.000,23.694,1,5,339,1.418,1.586,336.600,...,360.011,359.939,7.326,7.325,359.724,359.683,7.323,7.322,359.559,Organic fine chemistry
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105713,2001-2010,Ｚホールデイングス株式会社,0.500,0.039,0,4,499,-2.753,0.841,218.000,...,336.565,335.547,7.113,7.108,336.460,335.920,7.112,7.110,336.404,Other special machines
105714,2001-2010,Ｚホールデイングス株式会社,0.500,0.054,0,4,254,-2.753,0.302,218.000,...,336.456,335.547,7.111,7.108,336.399,335.920,7.111,7.110,336.371,Thermal processes and apparatus
105715,2001-2010,Ｚホールデイングス株式会社,0.500,0.021,0,4,229,-2.753,-0.195,218.000,...,336.372,335.547,7.109,7.108,336.350,335.920,7.110,7.110,336.342,Transport
105716,2001-2010,Ｚホールデイングス株式会社,2.500,0.110,0,4,183,-2.753,-0.426,218.000,...,336.245,335.547,7.107,7.108,336.294,335.920,7.110,7.110,336.317,"Furniture, games"


In [47]:
# Edge list for the f'{year_start}-{year_end}' period: one undirected edge per
# (holder, class) row with mcp == 1. The mcp flag becomes the edge weight.
all_edge_df = (
    graph_df.loc[
        (graph_df[f'{ar}_{year_style}_period'] == f'{year_start}-{year_end}')
        & (graph_df['mcp'] == 1),
        [region_corporation, classification, 'mcp'],
    ]
    .rename(columns={'mcp': 'Weight'})
    .assign(Type='Undirected')
)

# Node table: holders first (projected = 0), then classes (projected = 1),
# numbered from 1 in that order.
source_labels = list(all_edge_df[region_corporation].unique())
target_labels = list(all_edge_df[classification].unique())
all_node_list = source_labels + target_labels
all_flag_list = [0] * len(source_labels) + [1] * len(target_labels)
all_node_df = (
    pd.DataFrame(all_node_list, columns=['label'])
      .reset_index(drop=False)
      .rename(columns={'index': 'node_id'})
)
all_node_df['projected'] = all_flag_list
all_node_df['node_id'] += 1

# Resolve labels to node ids. Each endpoint is looked up only within its own
# partition, so a holder name that happens to equal a class name can neither
# duplicate edges nor pick up the wrong id.
source_ids = (
    all_node_df.loc[all_node_df['projected'] == 0, ['label', 'node_id']]
               .rename(columns={'label': region_corporation, 'node_id': 'Source'})
)
target_ids = (
    all_node_df.loc[all_node_df['projected'] == 1, ['label', 'node_id']]
               .rename(columns={'label': classification, 'node_id': 'Target'})
)
all_edge_df = (
    all_edge_df.merge(source_ids, on=region_corporation, how='left')
               .merge(target_ids, on=classification, how='left')
               [['Source', 'Target', 'Type', 'Weight']]
)


In [48]:
# Write the node and edge tables under the `graph/` output folder (no index column).
all_node_df.to_csv(
    f'{output_dir}graph/{output_condition}_node.csv',
    sep=',',
    encoding='utf-8',
    index=False,
)
all_edge_df.to_csv(
    f'{output_dir}graph/{output_condition}_edge.csv',
    sep=',',
    encoding='utf-8',
    index=False,
)


In [61]:
# graph_df.to_csv(f'{output_dir}graph/{output_condition}.csv', 
#                 encoding='utf-8', 
#                 sep=',', 
#                 index=False)
# graph_df
