### 1.导入数据

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import geopandas as gpd
%matplotlib inline

In [2]:
#step1.读取上市公司和子公司数据，并进行连接
df_com = pd.read_excel(r'../../file/subject/chapter3/SSGScompany04.xlsx')
df_son = pd.read_excel(r'../../file/subject/chapter3/company_info_all05.xlsx')
df = pd.merge(df_son,df_com,left_on='上市公司名称',right_on='公司名称2').reset_index()
#step2.筛选正常营业公司
df['公司状态'] = df['公司状态'].apply(lambda x:'开业' if x == 0 else x)
df = df[-df['公司状态'].str.startswith('吊销')]
#step3.筛选非同城公司以建立关联
city_net = df[['city_x','city_y']].rename(columns={'city_x':'ZGS_city','city_y':'SSGS_city'})
city_net['count'] = 1
city_net = city_net[-(city_net['ZGS_city'] == city_net['SSGS_city'])]
#step4.处理错误城市数据
city_net = city_net[-(city_net.ZGS_city.isin([0,'','']) | city_net.ZGS_city.isna())]
city_net.ZGS_city = city_net.ZGS_city.apply(lambda x:np.nan if x.isascii() else x)
city_net.dropna(inplace=True)

### 2.出入度分析

#### 2.1出度分析
出度：上市公司对外投资程度

In [3]:
city_net2 = city_net.copy()
city_net2.head()

Unnamed: 0,ZGS_city,SSGS_city,count
0,上海市,兰州市,1
9,张掖市,兰州市,1
15,酒泉市,兰州市,1
17,武威市,兰州市,1
21,张掖市,兰州市,1


In [7]:
city_net2.groupby(by='SSGS_city')['SSGS_city'].count().sort_values(ascending=False).head()

SSGS_city
北京市    6760
深圳市    2763
上海市    2114
杭州市    1025
广州市     937
Name: SSGS_city, dtype: int64

In [8]:
sum(city_net2.groupby(by='SSGS_city')['SSGS_city'].count().sort_values(ascending=False))

33776

#### 2.2入度分析
入度：接受上市公司投资的程度,总出度=总入度

In [9]:
city_net2.groupby(by='ZGS_city')['ZGS_city'].count().sort_values(ascending=False).head()

ZGS_city
上海市    2569
深圳市    2458
北京市    2244
杭州市     846
天津市     795
Name: ZGS_city, dtype: int64

In [10]:
sum(city_net2.groupby(by='ZGS_city')['ZGS_city'].count().sort_values(ascending=False))

33776

#### 2.3合并为DadaFrame对象

In [11]:
city_out = city_net2.groupby(by='SSGS_city')['SSGS_city'].count().sort_values(ascending=False)
city_in = city_net2.groupby(by='ZGS_city')['ZGS_city'].count().sort_values(ascending=False)

In [26]:
#Series转DataFrame
city_in = city_in.to_frame(name='in').reset_index()
city_out = city_out.to_frame(name='out').reset_index()

In [28]:
in_out = pd.merge(city_in,city_out,left_on='ZGS_city',right_on='SSGS_city')
in_out.head()

Unnamed: 0,ZGS_city,in,SSGS_city,out
0,上海市,2569,上海市,2114
1,深圳市,2458,深圳市,2763
2,北京市,2244,北京市,6760
3,杭州市,846,杭州市,1025
4,天津市,795,天津市,361


In [30]:
in_out = in_out[['ZGS_city','in','out']]
in_out['all'] = in_out['in'] + in_out['out']
in_out.head()

Unnamed: 0,ZGS_city,in,out,all
0,上海市,2569,2114,4683
1,深圳市,2458,2763,5221
2,北京市,2244,6760,9004
3,杭州市,846,1025,1871
4,天津市,795,361,1156


In [31]:
in_out.to_excel(r'../../file/subject/chapter3/in_out.xlsx',sheet_name='Sheet1')

### 3.四类城市组织中的节点连接排序

#### 3.1TOP1网络

In [66]:
city_net3.groupby(by=['SSGS_city','ZGS_city']).sum().reset_index().sort_values(by='count').tail(10)

Unnamed: 0,SSGS_city,ZGS_city,count
68,上海市,北京市,142
1225,北京市,重庆市,166
1073,北京市,成都市,169
1110,北京市,武汉市,176
5241,深圳市,上海市,218
5267,深圳市,北京市,224
1026,北京市,天津市,239
181,上海市,深圳市,270
952,北京市,上海市,423
1136,北京市,深圳市,885


In [63]:
city_net3.groupby(by=['SSGS_city','ZGS_city']).sum().reset_index().sort_values(by='count').tail(10).sort_values(by=['SSGS_city','count'])

Unnamed: 0,SSGS_city,ZGS_city,count
68,上海市,北京市,142
181,上海市,深圳市,270
1225,北京市,重庆市,166
1073,北京市,成都市,169
1110,北京市,武汉市,176
1026,北京市,天津市,239
952,北京市,上海市,423
1136,北京市,深圳市,885
5241,深圳市,上海市,218
5267,深圳市,北京市,224


In [64]:
city_net3.groupby(by=['SSGS_city','ZGS_city']).sum().reset_index().sort_values(by='count').tail(10).sort_values(by=['SSGS_city','count'],ascending=[True,False])

Unnamed: 0,SSGS_city,ZGS_city,count
181,上海市,深圳市,270
68,上海市,北京市,142
1136,北京市,深圳市,885
952,北京市,上海市,423
1026,北京市,天津市,239
1110,北京市,武汉市,176
1073,北京市,成都市,169
1225,北京市,重庆市,166
5267,深圳市,北京市,224
5241,深圳市,上海市,218


In [77]:
city_net3.groupby(by=['SSGS_city','ZGS_city']).sum().reset_index().sort_values(by='count').tail(20).sort_values(by=['SSGS_city','ZGS_city'],ascending=[True,False]).groupby(level=0).head(2)

Unnamed: 0,SSGS_city,ZGS_city,count
215,上海市,苏州市,110
181,上海市,深圳市,270
68,上海市,北京市,142
1225,北京市,重庆市,166
1200,北京市,西安市,109
1136,北京市,深圳市,885
1110,北京市,武汉市,176
1099,北京市,杭州市,124
1073,北京市,成都市,169
1058,北京市,广州市,140


In [92]:
city_net3.head(15).groupby(by=['SSGS_city','ZGS_city']).head(1)

Unnamed: 0,ZGS_city,SSGS_city,count,SSGS_ZGS
0,上海市,兰州市,1,"[上海市, 兰州市]"
9,张掖市,兰州市,1,"[张掖市, 兰州市]"
15,酒泉市,兰州市,1,"[酒泉市, 兰州市]"
17,武威市,兰州市,1,"[武威市, 兰州市]"
22,重庆市,兰州市,1,"[重庆市, 兰州市]"
23,通辽市,兰州市,1,"[通辽市, 兰州市]"
70,淄博市,包头市,1,"[淄博市, 包头市]"


In [93]:
city_net3.head(15).groupby(by=['SSGS_city','ZGS_city']).head(2)

Unnamed: 0,ZGS_city,SSGS_city,count,SSGS_ZGS
0,上海市,兰州市,1,"[上海市, 兰州市]"
9,张掖市,兰州市,1,"[张掖市, 兰州市]"
15,酒泉市,兰州市,1,"[酒泉市, 兰州市]"
17,武威市,兰州市,1,"[武威市, 兰州市]"
21,张掖市,兰州市,1,"[张掖市, 兰州市]"
22,重庆市,兰州市,1,"[重庆市, 兰州市]"
23,通辽市,兰州市,1,"[通辽市, 兰州市]"
33,上海市,兰州市,1,"[上海市, 兰州市]"
48,酒泉市,兰州市,1,"[酒泉市, 兰州市]"
50,武威市,兰州市,1,"[武威市, 兰州市]"


In [104]:
city_net3.head(15).groupby(by=['SSGS_city','ZGS_city']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
SSGS_city,ZGS_city,Unnamed: 2_level_1
兰州市,上海市,2
兰州市,张掖市,4
兰州市,武威市,2
兰州市,通辽市,2
兰州市,酒泉市,2
兰州市,重庆市,2
包头市,淄博市,1


In [108]:
city_net3.head(15).groupby(by=['SSGS_city','ZGS_city']).sum().groupby(level=0).head(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
SSGS_city,ZGS_city,Unnamed: 2_level_1
兰州市,上海市,2
兰州市,张掖市,4
兰州市,武威市,2
兰州市,通辽市,2
包头市,淄博市,1


### 3.

#### 3.1将上市公司和子公司所在城市存入列表

In [32]:
city_net3 = city_net2.copy()

In [38]:
city_net3.apply(lambda x:[x[0],x[1]],axis=1)

0        [上海市, 兰州市]
9        [张掖市, 兰州市]
15       [酒泉市, 兰州市]
17       [武威市, 兰州市]
21       [张掖市, 兰州市]
            ...    
57257    [东莞市, 宁德市]
57258    [北京市, 宁德市]
57259    [武汉市, 宁德市]
57260    [北京市, 宁德市]
57261    [泉州市, 宁德市]
Length: 33776, dtype: object

In [39]:
city_net3['SSGS_ZGS'] = city_net3.apply(lambda x:[x[0],x[1]],axis=1)

In [40]:
city_net3.head()

Unnamed: 0,ZGS_city,SSGS_city,count,SSGS_ZGS
0,上海市,兰州市,1,"[上海市, 兰州市]"
9,张掖市,兰州市,1,"[张掖市, 兰州市]"
15,酒泉市,兰州市,1,"[酒泉市, 兰州市]"
17,武威市,兰州市,1,"[武威市, 兰州市]"
21,张掖市,兰州市,1,"[张掖市, 兰州市]"


#### 3.2城市投资意向

In [46]:
city_net3.groupby(by=['SSGS_city','ZGS_city']).sum().reset_index().sort_values(by='count',ascending=False)

Unnamed: 0,SSGS_city,ZGS_city,count
1136,北京市,深圳市,885
952,北京市,上海市,423
181,上海市,深圳市,270
1026,北京市,天津市,239
5267,深圳市,北京市,224
...,...,...,...
3701,成都市,芜湖市,1
3698,成都市,绥化市,1
3697,成都市,绍兴市,1
3696,成都市,红河哈尼族彝族自治州,1
