## Loading data into Pandas DataFrame

In [14]:
import pandas as pd
import numpy as np

In [15]:
! gdown --id 1rsAH_B4iFuw_KuyhEFcF4uAqywe2Uq-f

Downloading...
From: https://drive.google.com/uc?id=1rsAH_B4iFuw_KuyhEFcF4uAqywe2Uq-f
To: /content/cs_scientist_ranks.xlsx
100% 273k/273k [00:00<00:00, 17.3MB/s]


In [16]:
df = pd.read_excel("/content/cs_scientist_ranks.xlsx")
df.Citations = df.Citations.apply(lambda x : x.replace(',',''))
df

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1995,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1996,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1997,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1998,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


## DataFrames
DataFrame is very similar to Excel workbook Tabular Datasheets starting from index 0. <br/>

Operations: <br/>
i) **df.shape** => dimension of the dataframe <br/>
ii) **df.head(n)** => top n records/rows <br/>
iii) **df.tail(n)** => bottom n records/rows <br/>
iv) **df.columns** => all column names <br/>
v) **df['column_name']** => access data from particular column name <br/>
vi) **df[['column_name_1','column_name_2',...]]** => access data from multiple columns

In [17]:
df.shape

(2000, 10)

In [18]:
df.head(3) # default n=5

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...


In [19]:
df.tail(3) # default n=5

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
1997,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1998,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...
1999,1999,2000,79,Jonathan Li,University of Waterloo,Canada,61,11276,505,https://s.research.com/images/90a9c3c82b5717c5...


In [20]:
df.columns

Index(['Unnamed: 0', 'World Rank', 'National Rank', 'Name', 'Affiliation',
       'Country', 'D-Index', 'Citations', 'Number of Publications',
       'Image URLs'],
      dtype='object')

In [21]:
df['Name']

0            Anil K. Jain
1           Yoshua Bengio
2              Jiawei Han
3       Michael I. Jordan
4        Andrew Zisserman
              ...        
1995       Cheng-Zhong Xu
1996            Debiao He
1997             Shuai Li
1998        Cunsheng Ding
1999          Jonathan Li
Name: Name, Length: 2000, dtype: object

In [22]:
df[['Name','Affiliation']]

Unnamed: 0,Name,Affiliation
0,Anil K. Jain,Michigan State University
1,Yoshua Bengio,University of Montreal
2,Jiawei Han,University of Illinois at Urbana-Champaign
3,Michael I. Jordan,University of California
4,Andrew Zisserman,University of Oxford
...,...,...
1995,Cheng-Zhong Xu,University of Macau
1996,Debiao He,Wuhan University
1997,Shuai Li,University of Oulu
1998,Cunsheng Ding,Hong Kong University of Science and Technology


## Indexing

In [23]:
df.iloc[:3]

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...


In [24]:
df.iloc[3]

Unnamed: 0                                                                3
World Rank                                                                4
National Rank                                                             3
Name                                                      Michael I. Jordan
Affiliation                                        University of California
Country                                                            Berkeley
D-Index                                                                 176
Citations                                                            220056
Number of Publications                                                  776
Image URLs                https://s.research.com/images/56e4335ff90165e8...
Name: 3, dtype: object

In [25]:
# Second top 50 Names, Image URLs and affiliation

df.iloc[51:101,2:5]

Unnamed: 0,National Rank,Name,Affiliation
51,4,Bernt Schiele,Max Planck Institute for Informatics
52,31,Albert-László Barabási,Northeastern University
53,32,Andrew Y. Ng,Stanford University
54,33,Deborah Estrin,Cornell University
55,1,Shuicheng Yan,National University of Singapore
56,6,Hong-Jiang Zhang,ByteDance
57,34,Hari Balakrishnan,MIT
58,35,Richard Szeliski,University of Washington
59,7,Qiang Yang,Hong Kong University of Science and Technology
60,36,Christos H. Papadimitriou,Columbia University


In [26]:
# Same thing but in a different way
df[['Name', 'Image URLs', 'Affiliation']].iloc[51:101]

Unnamed: 0,Name,Image URLs,Affiliation
51,Bernt Schiele,https://s.research.com/images/41c1d9d732c58c3e...,Max Planck Institute for Informatics
52,Albert-László Barabási,https://s.research.com/images/27d8857b608d201a...,Northeastern University
53,Andrew Y. Ng,https://s.research.com/images/655f85eed1748af3...,Stanford University
54,Deborah Estrin,https://s.research.com/images/c003430f792bccdb...,Cornell University
55,Shuicheng Yan,https://s.research.com/images/fe2b011dcfed8f46...,National University of Singapore
56,Hong-Jiang Zhang,https://s.research.com/images/86cd0138a90d04ce...,ByteDance
57,Hari Balakrishnan,https://s.research.com/images/06ffc7e8aa355f47...,MIT
58,Richard Szeliski,https://s.research.com/images/eea998d7614c5445...,University of Washington
59,Qiang Yang,https://s.research.com/images/6856b85aba961817...,Hong Kong University of Science and Technology
60,Christos H. Papadimitriou,https://s.research.com/images/16e9bdfee66feaee...,Columbia University


In [27]:
df.head()

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...


In [29]:
# Get all with D-Index > 150

df[ df['D-Index'] > 150 ]

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
5,5,6,4,Philip S. Yu,University of Illinois at Chicago,United States,166,131899,1814,https://s.research.com/images/20815a3765631eb9...
6,6,7,5,Thomas S. Huang,University of Illinois at Urbana-Champaign,United States,165,122431,1329,https://s.research.com/images/ddd0e184e7830de1...
7,7,8,6,Takeo Kanade,Carnegie Mellon University,United States,161,125670,751,https://s.research.com/images/6f00a60e4fcd5fbf...
8,8,9,1,Francisco Herrera,University of Granada,Spain,161,111307,1008,https://s.research.com/images/900c497d066eeefc...
9,9,10,1,Wil M. P. van der Aalst,RWTH Aachen University,Germany,157,118010,1038,https://s.research.com/images/fe632259056863c6...


## Data Analysis

In [30]:
df['Affiliation'].value_counts()

University of California              125
MIT                                    62
Carnegie Mellon University             60
Microsoft (United States)              54
Stanford University                    52
                                     ... 
Oregon Health & Science University      1
University of Genoa                     1
Télécom SudParis                        1
University of Quebec at Montreal        1
National Cheng Kung University          1
Name: Affiliation, Length: 558, dtype: int64

In [31]:
df['Country'].value_counts()

 United States     923
 China             188
 United Kingdom    112
 Germany            82
 Canada             77
                  ... 
 Estonia             1
 Hyderabad           1
 Turkey              1
 Jordan              1
 Cyprus              1
Name: Country, Length: 66, dtype: int64

In [32]:
df.isnull().values.any()

True

In [33]:
# Check if there is any missing value
df.isnull().sum()

Unnamed: 0                0
World Rank                0
National Rank             0
Name                      0
Affiliation               0
Country                   1
D-Index                   0
Citations                 0
Number of Publications    0
Image URLs                0
dtype: int64

In [34]:
df = df.dropna(how="any") # drop all null values
df

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1995,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1996,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1997,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1998,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


In [35]:
df = df.reset_index(drop=True)
df

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1994,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1995,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1996,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1997,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


# Sorting

In [36]:
name_sorted_df = df.sort_values(by='Name')
name_sorted_df.head()

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
1671,1671,1672,11,Aapo Hyvärinen,University of Helsinki,Finland,63,65067,284,https://s.research.com/images/e3bec7c2d7ee4364...
472,472,473,21,Aaron Courville,University of Montreal,Canada,84,142816,228,https://s.research.com/images/ba1916a4e8734172...
1249,1249,1250,48,Aaron Fenster,University of Western Ontario,Canada,69,16108,543,https://s.research.com/images/91355b6c18a08d9f...
1585,1585,1586,882,Aaron Hertzmann,Adobe Systems (United States),United States,64,22872,161,https://s.research.com/images/77b4e7d37b48bde8...
994,994,995,580,Abbas El Gamal,Stanford University,United States,72,28354,243,https://s.research.com/images/c7067edb7e65fb89...


In [37]:
name_sorted_df = df.sort_values(by='Name',ascending=False)
name_sorted_df.head()

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
1417,1417,1418,798,Éva Tardos,Cornell University,United States,66,38112,182,https://s.research.com/images/8489a5fc44f4a3cf...
1987,1988,1989,1078,s muthukrishnan,Rutgers,The State University of New Jersey,61,12629,155,https://s.research.com/images/8924f9ccdd92309f...
792,792,793,1,gonzalo navarro,University of Chile,Chile,76,25496,493,https://s.research.com/images/b791f56c75f53b23...
1732,1732,1733,948,bo li,University of Illinois at Urbana-Champaign,United States,63,17803,568,https://s.research.com/images/e6fe342d1ec270e1...
731,731,732,434,Zygmunt J. Haas,The University of Texas at Dallas,United States,77,36314,321,https://s.research.com/images/d4ca1694e43e1705...


In [38]:
name_sorted_df = df.sort_values(by=['Name','Affiliation'])
name_sorted_df.head()

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
1671,1671,1672,11,Aapo Hyvärinen,University of Helsinki,Finland,63,65067,284,https://s.research.com/images/e3bec7c2d7ee4364...
472,472,473,21,Aaron Courville,University of Montreal,Canada,84,142816,228,https://s.research.com/images/ba1916a4e8734172...
1249,1249,1250,48,Aaron Fenster,University of Western Ontario,Canada,69,16108,543,https://s.research.com/images/91355b6c18a08d9f...
1585,1585,1586,882,Aaron Hertzmann,Adobe Systems (United States),United States,64,22872,161,https://s.research.com/images/77b4e7d37b48bde8...
994,994,995,580,Abbas El Gamal,Stanford University,United States,72,28354,243,https://s.research.com/images/c7067edb7e65fb89...


In [39]:
name_sorted_df = df.sort_values(by=['Affiliation','Name'])
name_sorted_df.head()

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
1611,1611,1612,893,Balachander Krishnamurthy,AT&T (United States),United States,64,18318,172,https://s.research.com/website/1049114443/imag...
1045,1045,1046,605,David S. Johnson,AT&T (United States),United States,71,112477,155,https://s.research.com/images/75ee6813942b9e9b...
360,360,361,218,Divesh Srivastava,AT&T (United States),United States,90,30362,434,https://s.research.com/images/a23448733fc7b2f9...
313,313,314,1,Christian S. Jensen,Aalborg University,Denmark,93,30669,553,https://s.research.com/images/e801bdf772a905ac...
583,583,584,2,Kim Guldstrand Larsen,Aalborg University,Denmark,81,29538,644,https://s.research.com/images/69d9fd07b90d4549...


In [40]:
name_sorted_df.sort_index(inplace=True)
name_sorted_df

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1994,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1995,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1996,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1997,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


# Concatenation

In [41]:
df_concat = pd.concat([df,name_sorted_df])
df_concat

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1994,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1995,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1996,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1997,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


In [42]:
df_concat = pd.concat([df,name_sorted_df],keys=['normal','sorted'])
df_concat

Unnamed: 0.1,Unnamed: 1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
normal,0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
normal,1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
normal,2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
normal,3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
normal,4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...,...
sorted,1994,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
sorted,1995,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
sorted,1996,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
sorted,1997,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


In [43]:
df_concat.loc['sorted']

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1994,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1995,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1996,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1997,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


In [44]:
df_concat.loc['normal']

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1994,1995,1996,192,Cheng-Zhong Xu,University of Macau,China,61,11960,514,https://s.research.com/images/d1652ee8cb26ff97...
1995,1996,1997,193,Debiao He,Wuhan University,China,61,11922,228,https://s.research.com/images/97930f238ef7d9b1...
1996,1997,1998,1082,Shuai Li,University of Oulu,Finland,61,11729,395,https://s.research.com/images/607b8a3af140d2e5...
1997,1998,1999,194,Cunsheng Ding,Hong Kong University of Science and Technology,China,61,11595,228,https://s.research.com/images/1c011d0a81a71824...


# Apply Function

In [45]:
df.apply(lambda x: x[0])

Unnamed: 0                                                                0
World Rank                                                                1
National Rank                                                             1
Name                                                           Anil K. Jain
Affiliation                                       Michigan State University
Country                                                       United States
D-Index                                                                 203
Citations                                                            250990
Number of Publications                                                  970
Image URLs                https://s.research.com/images/2a967b4499197336...
dtype: object

In [46]:
df.apply(lambda x: x[0],axis=1)

0          0
1          1
2          2
3          3
4          4
        ... 
1994    1995
1995    1996
1996    1997
1997    1998
1998    1999
Length: 1999, dtype: int64

In [47]:
df.apply(lambda x: x['Affiliation'],axis=1)

0                            Michigan State University
1                               University of Montreal
2           University of Illinois at Urbana-Champaign
3                             University of California
4                                 University of Oxford
                             ...                      
1994                               University of Macau
1995                                  Wuhan University
1996                                University of Oulu
1997    Hong Kong University of Science and Technology
1998                            University of Waterloo
Length: 1999, dtype: object

In [48]:
def national_top_three(rank):
  return rank <=3

national_rank = df[ df['National Rank'].apply(lambda x: national_top_three(x)) ]
national_rank

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...
1,1,2,1,Yoshua Bengio,University of Montreal,Canada,200,491250,909,https://s.research.com/images/aec914cd458a74e2...
2,2,3,2,Jiawei Han,University of Illinois at Urbana-Champaign,United States,186,209445,1177,https://s.research.com/images/42382fdef4ef0953...
3,3,4,3,Michael I. Jordan,University of California,Berkeley,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
...,...,...,...,...,...,...,...,...,...,...
1862,1862,1863,2,Antonio A. F. Loureiro,Universidade Federal de Minas Gerais,Brazil,62,14570,484,https://s.research.com/images/e2c61782ea2a7b5e...
1881,1882,1883,1,Mohammad S. Obaidat,University of Jordan,Jordan,62,12966,899,https://s.research.com/images/b532535373c28db4...
1894,1895,1896,3,Eibe Frank,University of Waikato,New Zealand,61,133778,160,https://s.research.com/images/4c8d865f130dcc55...
1928,1929,1930,2,Moustafa Youssef,American University in Cairo,Egypt,61,17305,272,https://s.research.com/images/74f32cc145d7b93a...


In [49]:
national_rank.sort_values(by=["Country","National Rank"])

Unnamed: 0.1,Unnamed: 0,World Rank,National Rank,Name,Affiliation,Country,D-Index,Citations,Number of Publications,Image URLs
10,10,11,1,Rajkumar Buyya,University of Melbourne,Australia,156,121353,1058,https://s.research.com/images/2994e0b16d912fb9...
23,23,24,2,Dacheng Tao,University of Sydney,Australia,142,74454,1217,https://s.research.com/images/72330d26d0dcc915...
282,282,283,3,James C. Bezdek,University of Melbourne,Australia,94,74347,402,https://s.research.com/images/c9341d42d32f4150...
152,152,153,1,Thomas A. Henzinger,Institute of Science and Technology Austria,Austria,107,54669,464,https://s.research.com/images/2f5eca6562e3fea5...
171,171,172,2,Gert Pfurtscheller,Graz University of Technology,Austria,104,54135,378,https://s.research.com/images/ba6aa74167d15d2d...
...,...,...,...,...,...,...,...,...,...,...
4,4,5,1,Andrew Zisserman,University of Oxford,United Kingdom,175,255987,698,https://s.research.com/images/2a3913cff9795e86...
44,44,45,2,Zidong Wang,Brunel University London,United Kingdom,130,52986,852,https://s.research.com/images/58bbd9d83bb85b8f...
45,45,46,3,Nicholas R. Jennings,Loughborough University,United Kingdom,129,83550,723,https://s.research.com/images/3ac5b504ec1dd4d0...
0,0,1,1,Anil K. Jain,Michigan State University,United States,203,250990,970,https://s.research.com/images/2a967b4499197336...


# Aggregate

In [50]:
country_presence = df.groupby('Country')
country_presence.first()

Unnamed: 0_level_0,Unnamed: 0,World Rank,National Rank,Name,Affiliation,D-Index,Citations,Number of Publications,Image URLs
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Australia,10,11,1,Rajkumar Buyya,University of Melbourne,156,121353,1058,https://s.research.com/images/2994e0b16d912fb9...
Austria,152,153,1,Thomas A. Henzinger,Institute of Science and Technology Austria,107,54669,464,https://s.research.com/images/2f5eca6562e3fea5...
Baltimore County,197,198,123,Tim Finin,University of Maryland,101,49841,579,https://s.research.com/images/49745edf0a51c611...
Belgium,128,129,1,Marco Dorigo,Université Libre de Bruxelles,110,140006,521,https://s.research.com/images/7c9e5565640fb89a...
Berkeley,3,4,3,Michael I. Jordan,University of California,176,220056,776,https://s.research.com/images/56e4335ff90165e8...
...,...,...,...,...,...,...,...,...,...
Turkey,1844,1845,1,Murat Uysal,Özyeğin University,62,16640,437,https://s.research.com/images/43ec08467f4a3d5b...
US,563,564,328,Jonathan J. Hull,Independent Scientist / Consultant,82,21433,328,https://s.research.com/images/2f79ee5bc28f1f72...
United Arab Emirates,37,38,1,Ian F. Akyildiz,Technology Innovation Institute,131,126102,601,https://s.research.com/images/7191b523b8868133...
United Kingdom,4,5,1,Andrew Zisserman,University of Oxford,175,255987,698,https://s.research.com/images/2a3913cff9795e86...


In [51]:
country_presence.describe()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,World Rank,World Rank,...,National Rank,National Rank,D-Index,D-Index,D-Index,D-Index,D-Index,D-Index,D-Index,D-Index
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Australia,51.0,1173.764706,527.466874,10.0,852.50,1185.0,1597.0,1969.0,51.0,1174.764706,...,38.50,52.0,51.0,73.941176,17.896829,61.0,64.0,70.0,75.0,156.0
Austria,13.0,880.307692,622.492488,152.0,319.00,839.0,1595.0,1854.0,13.0,881.307692,...,10.00,13.0,13.0,79.769231,15.471231,62.0,64.0,75.0,92.0,107.0
Baltimore County,5.0,949.400000,605.391444,197.0,422.00,1152.0,1472.0,1504.0,5.0,950.400000,...,824.00,840.0,5.0,77.800000,15.706686,65.0,66.0,70.0,87.0,101.0
Belgium,23.0,1036.782609,497.620516,128.0,660.00,1050.0,1254.0,1944.0,23.0,1037.782609,...,17.50,23.0,23.0,74.608696,11.858020,61.0,68.5,71.0,79.0,110.0
Berkeley,45.0,642.377778,601.038922,3.0,106.00,456.0,1047.0,1932.0,45.0,643.377778,...,607.00,1056.0,45.0,93.822222,28.575489,61.0,71.0,85.0,114.0,176.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Turkey,1.0,1844.000000,,1844.0,1844.00,1844.0,1844.0,1844.0,1.0,1845.000000,...,1.00,1.0,1.0,62.000000,,62.0,62.0,62.0,62.0,62.0
US,1.0,563.000000,,563.0,563.00,563.0,563.0,563.0,1.0,564.000000,...,328.00,328.0,1.0,82.000000,,82.0,82.0,82.0,82.0,82.0
United Arab Emirates,3.0,267.000000,213.550931,37.0,171.00,305.0,382.0,459.0,3.0,268.000000,...,1.50,2.0,3.0,103.000000,24.576411,85.0,89.0,93.0,112.0,131.0
United Kingdom,112.0,949.767857,573.262592,4.0,440.25,858.5,1464.5,1963.0,112.0,950.767857,...,85.25,113.0,112.0,78.285714,17.134039,61.0,66.0,74.5,86.0,175.0


In [54]:
country_presence['National Rank'].mean()

Country
 Australia                25.588235
 Austria                   7.000000
 Baltimore County        541.400000
 Belgium                  12.000000
 Berkeley                369.444444
                            ...    
 Turkey                    1.000000
 US                      328.000000
 United Arab Emirates      1.333333
 United Kingdom           57.321429
 United States           551.425785
Name: National Rank, Length: 66, dtype: float64

In [55]:
country_and_university_presence = df.groupby(['Country','Affiliation'])
country_and_university_presence.first()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,World Rank,National Rank,Name,D-Index,Citations,Number of Publications,Image URLs
Country,Affiliation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Australia,Australian National University,682,683,10,Richard Hartley,78,70243,377,https://s.research.com/images/a2eb08b356a3f291...
Australia,Commonwealth Scientific and Industrial Research Organisation,1080,1081,18,Toby Walsh,71,22398,416,https://s.research.com/images/27d7ff0f7401e659...
Australia,Curtin University,1409,1410,32,Xiangyu Wang,67,13600,389,https://s.research.com/images/cd1125e8c229d79e...
Australia,Deakin University,1316,1317,30,Svetha Venkatesh,68,17408,769,https://s.research.com/images/a2e164ed0bf34c92...
Australia,Griffith University,1709,1710,42,Kuldip K. Paliwal,63,20580,360,https://s.research.com/images/b05a2feb697b3b35...
...,...,...,...,...,...,...,...,...,...
United States,Worcester Polytechnic Institute,1557,1558,864,Elke A. Rundensteiner,65,13146,561,https://s.research.com/images/9cac1b90dedd50cf...
United States,Yale University,693,694,409,Leandros Tassiulas,78,32264,509,https://s.research.com/images/b1264cb1b432aad0...
United States,ZapFraud,1232,1233,711,Markus Jakobsson,69,18271,212,https://s.research.com/images/d6c9cac7ce7ebb2f...
United States,Zillow Group (United States),560,561,327,Sing Bing Kang,82,24645,296,https://s.research.com/images/c0baad35f59865dc...


In [57]:
country_and_university_presence['D-Index'].mean()

Country         Affiliation                                                 
 Australia      Australian National University                                  75.0
                Commonwealth Scientific and Industrial Research Organisation    71.0
                Curtin University                                               64.0
                Deakin University                                               68.0
                Griffith University                                             63.0
                                                                                ... 
 United States  Worcester Polytechnic Institute                                 65.0
                Yale University                                                 72.5
                ZapFraud                                                        69.0
                Zillow Group (United States)                                    82.0
                you.com                                                  

In [58]:
country_and_university_presence['D-Index'].median()

Country         Affiliation                                                 
 Australia      Australian National University                                  75.0
                Commonwealth Scientific and Industrial Research Organisation    71.0
                Curtin University                                               64.0
                Deakin University                                               68.0
                Griffith University                                             63.0
                                                                                ... 
 United States  Worcester Polytechnic Institute                                 65.0
                Yale University                                                 73.5
                ZapFraud                                                        69.0
                Zillow Group (United States)                                    82.0
                you.com                                                  

### Cross Tab

In [59]:
pd.crosstab(df["Country"],df["Affiliation"],margins=True)

Affiliation,AT&T (United States),Aalborg University,Aalto University,Aarhus University,Adobe Systems (United States),Agency for Science,Alfréd Rényi Institute of Mathematics,Algorand Foundation,Alibaba Group (China),Allen Institute for Artificial Intelligence,...,ZapFraud,Zhejiang University,Zillow Group (United States),you.com,École Normale Supérieure,École Polytechnique Fédérale de Lausanne,École de Technologie Supérieure,École des Ponts ParisTech,Özyeğin University,All
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Australia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,51
Austria,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,13
Baltimore County,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
Belgium,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,23
Berkeley,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
US,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
United Arab Emirates,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
United Kingdom,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,112
United States,3,0,0,0,2,0,0,0,0,1,...,1,0,1,1,0,0,0,0,0,923


In [60]:
df['Affiliation'].value_counts()

University of California                       125
MIT                                             62
Carnegie Mellon University                      60
Microsoft (United States)                       54
Stanford University                             52
                                              ... 
Southeast University                             1
Universidad Publica De Navarra                   1
Max Planck Society                               1
Allen Institute for Artificial Intelligence      1
National Cheng Kung University                   1
Name: Affiliation, Length: 557, dtype: int64

In [61]:
for key, count in df['Affiliation'].value_counts().items():
  if key == "Zhejiang University":
    print(count)

9


In [62]:
df['Country'].value_counts()

 United States     923
 China             188
 United Kingdom    112
 Germany            82
 Canada             77
                  ... 
 Estonia             1
 Hyderabad           1
 Turkey              1
 Jordan              1
 Cyprus              1
Name: Country, Length: 66, dtype: int64

### Pivot Table

In [64]:
pd.pivot_table(df,index=['Country','Affiliation'],values='D-Index',aggfunc=[np.mean, np.median, min, max, np.std])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median,min,max,std
Unnamed: 0_level_1,Unnamed: 1_level_1,D-Index,D-Index,D-Index,D-Index,D-Index
Country,Affiliation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Australia,Australian National University,75.0,75.0,72,78,4.242641
Australia,Commonwealth Scientific and Industrial Research Organisation,71.0,71.0,71,71,
Australia,Curtin University,64.0,64.0,61,67,4.242641
Australia,Deakin University,68.0,68.0,68,68,
Australia,Griffith University,63.0,63.0,63,63,
...,...,...,...,...,...,...
United States,Worcester Polytechnic Institute,65.0,65.0,65,65,
United States,Yale University,72.5,73.5,65,78,6.137318
United States,ZapFraud,69.0,69.0,69,69,
United States,Zillow Group (United States),82.0,82.0,82,82,


In [65]:
pd.pivot_table(df,index=['Country','Affiliation'],values=['D-Index','National Rank'],aggfunc=[np.mean, np.median, min, max, np.std])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,median,median,min,min,max,max,std,std
Unnamed: 0_level_1,Unnamed: 1_level_1,D-Index,National Rank,D-Index,National Rank,D-Index,National Rank,D-Index,National Rank,D-Index,National Rank
Country,Affiliation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Australia,Australian National University,75.0,12.5,75.0,12.5,72,10,78,15,4.242641,3.535534
Australia,Commonwealth Scientific and Industrial Research Organisation,71.0,18.0,71.0,18.0,71,18,71,18,,
Australia,Curtin University,64.0,40.5,64.0,40.5,61,32,67,49,4.242641,12.020815
Australia,Deakin University,68.0,30.0,68.0,30.0,68,30,68,30,,
Australia,Griffith University,63.0,42.0,63.0,42.0,63,42,63,42,,
...,...,...,...,...,...,...,...,...,...,...,...
United States,Worcester Polytechnic Institute,65.0,864.0,65.0,864.0,65,864,65,864,,
United States,Yale University,72.5,584.5,73.5,545.0,65,409,78,839,6.137318,204.995122
United States,ZapFraud,69.0,711.0,69.0,711.0,69,711,69,711,,
United States,Zillow Group (United States),82.0,327.0,82.0,327.0,82,327,82,327,,
