In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings 
warnings.filterwarnings('ignore')

In [2]:
# Notebook-wide pandas configuration.
# Turn off the chained-assignment check (no SettingWithCopy warning/error).
pd.options.mode.chained_assignment = None

# Make DataFrame.plot() / Series.plot() render through plotly.
pd.set_option("plotting.backend", "plotly")

In [5]:
# This study only uses the 'UniversityRanking' table.
# NOTE(review): absolute local path -- the notebook only runs on a machine that
# has this file at this location.
university_csv = "D://Data_h//nirf//UniversityRanking.csv"

df_university = pd.read_csv(filepath_or_buffer=university_csv)

In [6]:
# Preview the first five rows of the raw ranking table.
df_university.head(n=5)

Unnamed: 0,Institute Id,Institute Name,City,State,Score_21,Rank_21,TLR_21,RPC_21,GO_21,OI_21,...,GO_17,OI_17,Perception_17,Score_16,Rank_16,TLR_16,RPC_16,GO_16,OI_16,Perception_16
0,IR-O-U-0220,Indian Institute of Science,Bengaluru,Karnataka,82.67,1.0,79.13,91.48,78.23,58.39,...,,,,91.81,1,94.45,96.12,100.0,67.18,100.0
1,IR-O-U-0109,Jawaharlal Nehru University,New Delhi,Delhi,67.99,2.0,71.19,44.96,95.07,73.36,...,98.71,82.4,47.27,86.45,3,89.45,84.33,100.0,73.57,98.0
2,IR-O-U-0500,Banaras Hindu University,Varanasi,Uttar Pradesh,64.02,3.0,64.5,45.0,100.0,53.13,...,94.36,62.97,44.01,81.22,7,70.28,81.11,92.6,87.75,99.0
3,IR-O-U-0570,Calcutta University,Kolkata,West Bengal,62.06,4.0,66.2,43.92,91.72,60.98,...,95.76,39.02,17.15,,,,,,,
4,IR-O-U-0436,Amrita Vishwa Vidyapeetham,Coimbatore,Tamil Nadu,61.23,5.0,64.23,54.33,71.35,65.54,...,65.77,82.17,26.12,74.02,14A,74.39,69.03,100.0,74.02,80.0


In [7]:
# Print the column names, non-null counts and dtypes of the raw table.
df_university.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 46 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Institute Id    160 non-null    object 
 1   Institute Name  160 non-null    object 
 2   City            160 non-null    object 
 3   State           160 non-null    object 
 4   Score_21        100 non-null    float64
 5   Rank_21         100 non-null    float64
 6   TLR_21          100 non-null    float64
 7   RPC_21          100 non-null    float64
 8   GO_21           100 non-null    float64
 9   OI_21           100 non-null    float64
 10  Perception_21   100 non-null    float64
 11  Score_20        100 non-null    float64
 12  Rank_20         100 non-null    float64
 13  TLR_20          100 non-null    float64
 14  RPC_20          100 non-null    float64
 15  GO_20           100 non-null    float64
 16  OI_20           100 non-null    float64
 17  Perception_20   100 non-null    flo

In [8]:
# 'Rank_16' was read as dtype object; list its distinct values to see
# which entries are not plain numbers.
df_university.loc[:, 'Rank_16'].unique()

array(['1', '3', '7', nan, '14A', '83', '32', '4', '10', '6', '14', '2',
       '9', '17', '16A', '53', '12', '43', '33', '49', '29', '16', '42',
       '18', '73', '30', '22', '5', '52', '63', '72', '13', '15', '11',
       '64', '55', '47', '87', '44', '21', '93', '65', '66', '76', '91',
       '20', '51', '74', '45', '40', '36', '48', '41A', '31', '98', '8',
       '35', '57', '77', '37', '68', '69'], dtype=object)

In [9]:
# Some 2016 ranks carry a trailing 'A' (e.g. '14A', '16A', '41A'), which is why
# the column was read as object. Strip the suffix and convert to float.
#
# The column is cast to str first so this cell can be re-run safely: after the
# first execution 'Rank_16' is already float, and calling `.str` on it directly
# would raise AttributeError. Missing ranks pass through as the text 'nan',
# which the float conversion turns back into NaN.
df_university['Rank_16'] = (
    df_university['Rank_16']
    .astype(str)
    .str.rstrip('A')
    .astype(float)
)

In [10]:
# Build a frame holding only the identifying columns and the yearly ranks.
rank_columns = [
    'Institute Name', 'City', 'State',
    'Rank_16', 'Rank_17', 'Rank_18', 'Rank_19', 'Rank_20', 'Rank_21',
]

df = df_university.loc[:, rank_columns]

In [11]:
# Give the columns shorter labels: 'Institute' and the four-digit year.
df = df.rename(
    columns={
        "Institute Name": "Institute",
        "Rank_16": "2016",
        "Rank_17": "2017",
        "Rank_18": "2018",
        "Rank_19": "2019",
        "Rank_20": "2020",
        "Rank_21": "2021",
    }
)

In [12]:
# Display the rank table after the columns were renamed.
df


Unnamed: 0,Institute,City,State,2016,2017,2018,2019,2020,2021
0,Indian Institute of Science,Bengaluru,Karnataka,1.0,,1.0,1.0,1.0,1.0
1,Jawaharlal Nehru University,New Delhi,Delhi,3.0,2.0,2.0,2.0,2.0,2.0
2,Banaras Hindu University,Varanasi,Uttar Pradesh,7.0,3.0,3.0,3.0,3.0,3.0
3,Calcutta University,Kolkata,West Bengal,,16.0,14.0,5.0,7.0,4.0
4,Amrita Vishwa Vidyapeetham,Coimbatore,Tamil Nadu,14.0,9.0,8.0,8.0,4.0,5.0
...,...,...,...,...,...,...,...,...,...
155,Jaypee University of Information Technology-Solan,Solan,Himachal Pradesh,37.0,93.0,,,,
156,University of Allahabad,Allahabad,Uttar Pradesh,68.0,95.0,,,,
157,Sri Krishnadevaraya University,Anantapur,Andhra Pradesh,,96.0,,,,
158,Jawaharlal Nehru Technological University,Kakinada,Andhra Pradesh,69.0,99.0,,,,


In [20]:
# Yearly overall scores per institute.
# .copy() makes df_scores an independent frame: it is modified later in the
# notebook, and writing to a bare column subset of df_university is a
# SettingWithCopy hazard that only stays silent because the chained-assignment
# check is disabled globally.
df_scores = df_university[
    ['Institute Name', 'Score_21', 'Score_20', 'Score_19', 'Score_18', 'Score_17', 'Score_16']
].copy()

In [13]:
# Count the missing ('NaN') entries in every column.
print(df.isnull().sum(axis=0))

Institute     0
City          0
State         0
2016         99
2017         60
2018         60
2019         60
2020         60
2021         60
dtype: int64


In [14]:
# Replace missing ranks with 101: ranked universities go up to 100, so 101
# marks "not ranked that year".
# Only the year columns are filled, so a missing Institute/City/State value
# can never be silently replaced by a number.
UNRANKED = 101
year_columns = ['2016', '2017', '2018', '2019', '2020', '2021']

df = df.fillna({year: UNRANKED for year in year_columns})

In [15]:
# Keep at most the first five space-separated words of each institute name so
# labels stay short in the charts.
names = [" ".join(full_name.split(' ')[:5]) for full_name in df['Institute']]
df['Institute'] = names

In [16]:
# Display the rank table after filling missing ranks and shortening the names.
df


Unnamed: 0,Institute,City,State,2016,2017,2018,2019,2020,2021
0,Indian Institute of Science,Bengaluru,Karnataka,1.0,101.0,1.0,1.0,1.0,1.0
1,Jawaharlal Nehru University,New Delhi,Delhi,3.0,2.0,2.0,2.0,2.0,2.0
2,Banaras Hindu University,Varanasi,Uttar Pradesh,7.0,3.0,3.0,3.0,3.0,3.0
3,Calcutta University,Kolkata,West Bengal,101.0,16.0,14.0,5.0,7.0,4.0
4,Amrita Vishwa Vidyapeetham,Coimbatore,Tamil Nadu,14.0,9.0,8.0,8.0,4.0,5.0
...,...,...,...,...,...,...,...,...,...
155,Jaypee University of Information Technology-Solan,Solan,Himachal Pradesh,37.0,93.0,101.0,101.0,101.0,101.0
156,University of Allahabad,Allahabad,Uttar Pradesh,68.0,95.0,101.0,101.0,101.0,101.0
157,Sri Krishnadevaraya University,Anantapur,Andhra Pradesh,101.0,96.0,101.0,101.0,101.0,101.0
158,Jawaharlal Nehru Technological University,Kakinada,Andhra Pradesh,69.0,99.0,101.0,101.0,101.0,101.0


In [21]:
# Keep only the rows with fewer than 4 missing values, then renumber the index.
# The per-row count is kept in a separate Series instead of being written into
# df_scores as a temporary 'Nan' column and dropped again afterwards; the
# resulting frame is the same, without mutating a slice or using inplace=True.
missing_per_row = df_scores.isnull().sum(axis=1)

df_scores = df_scores[missing_per_row < 4].reset_index(drop=True)

In [22]:
# Put the score columns in chronological order (2016 -> 2021).
score_columns = [
    'Institute Name',
    'Score_16', 'Score_17', 'Score_18', 'Score_19', 'Score_20', 'Score_21',
]
df_scores = df_scores.loc[:, score_columns]

df_scores.head()

Unnamed: 0,Institute Name,Score_16,Score_17,Score_18,Score_19,Score_20,Score_21
0,Indian Institute of Science,91.81,,82.16,82.28,84.18,82.67
1,Jawaharlal Nehru University,86.45,61.53,67.57,68.68,70.16,67.99
2,Banaras Hindu University,81.22,58.92,63.52,64.55,63.15,64.02
3,Calcutta University,,48.9,53.38,60.87,61.53,62.06
4,Amrita Vishwa Vidyapeetham,74.02,54.7,58.46,59.22,62.27,61.23


In [25]:
# Plot the score history of one institute.
institute_name = 'Shoolini University of Biotechnology and Management Sciences'

# regex=False: the name is a literal substring, not a regular expression.
# na=False: a missing institute name counts as "no match" instead of putting
# NaN into the boolean mask.
is_selected = df_scores['Institute Name'].str.contains(institute_name, regex=False, na=False)
sp_df = df_scores[is_selected]
print(sp_df)

if sp_df.empty:
    # Nothing matched -- possibly the institute was removed by an earlier
    # filter on df_scores, or its name is spelled differently in the data.
    print(f"No rows in df_scores match '{institute_name}'; nothing to plot.")
else:
    # One line per matching institute, with the score columns along the x axis.
    fig = sp_df.set_index('Institute Name').T.plot()
    fig.update_layout(width=1000, height=500)
    fig.show()

Empty DataFrame
Columns: [Institute Name, Score_16, Score_17, Score_18, Score_19, Score_20, Score_21]
Index: []
