# Data Visualization using Pandas

## Importing the necessary libraries  

In [26]:
import pandas as pd
import numpy as np

## Intro to Dataframes

In [27]:
df = pd.DataFrame([[1,2],[4,5],[6,7]])
df

Unnamed: 0,0,1
0,1,2
1,4,5
2,6,7


In [28]:
df = pd.DataFrame([[1,2],[4,5],[6,7]], columns = ["A","B"])
df


Unnamed: 0,A,B
0,1,2
1,4,5
2,6,7


In [29]:
df = pd.DataFrame([[1,2],[4,5],[6,7]], columns = ["A","B"], index=["x","y","z"])
df

Unnamed: 0,A,B
x,1,2
y,4,5
z,6,7


In [30]:
df.columns


Index(['A', 'B'], dtype='object')

In [31]:
df.index

Index(['x', 'y', 'z'], dtype='object')

### **Indices are the rows and columns are columns**
#### You can use .tolist after index and columns to get it in list format

In [32]:
df.index.tolist()

['x', 'y', 'z']

In [33]:
df.columns.tolist()

['A', 'B']

In [34]:
df.head()


Unnamed: 0,A,B
x,1,2
y,4,5
z,6,7


In [35]:
df.tail()

Unnamed: 0,A,B
x,1,2
y,4,5
z,6,7


#### df.head() only gives the first 5 data rows and df.tail() gives the last 5

In [None]:
df.head(1)


Unnamed: 0,A,B
x,1,2


In [None]:
df.head(2)

Unnamed: 0,A,B
x,1,2
y,4,5


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, x to z
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
dtypes: int64(2)
memory usage: 72.0+ bytes


#### Gives information regarding the dataframe

In [None]:
df.describe()

Unnamed: 0,A,B
count,3.0,3.0
mean,3.666667,4.666667
std,2.516611,2.516611
min,1.0,2.0
25%,2.5,3.5
50%,4.0,5.0
75%,5.0,6.0
max,6.0,7.0


#### Gives meaningful information regarding the dataframe

In [None]:
df.nunique()

A    3
B    3
dtype: int64

#### Gives the number of non unique values in each row
#### can be specfically done on one row as well as shown

In [None]:
df['A'].nunique()

3

In [42]:
df['A'].unique()

array([1, 4, 6], dtype=int64)

In [None]:
df.shape

(3, 2)

#### Gives the shape of the dataframe

In [None]:
df.size

6

#### Total Number of spaces ( no. of rows multiplied by no. of columns)

## To import the dataset from the excel file to the dataframe from PANDAS

In [89]:
jee = pd.read_csv('JEE_Dropout_After_Class_12.csv', delimiter=',')
jee.head()

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,70.06,58.75,64.33,State,78.0,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
3,93.32,60.72,69.33,ICSE,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0
4,68.72,77.73,82.37,CBSE,89.02,1,Allen,6.3,Mid,Graduate,Semi-Urban,High,No,Yes,0


#### pd.read_csv reads the csv file and enters it into a Dataframe called "df" and if it is a text file, instead of the delimiter being ',', you can put '\t'



In [90]:
jee.columns.tolist()

['jee_main_score',
 'jee_advanced_score',
 'mock_test_score_avg',
 'school_board',
 'class_12_percent',
 'attempt_count',
 'coaching_institute',
 'daily_study_hours',
 'family_income',
 'parent_education',
 'location_type',
 'peer_pressure_level',
 'mental_health_issues',
 'admission_taken',
 'dropout']

### code such as **jee.tocsv, jee.toparquet, jee.toexcel** will help us save the changes in the dataframe to the source file.

In [94]:
jee.sample(10)

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
3286,67.38,68.43,82.21,State,66.65,1,Allen,4.5,Mid,PG,Urban,High,Yes,Yes,0
494,93.32,72.89,59.53,State,87.1,2,,1.5,Mid,Graduate,Urban,Low,Yes,No,0
3887,69.73,63.95,60.04,CBSE,58.88,1,Local,2.7,Low,Graduate,Urban,Medium,Yes,Yes,0
293,83.89,51.31,65.91,State,77.07,1,Allen,8.6,High,PG,Rural,Medium,Yes,Yes,0
686,68.64,86.08,48.34,State,95.24,2,Allen,3.7,Mid,Upto 10th,Urban,High,Yes,No,0
3908,80.2,75.04,59.89,CBSE,81.71,1,Allen,0.3,Low,PG,Semi-Urban,High,Yes,Yes,1
3184,61.08,66.86,62.81,State,70.69,2,,4.6,Mid,Graduate,Urban,Medium,Yes,Yes,0
1731,67.11,51.86,97.35,CBSE,85.43,1,,5.1,High,Upto 10th,Semi-Urban,Medium,No,Yes,0
3077,79.74,84.18,90.26,CBSE,77.38,1,Local,5.9,High,Graduate,Semi-Urban,Medium,Yes,No,0
1424,57.37,57.19,54.16,ICSE,77.65,2,Allen,4.4,Mid,12th,Rural,Low,No,Yes,0


#### .sample picks the required amount of rows from the dataframe. In the parameters, random_state="" helps act like a seed to lock in a selection of random picks

## loc and iloc

### The only difference between '.loc' and '.iloc' (Integer loc, only integers allowed) is that loc can hold the column names while iloc will only allow the index and column numbers to be used to select data
### **Another difference is that in '.loc', the upper limit is also included in the data while in '.iloc', it is not shown**

In [None]:
jee.loc[0]

jee_main_score              78.95
jee_advanced_score          59.22
mock_test_score_avg         59.86
school_board                 CBSE
class_12_percent            70.09
attempt_count                   1
coaching_institute        FIITJEE
daily_study_hours             5.4
family_income                 Low
parent_education        Upto 10th
location_type               Urban
peer_pressure_level           Low
mental_health_issues           No
admission_taken                No
dropout                         1
Name: 0, dtype: object

In [95]:
jee.loc[0:3]

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,70.06,58.75,64.33,State,78.0,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
3,93.32,60.72,69.33,ICSE,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0


In [96]:
jee.loc[0:3,["jee_main_score","attempt_count"]]

Unnamed: 0,jee_main_score,attempt_count
0,78.95,1
1,70.06,1
2,81.07,1
3,93.32,1


In [None]:
jee.iloc[5:]

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
5,68.72,45.61,58.75,State,79.28,1,Local,4.5,Mid,Graduate,Semi-Urban,Low,No,Yes,0
6,94.11,82.78,80.01,CBSE,76.31,1,Local,3.7,Mid,Upto 10th,Semi-Urban,Medium,No,Yes,0
7,82.74,73.80,58.62,CBSE,77.15,1,,2.2,High,12th,Urban,Low,No,Yes,0
8,65.43,54.25,57.83,ICSE,71.11,1,Local,5.7,Mid,Upto 10th,Semi-Urban,Low,No,Yes,0
9,79.60,63.74,77.82,State,94.96,2,,2.7,Mid,12th,Rural,High,Yes,No,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,71.31,86.82,57.08,State,84.45,1,FIITJEE,4.7,High,PG,Semi-Urban,High,Yes,Yes,0
4996,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
4997,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0
4998,83.31,73.93,57.39,CBSE,62.25,1,Allen,4.0,High,Upto 10th,Urban,Low,No,Yes,0


In [None]:
jee.iloc[0]

jee_main_score              78.95
jee_advanced_score          59.22
mock_test_score_avg         59.86
school_board                 CBSE
class_12_percent            70.09
attempt_count                   1
coaching_institute        FIITJEE
daily_study_hours             5.4
family_income                 Low
parent_education        Upto 10th
location_type               Urban
peer_pressure_level           Low
mental_health_issues           No
admission_taken                No
dropout                         1
Name: 0, dtype: object

In [97]:
jee.iloc[0:3]

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,70.06,58.75,64.33,State,78.0,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1


In [87]:
jee.iloc[0:2,["jee_main_score"]]

IndexError: .iloc requires numeric indexers, got ['jee_main_score']

In [98]:
jee.index=jee["school_board"]
jee

Unnamed: 0_level_0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
school_board,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
CBSE,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
State,70.06,58.75,64.33,State,78.00,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
ICSE,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
ICSE,93.32,60.72,69.33,ICSE,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0
CBSE,68.72,77.73,82.37,CBSE,89.02,1,Allen,6.3,Mid,Graduate,Semi-Urban,High,No,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
State,71.31,86.82,57.08,State,84.45,1,FIITJEE,4.7,High,PG,Semi-Urban,High,Yes,Yes,0
CBSE,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
CBSE,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0
CBSE,83.31,73.93,57.39,CBSE,62.25,1,Allen,4.0,High,Upto 10th,Urban,Low,No,Yes,0


### The Serial Numbers have been replaced with the curriculum board

In [99]:
jee.loc[["CBSE","ICSE","State"],"jee_main_score"]


school_board
CBSE      78.95
CBSE      68.72
CBSE      94.11
CBSE      82.74
CBSE      65.51
          ...  
State    100.00
State     70.39
State     62.80
State     71.31
State     60.13
Name: jee_main_score, Length: 5000, dtype: float64

In [100]:
jee.loc[["CBSE","ICSE","State"],"jee_main_score"].iloc[0:8]

school_board
CBSE    78.95
CBSE    68.72
CBSE    94.11
CBSE    82.74
CBSE    65.51
CBSE    75.39
CBSE    64.13
CBSE    52.23
Name: jee_main_score, dtype: float64

In [101]:
jee.reset_index(drop=True, inplace=True)
jee

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,70.06,58.75,64.33,State,78.00,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
3,93.32,60.72,69.33,ICSE,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0
4,68.72,77.73,82.37,CBSE,89.02,1,Allen,6.3,Mid,Graduate,Semi-Urban,High,No,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,71.31,86.82,57.08,State,84.45,1,FIITJEE,4.7,High,PG,Semi-Urban,High,Yes,Yes,0
4996,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
4997,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0
4998,83.31,73.93,57.39,CBSE,62.25,1,Allen,4.0,High,Upto 10th,Urban,Low,No,Yes,0


In [102]:
jee.groupby("school_board").first().loc[["CBSE", "ICSE", "State"], "jee_main_score"]

school_board
CBSE     78.95
ICSE     81.07
State    70.06
Name: jee_main_score, dtype: float64

### Picks the first among the three curricula and displays the intended column

In [103]:
jee.groupby("school_board").apply(lambda x: x.sample(1))[["school_board", "jee_main_score"]].reset_index(drop=True)

Unnamed: 0,school_board,jee_main_score
0,CBSE,88.81
1,ICSE,83.42
2,State,85.2


##### Upgraded version of the previous one
### Picks a random one among the three curricula and displays the intended column

## What if we want to change a value somewhere in the dataframe?

In [104]:
jee

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,70.06,58.75,64.33,State,78.00,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
3,93.32,60.72,69.33,ICSE,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0
4,68.72,77.73,82.37,CBSE,89.02,1,Allen,6.3,Mid,Graduate,Semi-Urban,High,No,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,71.31,86.82,57.08,State,84.45,1,FIITJEE,4.7,High,PG,Semi-Urban,High,Yes,Yes,0
4996,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
4997,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0
4998,83.31,73.93,57.39,CBSE,62.25,1,Allen,4.0,High,Upto 10th,Urban,Low,No,Yes,0


In [105]:
jee.loc[1,"jee_main_score"] = 80
jee

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,CBSE,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,80.00,58.75,64.33,State,78.00,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,ICSE,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
3,93.32,60.72,69.33,ICSE,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0
4,68.72,77.73,82.37,CBSE,89.02,1,Allen,6.3,Mid,Graduate,Semi-Urban,High,No,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,71.31,86.82,57.08,State,84.45,1,FIITJEE,4.7,High,PG,Semi-Urban,High,Yes,Yes,0
4996,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
4997,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0
4998,83.31,73.93,57.39,CBSE,62.25,1,Allen,4.0,High,Upto 10th,Urban,Low,No,Yes,0


#### You can see the change in the 1st row under the "JEE_MAIN_SCORE" column
#### this can be done with multiple values as well (lets change the first three curricula to State)

In [106]:
jee.loc[0:3,"school_board"] = "State"
jee

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
0,78.95,59.22,59.86,State,70.09,1,FIITJEE,5.4,Low,Upto 10th,Urban,Low,No,No,1
1,80.00,58.75,64.33,State,78.00,1,FIITJEE,5.5,Mid,Upto 10th,Urban,Low,Yes,No,0
2,81.07,37.27,60.83,State,64.36,1,FIITJEE,7.0,Low,PG,Semi-Urban,Medium,Yes,No,1
3,93.32,60.72,69.33,State,73.21,1,FIITJEE,2.1,Low,12th,Semi-Urban,Medium,Yes,Yes,0
4,68.72,77.73,82.37,CBSE,89.02,1,Allen,6.3,Mid,Graduate,Semi-Urban,High,No,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,71.31,86.82,57.08,State,84.45,1,FIITJEE,4.7,High,PG,Semi-Urban,High,Yes,Yes,0
4996,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
4997,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0
4998,83.31,73.93,57.39,CBSE,62.25,1,Allen,4.0,High,Upto 10th,Urban,Low,No,Yes,0


## .iat and .at

In [107]:
jee.at[0,"school_board"]

'State'

In [108]:
jee.iat[0,3]

'State'

#### here you can see that through '.at' and '.iat' (Integer at, only integers allowed), we were able to select the same thing. This can also be done using loc and iloc

In [109]:
jee.loc[0,"school_board"]

'State'

In [110]:
jee.iloc[0,3]

'State'

## Accessing Data 

In [111]:
jee["jee_advanced_score"]

0       59.22
1       58.75
2       37.27
3       60.72
4       77.73
        ...  
4995    86.82
4996    34.03
4997    54.71
4998    73.93
4999    76.31
Name: jee_advanced_score, Length: 5000, dtype: float64

In [112]:
jee.school_board

0       State
1       State
2       State
3       State
4        CBSE
        ...  
4995    State
4996     CBSE
4997     CBSE
4998     CBSE
4999    State
Name: school_board, Length: 5000, dtype: object

In [113]:
jee.sort_values(["jee_main_score","jee_advanced_score"], ascending=[0,1])

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
2305,100.00,17.29,59.83,State,81.92,1,FIITJEE,6.2,Low,Graduate,Rural,Low,Yes,Yes,0
1233,100.00,26.19,66.27,CBSE,75.34,2,,6.0,High,PG,Rural,Medium,No,Yes,0
880,100.00,27.54,57.79,ICSE,95.99,1,FIITJEE,3.2,High,Graduate,Urban,Medium,Yes,No,0
1924,100.00,33.48,62.04,ICSE,78.33,2,Local,1.7,Low,Upto 10th,Urban,Low,Yes,Yes,0
4186,100.00,38.65,73.29,State,60.75,1,Allen,4.6,Mid,PG,Urban,Low,No,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4198,29.89,26.20,69.21,CBSE,92.55,2,Allen,7.3,High,PG,Rural,High,Yes,No,1
2024,29.73,84.20,79.59,ICSE,72.36,1,Local,5.1,Low,Graduate,Semi-Urban,High,No,Yes,0
4725,27.61,61.21,95.65,ICSE,81.77,1,Local,5.0,Low,Graduate,Urban,Low,No,No,1
4779,27.53,71.90,62.12,ICSE,68.95,2,FIITJEE,5.1,Low,Upto 10th,Semi-Urban,Medium,Yes,No,1


In [114]:
jee.sort_values(["jee_main_score","jee_advanced_score"], ascending=False).loc[0:,["coaching_institute","school_board","mental_health_issues","jee_main_score","jee_advanced_score"]]

Unnamed: 0,coaching_institute,school_board,mental_health_issues,jee_main_score,jee_advanced_score
0,FIITJEE,State,No,78.95,59.22
1113,FIITJEE,State,No,78.94,52.03
4741,Allen,State,Yes,78.94,47.36
4312,FIITJEE,ICSE,Yes,78.93,83.25
4206,Local,CBSE,No,78.92,82.84
...,...,...,...,...,...
4198,Allen,CBSE,Yes,29.89,26.20
2024,Local,ICSE,No,29.73,84.20
4725,Local,ICSE,No,27.61,61.21
4779,FIITJEE,ICSE,Yes,27.53,71.90


#### It first sorts according to jee_main_score, and if there are values that are the same, it sorts according to jee_advanced_score

In [115]:
jee[jee.isnull().any(axis=1)]

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
7,82.74,73.80,58.62,CBSE,77.15,1,,2.2,High,12th,Urban,Low,No,Yes,0
9,79.60,63.74,77.82,State,94.96,2,,2.7,Mid,12th,Rural,High,Yes,No,0
10,65.51,91.57,79.06,CBSE,66.88,1,,5.5,Mid,Graduate,Urban,Low,No,Yes,0
15,64.13,57.81,68.50,CBSE,74.68,2,,6.0,High,Graduate,Semi-Urban,Low,No,No,0
17,76.40,98.67,73.50,State,78.29,1,,4.0,High,PG,Rural,High,Yes,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4973,42.80,75.66,64.51,CBSE,78.95,2,,4.8,High,PG,Urban,Medium,No,Yes,0
4975,52.77,57.33,63.99,CBSE,68.00,2,,3.6,Mid,Upto 10th,Urban,Low,Yes,Yes,0
4980,55.23,58.87,53.67,State,88.74,1,,4.7,Mid,12th,Semi-Urban,Low,Yes,No,0
4994,62.80,85.11,64.35,State,89.05,1,,0.6,High,12th,Semi-Urban,High,Yes,Yes,1


### This prints the rows that have NaN anywhere in the row

In [116]:
for index, row in jee.iterrows():
  print(index)
  print(row)
  print("\n\n")

0
jee_main_score              78.95
jee_advanced_score          59.22
mock_test_score_avg         59.86
school_board                State
class_12_percent            70.09
attempt_count                   1
coaching_institute        FIITJEE
daily_study_hours             5.4
family_income                 Low
parent_education        Upto 10th
location_type               Urban
peer_pressure_level           Low
mental_health_issues           No
admission_taken                No
dropout                         1
Name: 0, dtype: object



1
jee_main_score               80.0
jee_advanced_score          58.75
mock_test_score_avg         64.33
school_board                State
class_12_percent             78.0
attempt_count                   1
coaching_institute        FIITJEE
daily_study_hours             5.5
family_income                 Mid
parent_education        Upto 10th
location_type               Urban
peer_pressure_level           Low
mental_health_issues          Yes
admission_taken   

In [117]:
for index, row in jee.iterrows():
  print(index)
  print(row["jee_main_score"])
  print("\n\n")

0
78.95



1
80.0



2
81.07



3
93.32



4
68.72



5
68.72



6
94.11



7
82.74



8
65.43



9
79.6



10
65.51



11
65.48



12
75.39



13
45.21



14
47.85



15
64.13



16
57.82



17
76.4



18
59.29



19
52.23



20
92.52



21
68.84



22
72.95



23
52.05



24
64.38



25
73.55



26
55.89



27
77.26



28
63.59



29
67.92



30
63.58



31
97.93



32
71.81



33
57.19



34
83.52



35
54.91



36
74.92



37
44.56



38
53.41



39
74.76



40
82.34



41
74.4



42
70.38



43
67.78



44
51.3



45
61.92



46
65.55



47
86.8



48
76.81



49
47.32



50
76.54



51
66.61



52
62.52



53
80.56



54
86.43



55
85.04



56
60.25



57
67.67



58
76.64



59
85.66



60
65.29



61
69.4



62
56.51



63
55.25



64
83.38



65
90.99



66
70.99



67
86.05



68
77.06



69
62.97



70
77.06



71
93.53



72
71.5



73
93.91



74
35.32



75
83.51



76
73.22



77
67.81



78
73.28



79
44.17



80
68.92



81
77.0



82
92.69



83
64.74



84
60.68




In [118]:
jee.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   jee_main_score        5000 non-null   float64
 1   jee_advanced_score    5000 non-null   float64
 2   mock_test_score_avg   5000 non-null   float64
 3   school_board          5000 non-null   object 
 4   class_12_percent      5000 non-null   float64
 5   attempt_count         5000 non-null   int64  
 6   coaching_institute    3791 non-null   object 
 7   daily_study_hours     5000 non-null   float64
 8   family_income         5000 non-null   object 
 9   parent_education      5000 non-null   object 
 10  location_type         5000 non-null   object 
 11  peer_pressure_level   5000 non-null   object 
 12  mental_health_issues  5000 non-null   object 
 13  admission_taken       5000 non-null   object 
 14  dropout               5000 non-null   int64  
dtypes: float64(5), int64(

In [119]:
jee.loc[jee['jee_main_score']==100, ['class_12_percent','attempt_count']]

Unnamed: 0,class_12_percent,attempt_count
113,67.05,1
125,76.06,2
179,57.81,1
209,76.67,2
220,67.45,1
...,...,...
4851,78.72,2
4855,85.22,2
4870,68.25,2
4989,71.27,2


### Can also be done as 

In [120]:
jee[jee['attempt_count']>1]

Unnamed: 0,jee_main_score,jee_advanced_score,mock_test_score_avg,school_board,class_12_percent,attempt_count,coaching_institute,daily_study_hours,family_income,parent_education,location_type,peer_pressure_level,mental_health_issues,admission_taken,dropout
9,79.60,63.74,77.82,State,94.96,2,,2.7,Mid,12th,Rural,High,Yes,No,0
12,75.39,66.14,52.21,CBSE,76.15,2,Allen,6.5,Mid,12th,Rural,Medium,No,Yes,0
13,45.21,62.20,62.41,ICSE,85.09,2,Allen,7.3,Low,Graduate,Urban,Medium,Yes,No,1
14,47.85,67.23,77.88,State,86.03,2,Local,3.1,Low,12th,Rural,Medium,Yes,No,1
15,64.13,57.81,68.50,CBSE,74.68,2,,6.0,High,Graduate,Semi-Urban,Low,No,No,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,42.61,88.94,84.21,CBSE,57.93,2,Local,3.2,Low,Upto 10th,Rural,Medium,No,Yes,0
4992,70.39,76.59,70.55,State,71.73,2,Allen,3.9,High,12th,Semi-Urban,Medium,Yes,Yes,0
4996,81.96,34.03,82.85,CBSE,73.02,2,Local,4.7,Mid,PG,Urban,High,Yes,Yes,0
4997,100.00,54.71,83.75,CBSE,77.87,2,,6.3,Mid,PG,Semi-Urban,Medium,Yes,Yes,0


In [122]:
jee[jee['attempt_count']>1]['mental_health_issues']

9       Yes
12       No
13      Yes
14      Yes
15       No
       ... 
4990     No
4992    Yes
4996    Yes
4997    Yes
4999    Yes
Name: mental_health_issues, Length: 2523, dtype: object

In [123]:
jee[(jee['attempt_count']>1) & (jee['mental_health_issues']=='Yes')][['daily_study_hours','peer_pressure_level']]

Unnamed: 0,daily_study_hours,peer_pressure_level
9,2.7,High
13,7.3,Medium
14,3.1,Medium
19,3.3,Medium
23,2.0,High
...,...,...
4985,0.0,High
4992,3.9,Medium
4996,4.7,High
4997,6.3,Medium


### Use double brackets when selecting multiple columns to be shown