In [1]:
import pandas as pd

In [2]:
# load the exam results from the CSV file that sits next to this notebook
csv_path = './StudentsPerformance.csv'
df_exams = pd.read_csv(csv_path)

In [3]:
# preview the first five rows (five is also the default for head)
df_exams.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


# 1 Math Operations

## 1.1 Operations on columns

In [5]:
# pick the math score column first, then add up all of its values

math_scores = df_exams['math score']
math_scores.sum()

66089

In [11]:
# count, mean, std, max and min of the math scores, printed on one line in that order

math_scores = df_exams['math score']
stat_names = ('count', 'mean', 'std', 'max', 'min')
# call each statistic method by name; print separates the results with spaces
print(*(getattr(math_scores, stat)() for stat in stat_names))

1000 66.089 15.163080096009468 100 0


In [14]:
# easier calculation with describe: one call reports count, mean, std, min,
# the 25%/50%/75% quartiles and max for every numeric column

df_exams.describe()

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


## 1.2 Operations on rows

In [29]:
# calculating the average of the three scores in each row
# (the name `row_sum` is kept because a later cell reads it, but it holds the row mean)

score_columns = ['math score', 'reading score', 'writing score']
# axis=1 averages across the columns of each row; mean skips missing values,
# and the score columns have none (describe reports a count of 1000 for each)
row_sum = df_exams[score_columns].mean(axis=1)

In [30]:
# build a separate frame that carries the per-row average as an extra column;
# assign returns a new DataFrame, so df_exams itself is left untouched
df_exams1 = df_exams.assign(average=row_sum)

In [35]:
# show the first five rows with numeric values rounded to two decimals
df_exams1.head().round(2)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,average
0,female,group B,bachelor's degree,standard,none,72,72,74,72.67
1,female,group C,some college,standard,completed,69,90,88,82.33
2,female,group B,master's degree,standard,none,90,95,93,92.67
3,male,group A,associate's degree,free/reduced,none,47,57,44,49.33
4,male,group C,some college,standard,none,76,78,75,76.33


# 2 Value Counts

In [39]:
# counting gender elements (both ways agree when the column has no missing values)

# len function: counts every row, missing values included
# (this is not the last expression of the cell, so its result is not displayed)
len(df_exams['gender'])

# .count() method: counts only the non-missing values
df_exams['gender'].count()

1000

In [41]:
# number of students in each gender category, most frequent first

gender_column = df_exams['gender']
gender_column.value_counts()

gender
female    518
male      482
Name: count, dtype: int64

In [45]:
# relative frequency of each gender: every count divided by the total number of values

gender_column = df_exams['gender']
gender_column.value_counts(normalize=True)

gender
female    0.518
male      0.482
Name: proportion, dtype: float64

In [46]:
# number of students for each "parental level of education" category
education_column = df_exams['parental level of education']
education_column.value_counts()

parental level of education
some college          226
associate's degree    222
high school           196
some high school      179
bachelor's degree     118
master's degree        59
Name: count, dtype: int64

In [47]:
# relative frequency of each education level, rounded to 2 decimals

education_share = df_exams['parental level of education'].value_counts(normalize=True)
education_share.round(2)

parental level of education
some college          0.23
associate's degree    0.22
high school           0.20
some high school      0.18
bachelor's degree     0.12
master's degree       0.06
Name: proportion, dtype: float64

# 3 Sort a dataframe

In [52]:
# sort by one column (ascending is the default)
# `by` is the first parameter, so sort_values(by='math score') is equivalent;
# only one call is needed because a cell displays just its last expression

df_exams.sort_values('math score')

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
59,female,group C,some high school,free/reduced,none,0,17,10
980,female,group B,high school,free/reduced,none,8,24,23
17,female,group B,some high school,free/reduced,none,18,32,28
787,female,group B,some college,standard,none,19,38,32
145,female,group C,some college,free/reduced,none,22,39,33
...,...,...,...,...,...,...,...,...
625,male,group D,some college,standard,completed,100,97,99
623,male,group A,some college,standard,completed,100,96,86
451,female,group E,some college,standard,none,100,92,97
962,female,group E,associate's degree,standard,none,100,100,100


In [53]:
# sort descending by one column: highest math score first

df_exams.sort_values(by='math score', ascending=False)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
962,female,group E,associate's degree,standard,none,100,100,100
625,male,group D,some college,standard,completed,100,97,99
458,female,group E,bachelor's degree,standard,none,100,100,100
623,male,group A,some college,standard,completed,100,96,86
451,female,group E,some college,standard,none,100,92,97
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [54]:
# sort descending by multiple columns

# the list order sets the priority: math score first, reading score breaks ties
sort_priority = ['math score', 'reading score']
df_exams.sort_values(by=sort_priority, ascending=False)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
149,male,group E,associate's degree,free/reduced,completed,100,100,93
458,female,group E,bachelor's degree,standard,none,100,100,100
916,male,group E,bachelor's degree,standard,completed,100,100,100
962,female,group E,associate's degree,standard,none,100,100,100
625,male,group D,some college,standard,completed,100,97,99
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [56]:
# sort descending by multiple columns and update dataframe
# inplace=True reorders df_exams itself instead of returning a sorted copy,
# so this cell displays nothing

df_exams.sort_values(by=['math score', 'reading score'], ascending=False, inplace=True)

In [57]:
# display the frame to confirm that the in-place sort changed df_exams itself
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
149,male,group E,associate's degree,free/reduced,completed,100,100,93
458,female,group E,bachelor's degree,standard,none,100,100,100
916,male,group E,bachelor's degree,standard,completed,100,100,100
962,female,group E,associate's degree,standard,none,100,100,100
625,male,group D,some college,standard,completed,100,97,99
...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33
787,female,group B,some college,standard,none,19,38,32
17,female,group B,some high school,free/reduced,none,18,32,28
980,female,group B,high school,free/reduced,none,8,24,23


In [62]:
# sort ascending with a key function
# the key receives the whole column and lower-cases it, so the ordering
# ignores upper/lower case differences in 'race/ethnicity'

df_exams.sort_values('race/ethnicity', ascending=True,
                     key=lambda col:col.str.lower())

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
741,female,group A,associate's degree,free/reduced,none,37,57,56
151,male,group A,bachelor's degree,standard,none,77,67,68
811,male,group A,high school,free/reduced,none,45,47,49
112,male,group A,associate's degree,standard,none,54,53,47
25,male,group A,master's degree,free/reduced,none,73,74,72
...,...,...,...,...,...,...,...,...
751,male,group E,some college,standard,none,68,72,65
915,female,group E,some college,standard,none,68,70,66
592,male,group E,bachelor's degree,standard,none,68,68,64
479,male,group E,associate's degree,standard,none,76,71,67


# 4 Index

## 4.1 Create Index

In [63]:
import numpy as np
import random

In [65]:
# creating non-repeating values for the index: 0, 1, ..., number of rows - 1
# (sized from the dataframe so it always matches, instead of a hard-coded 1000)

new_index = np.arange(len(df_exams))

In [70]:
# shuffling the indexes in place with NumPy's own random generator
# (new_index is a NumPy array); the fixed seed makes the shuffled order
# identical every time the notebook is re-run from a fresh kernel

np.random.default_rng(42).shuffle(new_index)

In [72]:
# create a new column with new index
# the shuffled values are assigned by position, one per row, so new_index
# must have exactly as many elements as df_exams has rows

df_exams['new_index'] = new_index
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,new_index
149,male,group E,associate's degree,free/reduced,completed,100,100,93,687
458,female,group E,bachelor's degree,standard,none,100,100,100,962
916,male,group E,bachelor's degree,standard,completed,100,100,100,91
962,female,group E,associate's degree,standard,none,100,100,100,271
625,male,group D,some college,standard,completed,100,97,99,918
...,...,...,...,...,...,...,...,...,...
145,female,group C,some college,free/reduced,none,22,39,33,801
787,female,group B,some college,standard,none,19,38,32,481
17,female,group B,some high school,free/reduced,none,18,32,28,843
980,female,group B,high school,free/reduced,none,8,24,23,105


## 4.2 Set Index

In [74]:
# set new_index column as index
# inplace=True modifies df_exams directly; the column is moved into the index,
# so re-running this cell on its own is expected to fail because the
# 'new_index' column no longer exists

df_exams.set_index('new_index', inplace=True)

In [75]:
# display the frame: new_index is now the row label instead of a regular column
df_exams

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
687,male,group E,associate's degree,free/reduced,completed,100,100,93
962,female,group E,bachelor's degree,standard,none,100,100,100
91,male,group E,bachelor's degree,standard,completed,100,100,100
271,female,group E,associate's degree,standard,none,100,100,100
918,male,group D,some college,standard,completed,100,97,99
...,...,...,...,...,...,...,...,...
801,female,group C,some college,free/reduced,none,22,39,33
481,female,group B,some college,standard,none,19,38,32
843,female,group B,some high school,free/reduced,none,18,32,28
105,female,group B,high school,free/reduced,none,8,24,23


## 4.3 Sort Index

In [76]:
# sort dataframe by new index, smallest label first
# this returns a sorted copy for display; df_exams itself keeps its current order

df_exams.sort_index(ascending=True)

Unnamed: 0_level_0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,male,group A,some college,free/reduced,none,58,60,57
1,male,group C,associate's degree,standard,completed,51,60,58
2,female,group C,high school,standard,none,66,71,76
3,male,group D,high school,free/reduced,none,74,70,69
4,female,group C,some high school,free/reduced,completed,71,84,87
...,...,...,...,...,...,...,...,...
995,female,group B,bachelor's degree,standard,none,61,72,70
996,female,group C,associate's degree,free/reduced,none,53,61,62
997,female,group A,some high school,free/reduced,none,47,59,50
998,male,group D,high school,free/reduced,completed,78,77,80


In [78]:
# sort descending by index
# inplace=True reorders df_exams itself, so nothing is displayed by this cell

df_exams.sort_index(ascending=False, inplace=True)