# Case Study # 2 : Measures of Central Tendency and Dispersion

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [64]:
data = pd.read_csv('StudentsPerformance.csv')

In [4]:
data.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group C,some high school,free/reduced,none,0,17,10
1,female,group B,high school,free/reduced,none,8,24,23
2,female,group B,some high school,free/reduced,none,18,32,28
3,female,group B,some college,standard,none,11,38,32
4,female,group C,some college,free/reduced,none,22,39,33


### 1. Find out how many males and females participated in the test

In [9]:
data.count()

gender                         1000
race/ethnicity                 1000
parental level of education    1000
lunch                          1000
test preparation course        1000
math score                     1000
reading score                  1000
writing score                  1000
dtype: int64

In [7]:
data['gender'].value_counts()

female    518
male      482
Name: gender, dtype: int64

##### 482 males and 518 females participated in the test

### 2. What do you think about the students' parental level of education

In [8]:
data['parental level of education'].value_counts()

some college          226
associate's degree    222
high school           196
some high school      179
bachelor's degree     118
master's degree        59
Name: parental level of education, dtype: int64

###### Parents of all the students have received minimum school level education. 37.5% Parents have school level education, 34% have completed degree level education, 23% have attended some college and 5.5% parents have Master's degree

### 3. Who scores the most on average for math, reading and writing based on
#### (a) Gender

In [12]:
round(data.groupby('gender')['math score'].mean(), 2)

gender
female    63.38
male      68.82
Name: math score, dtype: float64

In [13]:
round(data.groupby('gender')['reading score'].mean(), 2)

gender
female    72.59
male      65.55
Name: reading score, dtype: float64

In [14]:
round(data.groupby('gender')['writing score'].mean(), 2)

gender
female    72.47
male      63.45
Name: writing score, dtype: float64

##### On average Male candidates scored more on Math and Female candidates scored more on Reading & Writing

#### (b) Test preparation course

In [16]:
data['test preparation course'].value_counts()

none         642
completed    358
Name: test preparation course, dtype: int64

In [17]:
round(data.groupby('test preparation course')['math score'].mean(), 2)

test preparation course
completed    69.97
none         63.79
Name: math score, dtype: float64

In [18]:
round(data.groupby('test preparation course')['reading score'].mean(), 2)

test preparation course
completed    74.18
none         66.42
Name: reading score, dtype: float64

In [19]:
round(data.groupby('test preparation course')['writing score'].mean(), 2)

test preparation course
completed    74.68
none         64.46
Name: writing score, dtype: float64

##### On average, students who have completed the Test Preparation Course scored more than others for Math, Reading & Writing

### 4. What do you think about the scoring variation for math, reading and writing based on
#### (a) Gender

In [21]:
round(data.groupby('gender')['math score'].std(), 2)

gender
female    16.03
male      14.56
Name: math score, dtype: float64

In [22]:
round(data.groupby('gender')['reading score'].std(), 2)

gender
female    14.41
male      14.15
Name: reading score, dtype: float64

In [23]:
round(data.groupby('gender')['writing score'].std(), 2)

gender
female    14.84
male      14.23
Name: writing score, dtype: float64

##### Based on gender, Female candidates have more scoring variation for Math, Reading & Writing compared to Male candidates

#### (b) Test preparation course

In [24]:
round(data.groupby('test preparation course')['math score'].std(), 2)

test preparation course
completed    14.52
none         15.71
Name: math score, dtype: float64

In [25]:
round(data.groupby('test preparation course')['reading score'].std(), 2)

test preparation course
completed    13.54
none         14.61
Name: reading score, dtype: float64

In [26]:
round(data.groupby('test preparation course')['writing score'].std(), 2)

test preparation course
completed    13.24
none         15.04
Name: writing score, dtype: float64

##### On average, students who have not completed Test Preparation Course shows more scoring variation than those who have completed

### 5. The management needs your help to give bonus points to the top 25% of students based on their maths score, so how will you help the management to achieve this.

##### Top 25% based on maths score means, students with maths score higher than 75th quantile (i.e. 77 marks and above)
##### Created a DataFrame 'bonus_data' from 'data' which has 3 new columns:-
##### (i) total_score = math score + reading score + writing score
##### (ii) bonus: initially set as 0
##### (iii) final_score = total_score + bonus

##### Method of alloting bonus score:-
##### Maximum score a student can score is 300 (math + reading + writing)
##### Maximum bonus score that can be allotted to a student is set as 5
##### Bonus score is allotted to students who scored a minimum of 77 marks in Math
##### (a) For students with total_score between 296 and 299, bonus = 300 - total_score
##### (b) For students with total_score = 300, no bonus points are given
##### (c) For students with total_score of 295 and less, bonus = 5
##### By following this method, the maximum score a student can score will be restricted to 300

In [138]:
top25 = data['math score'].quantile(q = 0.75)
top25

77.0

In [131]:
bonus_data = pd.DataFrame(data, columns = ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course', 'math score', 'reading score', 'writing score', 'total_score', 'bonus', 'final_score'])

In [139]:
bonus_data['total_score'] = bonus_data['math score'] + bonus_data['reading score'] + bonus_data['writing score']
bonus_data['bonus'] = 0
bonus_data['final_score'] = bonus_data['total_score'] + bonus_data['bonus']
bonus_data

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,total_score,bonus,final_score
0,female,group C,some high school,free/reduced,none,0,17,10,27,0,27
1,female,group B,high school,free/reduced,none,8,24,23,55,0,55
2,female,group B,some high school,free/reduced,none,18,32,28,78,0,78
3,female,group B,some college,standard,none,11,38,32,81,0,81
4,female,group C,some college,free/reduced,none,22,39,33,94,0,94
...,...,...,...,...,...,...,...,...,...,...,...
995,male,group E,some college,standard,completed,99,87,81,267,0,267
996,male,group A,some college,standard,completed,100,96,86,282,0,282
997,male,group D,some college,standard,completed,100,97,99,296,0,296
998,male,group E,associate's degree,free/reduced,completed,100,100,93,293,0,293


In [140]:
bonus_data.loc[(bonus_data['math score'] > 76) & (bonus_data['total_score'] > 295) & (bonus_data['total_score'] < 300), 'bonus'] = 300-bonus_data['total_score']
bonus_data.loc[(bonus_data['math score'] > 76) & (bonus_data['total_score'] == 300), 'bonus'] = 0
bonus_data.loc[(bonus_data['math score'] > 76) & (bonus_data['total_score'] < 296), 'bonus'] = 5
bonus_data

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,total_score,bonus,final_score
0,female,group C,some high school,free/reduced,none,0,17,10,27,0,27
1,female,group B,high school,free/reduced,none,8,24,23,55,0,55
2,female,group B,some high school,free/reduced,none,18,32,28,78,0,78
3,female,group B,some college,standard,none,11,38,32,81,0,81
4,female,group C,some college,free/reduced,none,22,39,33,94,0,94
...,...,...,...,...,...,...,...,...,...,...,...
995,male,group E,some college,standard,completed,99,87,81,267,5,267
996,male,group A,some college,standard,completed,100,96,86,282,5,282
997,male,group D,some college,standard,completed,100,97,99,296,4,296
998,male,group E,associate's degree,free/reduced,completed,100,100,93,293,5,293
