# Student Data
This notebook analyzes pass/fail data for a class

In [167]:
import numpy as np

# Raw inputs: one grade and one study-hours figure per student, in matching order.
data = [50, 50, 47, 97, 49, 3]
study_hours = [10.0, 11.5, 9.0, 16.0, 9.25, 1.0]

grades = np.array(data)

# 2-D array: row 0 holds the study hours, row 1 the grades.
# Mixing the float hours with the integer grades yields a float array.
student_data = np.array([study_hours, grades])

# Study hours of the first student.
student_data[0, 0]

10.0

In [168]:
import pandas as pd

# One row per student: the names are paired positionally with row 0
# (study hours) and row 1 (grades) of student_data.
df_students = pd.DataFrame(
    {
        'Name': ['Dan', 'Joann', 'Pedro', 'Rosie', 'Ethan', 'Vicky'],
        'StudyHours': student_data[0],
        'Grade': student_data[1],
    }
)

df_students

Unnamed: 0,Name,StudyHours,Grade
0,Dan,10.0,50.0
1,Joann,11.5,50.0
2,Pedro,9.0,47.0
3,Rosie,16.0,97.0
4,Ethan,9.25,49.0
5,Vicky,1.0,3.0


In [169]:
# Label-based lookup: .loc[0] returns the row whose index label is 0 as a Series.
df_students.loc[0]

Name           Dan
StudyHours    10.0
Grade         50.0
Name: 0, dtype: object

In [170]:
# .loc[row_label, column_label] returns a single cell: the Grade of the row labelled 5.
df_students.loc[5, 'Grade']

3.0

In [171]:
# Boolean-mask filtering: keep only the rows whose Name equals 'Vicky'.
df_students[df_students['Name'] == 'Vicky']

Unnamed: 0,Name,StudyHours,Grade
5,Vicky,1.0,3.0


In [172]:
# Filter rows with a query-string expression: keep the rows whose Name equals "Vicky".
df_students.query('Name=="Vicky"')

Unnamed: 0,Name,StudyHours,Grade
5,Vicky,1.0,3.0


In [173]:
# Boolean-mask filtering using attribute-style column access (df.Name rather than df['Name']).
df_students[df_students.Name=="Vicky"]

Unnamed: 0,Name,StudyHours,Grade
5,Vicky,1.0,3.0


In [174]:
# Read the test records from a comma-separated file; with header='infer'
# the column names are taken from the first row of the file.
df_Test = pd.read_csv('Test_data.csv', sep=',', header='infer')

In [175]:
# Display the loaded frame to inspect its contents.
df_Test

Unnamed: 0,Name,Age,Score
0,Alice,25,90.0
1,Bob,30,
2,Carol,27,78.0
3,David,26,65.0
4,Eve,22,
5,Frank,33,88.0
6,Grace,28,
7,Hannah,31,82.0
8,Ian,35,95.0
9,Julia,29,


In [176]:
# Element-wise missing-value mask: True wherever a cell holds no value.
df_Test.isna()

Unnamed: 0,Name,Age,Score
0,False,False,False
1,False,False,True
2,False,False,False
3,False,False,False
4,False,False,True
5,False,False,False
6,False,False,True
7,False,False,False
8,False,False,False
9,False,False,True


In [177]:
# Count the missing values in each column (each True in the mask adds 1 to the sum).
df_Test.isna().sum()

Name      0
Age       0
Score    11
dtype: int64

In [178]:
# Replace missing scores with the mean of the scores that are present
# (Series.mean skips missing values by default).
df_Test['Score'] = df_Test['Score'].fillna(df_Test['Score'].mean())

In [179]:
# Re-count the missing values per column to confirm none remain.
df_Test.isna().sum()

Name     0
Age      0
Score    0
dtype: int64

In [180]:
# Mean of the Age and Score columns, each selected by its column label.
mean_age, mean_score = (df_Test[column].mean() for column in ('Age', 'Score'))

# Report both averages to two decimal places.
print(f'Average age: {mean_age:.2f}\nAverage score: {mean_score:.2f}')

Average age: 28.50
Average score: 80.87


In [181]:
# Students who are younger than the average age.
df_Test.loc[df_Test['Age'] < mean_age]

Unnamed: 0,Name,Age,Score
0,Alice,25,90.0
2,Carol,27,78.0
3,David,26,65.0
4,Eve,22,80.866667
6,Grace,28,80.866667
11,Laura,24,80.866667
13,Nina,26,80.866667
15,Paul,23,64.0
17,Rachel,28,70.0
18,Steve,27,92.0


In [182]:
# Average score of the students who are younger than the average age.
df_Test.loc[df_Test['Age'] < mean_age, 'Score'].mean()

79.24615384615385

In [183]:
# A student passes when their score is at least PASS_SCORE.
# NOTE(review): this cell was previously described as "above 60" while the code
# applied 85; 85 is kept because that is what the code used -- confirm the
# intended cut-off.
PASS_SCORE = 85

# Boolean Series, True for passing students. The name (including its spelling)
# is kept unchanged in case other cells reference it.
Coundition_Passed = df_Test['Score'] >= PASS_SCORE

# assign() returns a new frame and overwrites any existing 'Passed' column, so
# re-running this cell does not append a duplicate column the way
# pd.concat(..., axis=1) would.
df_Test = df_Test.assign(Passed=Coundition_Passed)

In [184]:
# Display the frame again to inspect the Passed column alongside the scores.
df_Test

Unnamed: 0,Name,Age,Score,Passed
0,Alice,25,90.0,True
1,Bob,30,80.866667,False
2,Carol,27,78.0,False
3,David,26,65.0,False
4,Eve,22,80.866667,False
5,Frank,33,88.0,True
6,Grace,28,80.866667,False
7,Hannah,31,82.0,False
8,Ian,35,95.0,True
9,Julia,29,80.866667,False


In [185]:
# Number of non-null entries in each remaining column, split by pass/fail outcome.
df_Test.groupby('Passed').count()

Unnamed: 0_level_0,Name,Age,Score
Passed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,19,19,19
True,7,7,7


In [186]:
# Number of (non-null) names in each pass/fail group.
df_Test.groupby('Passed')['Name'].count()

Passed
False    19
True      7
Name: Name, dtype: int64

In [187]:
# Average score within each pass/fail group.
df_Test.groupby('Passed')['Score'].mean()

Passed
False    77.764912
True     89.285714
Name: Score, dtype: float64

In [188]:
# Non-null counts of Age and Score per pass/fail group.
# Several columns must be selected with a list (double brackets): the bare
# form ['Age', 'Score'] is interpreted as a tuple key, which is deprecated
# and raises an error in pandas 2.x.
df_Test.groupby(df_Test.Passed)[['Age', 'Score']].count()

  df_Test.groupby(df_Test.Passed)['Age', 'Score'].count()


Unnamed: 0_level_0,Age,Score
Passed,Unnamed: 1_level_1,Unnamed: 2_level_1
False,19,19
True,7,7


In [189]:
# Order the rows by Score, lowest first.
df_Test.sort_values(by='Score', ascending=True)

Unnamed: 0,Name,Age,Score,Passed
15,Paul,23,64.0,False
3,David,26,65.0,False
17,Rachel,28,70.0,False
22,Wendy,29,74.0,False
10,Kevin,31,76.0,False
2,Carol,27,78.0,False
14,Oscar,34,79.0,False
25,Zoe,26,80.866667,False
16,Quinn,32,80.866667,False
13,Nina,26,80.866667,False


In [190]:
# Order the rows by Score, highest first.
df_Test.sort_values(by='Score', ascending=False)

Unnamed: 0,Name,Age,Score,Passed
8,Ian,35,95.0,True
18,Steve,27,92.0,True
0,Alice,25,90.0,True
24,Yara,32,89.0,True
5,Frank,33,88.0,True
20,Uma,25,86.0,True
12,Mike,30,85.0,True
7,Hannah,31,82.0,False
1,Bob,30,80.866667,False
23,Xander,33,80.866667,False
