In [126]:
import pandas as pd

# Load the student performance dataset from the CSV next to the notebook.
students_data = pd.read_csv('StudentsPerformance.csv')

# Column dtypes, kept for display in a later cell.
data_types = students_data.dtypes

# Summary statistics (count, mean, std, min, quartiles, max) as returned by
# describe(); kept for display in a later cell.
data_description = students_data.describe()

# Preview of the leading 15 rows.
students_data_head = students_data.head(n=15)

# Last expression of the cell: render the preview.
students_data_head

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75
5,female,group B,associate's degree,standard,none,71,83,78
6,female,group B,some college,standard,completed,88,95,92
7,male,group B,some college,free/reduced,none,40,43,39
8,male,group D,high school,free/reduced,completed,64,64,67
9,female,group B,high school,free/reduced,none,38,60,50


In [127]:
# Structural overview of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage. info() prints its report
# rather than returning a value to display.
students_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


In [128]:
# Display the describe() result stored in `data_description` when the data was loaded.
data_description

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [129]:
# Display the per-column dtypes stored in `data_types` when the data was loaded.
data_types

gender                         object
race/ethnicity                 object
parental level of education    object
lunch                          object
test preparation course        object
math score                      int64
reading score                   int64
writing score                   int64
dtype: object

In [130]:
# Build an independent copy holding three columns of the first seven students.
selected_columns = ['gender', 'race/ethnicity', 'math score']
subset_data = students_data.loc[:, selected_columns].iloc[:7].copy()

# Relabel the rows as std_1 ... std_7.
subset_data.index = [f'std_{position}' for position in range(1, 8)]

# Replace the column headers with capitalized names.
subset_data.columns = ['Gender', 'Race/Ethnicity', 'Math Score']

# Assigning to a label that does not exist yet appends a new row.
subset_data.loc['std_8'] = ['male', 'group A', 88]

subset_data

Unnamed: 0,Gender,Race/Ethnicity,Math Score
std_1,female,group B,72
std_2,female,group C,69
std_3,female,group B,90
std_4,male,group A,47
std_5,male,group C,76
std_6,female,group B,71
std_7,female,group B,88
std_8,male,group A,88


In [131]:
# Highest math score within each parental-education group.
# The result is a Series indexed by 'parental level of education'.
max_math_scores_by_parental_education = (
    students_data
    .groupby('parental level of education')
    ['math score']
    .max()
)

max_math_scores_by_parental_education

parental level of education
associate's degree    100
bachelor's degree     100
high school            99
master's degree        95
some college          100
some high school       97
Name: math score, dtype: int64

In [132]:
# Male students who completed the test preparation course and have a
# reading score of 75 or above. The comparison is inclusive: a reading score
# of exactly 75 qualifies.
# NOTE(review): if "strictly higher than 75" was intended, change `>=` to `>`
# below — confirm against the task statement.
READING_SCORE_THRESHOLD = 75

# One named boolean mask per condition keeps the filter readable.
is_male = students_data['gender'] == 'male'
completed_prep = students_data['test preparation course'] == 'completed'
reading_at_least_threshold = students_data['reading score'] >= READING_SCORE_THRESHOLD

male_students_prepared_with_75plus_reading = students_data[
    is_male & completed_prep & reading_at_least_threshold
]

# Number of matching students (row count of the filtered frame).
count_male_students_prepared_with_75plus_reading = len(male_students_prepared_with_75plus_reading)

male_students_prepared_with_75plus_reading

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
35,male,group E,associate's degree,standard,completed,81,81,79
49,male,group C,high school,standard,completed,82,84,82
77,male,group A,bachelor's degree,standard,completed,80,78,81
95,male,group C,associate's degree,free/reduced,completed,78,81,82
104,male,group C,some college,standard,completed,98,86,90
...,...,...,...,...,...,...,...,...
934,male,group C,associate's degree,standard,completed,98,87,90
938,male,group D,some college,standard,completed,85,81,85
982,male,group B,some high school,standard,completed,79,85,86
987,male,group E,some high school,standard,completed,81,75,76


In [133]:
# Display the row count of the filtered frame computed in the previous cell.
count_male_students_prepared_with_75plus_reading

69

In [134]:
# Add a per-student mean of the three subject scores as a new column.
# This writes the column onto `students_data` itself.
score_columns = ['math score', 'reading score', 'writing score']
students_data['average score'] = students_data[score_columns].mean(axis='columns')

# Only the first five rows are kept here, as a preview of the new column.
students_data_with_average = students_data.head(5)
students_data_with_average

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,average score
0,female,group B,bachelor's degree,standard,none,72,72,74,72.666667
1,female,group C,some college,standard,completed,69,90,88,82.333333
2,female,group B,master's degree,standard,none,90,95,93,92.666667
3,male,group A,associate's degree,free/reduced,none,47,57,44,49.333333
4,male,group C,some college,standard,none,76,78,75,76.333333


In [135]:
# Number of students in each race/ethnicity group (Series: group -> count).
group_labels = students_data['race/ethnicity']
race_ethnicity_counts = group_labels.value_counts()

# Attach to every row the size of the group that row belongs to.
# This writes the column onto `students_data` itself.
students_data['race/ethnicity count'] = group_labels.map(race_ethnicity_counts)

# Only the first five rows are kept here, as a preview of the new column.
students_data_with_counts = students_data.head(5)
students_data_with_counts

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,average score,race/ethnicity count
0,female,group B,bachelor's degree,standard,none,72,72,74,72.666667,190
1,female,group C,some college,standard,completed,69,90,88,82.333333,319
2,female,group B,master's degree,standard,none,90,95,93,92.666667,190
3,male,group A,associate's degree,free/reduced,none,47,57,44,49.333333,89
4,male,group C,some college,standard,none,76,78,75,76.333333,319
