# Students Performance

Notebook de análisis de datos enfocado en los factores socioeconómicos que afectan el rendimiento académico a la hora de presentar exámenes.


In [1]:
import pandas as pd
import numpy as np
import altair as alt

# Carga de datos

In [3]:
# Load the raw student-performance CSV (Latin-1 encoded) and preview the first rows
data = pd.read_csv(
    '../data/StudentsPerformance.csv',
    encoding="ISO-8859-1",
)
data.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


# EDA

Los nombres de las columnas son demasiado largos a mi parecer, así que renombraré algunas de ellas.

In [4]:
# Shorten the verbose column names. The frame is rebound rather than mutated
# with inplace=True, so the loaded object is never modified behind the reader's back.
data = data.rename(
    columns={
        "race/ethnicity": "ethnicity",
        "parental level of education": "parental_education",
        "test preparation course": "course",
        "math score": "math",
        "reading score": "reading",
        "writing score": "writing",
    }
)
data.head()

Unnamed: 0,gender,ethnicity,parental_education,lunch,course,math,reading,writing
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


Comenzando con el EDA, reviso la información de mis datos y busco valores faltantes e información duplicada.

In [7]:
# Print column dtypes and non-null counts to spot missing values
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   gender              1000 non-null   object
 1   ethnicity           1000 non-null   object
 2   parental_education  1000 non-null   object
 3   lunch               1000 non-null   object
 4   course              1000 non-null   object
 5   math                1000 non-null   int64 
 6   reading             1000 non-null   int64 
 7   writing             1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


In [8]:
data.duplicated().sum()      # number of fully duplicated rows

0

Al tratarse de un dataset de práctica para principiantes, los datos ya estaban limpios, por lo que por esa parte no queda mucho que hacer.

## Primeras visualizaciones

Comenzamos explorando nuestras columnas para ver de entrada el contexto de la situación de la muestra en estudio.

In [5]:
# Count students per gender. rename_axis + reset_index(name=...) yields the
# columns ['gender', 'count'] on both pandas 1.x and 2.x; the previous
# rename({'index': 'gender', 'gender': 'count'}) produced two 'count' columns
# on pandas >= 2.0, where value_counts() already names its result.
gender_count = data['gender'].value_counts().rename_axis('gender').reset_index(name='count')
# Share of the whole sample (a proportion, not an average)
gender_count['percent'] = gender_count['count'] / gender_count['count'].sum()
gender_count

Unnamed: 0,gender,count,percent
0,female,518,0.518
1,male,482,0.482


In [6]:
 bars = alt.Chart(gender_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("gender:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('gender:O',title = 'Gender' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="gender", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

(bars + text).properties(
    title = f"Distribution of Gender",
    width = 600,
    height= 150,
).configure_title(
    fontSize = 20,
    font = "Arial",
).configure_view(
    strokeWidth = 0,)

In [12]:
# Count students per ethnicity group. rename_axis + reset_index(name=...)
# gives ['ethnicity', 'count'] on pandas 1.x and 2.x alike (the old rename
# mapping produced duplicate 'count' columns on pandas >= 2.0).
ethnicity_count = data['ethnicity'].value_counts().rename_axis('ethnicity').reset_index(name='count')
# Share of the whole sample
ethnicity_count['percent'] = ethnicity_count['count'] / ethnicity_count['count'].sum()
ethnicity_count

Unnamed: 0,ethnicity,count,percent
0,group C,319,0.319
1,group D,262,0.262
2,group B,190,0.19
3,group E,140,0.14
4,group A,89,0.089


In [13]:
 bars = alt.Chart(ethnicity_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("ethnicity:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('ethnicity:O',title = 'Ethnicity' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="ethnicity", type="nominal",legend=None ),
)

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

(bars + text).properties(
    title = f"Distribution of Ethnicity",
    width = 700,
    height= 150,
).configure_title(
    fontSize = 20,
    font = "Arial",
).configure_view(
    strokeWidth = 0,
) 

In [14]:
# Count students per parental education level. rename_axis + reset_index(name=...)
# gives ['parental_education', 'count'] on pandas 1.x and 2.x alike (the old
# rename mapping produced duplicate 'count' columns on pandas >= 2.0).
pEducation_count = (
    data['parental_education']
    .value_counts()
    .rename_axis('parental_education')
    .reset_index(name='count')
)
# Share of the whole sample
pEducation_count['percent'] = pEducation_count['count'] / pEducation_count['count'].sum()
pEducation_count

Unnamed: 0,parental_education,count,percent
0,some college,226,0.226
1,associate's degree,222,0.222
2,high school,196,0.196
3,some high school,179,0.179
4,bachelor's degree,118,0.118
5,master's degree,59,0.059


In [15]:
 bars = alt.Chart(pEducation_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("parental_education:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('parental_education:O',title = 'Parental education' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="parental_education", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

(bars + text).properties(
    title = f"Distribution of Parental Education",
    width = 600,
    height= 150,
).configure_title(
    fontSize = 20,
    font = "Arial",
).configure_view(
    strokeWidth = 0,)

In [16]:
# Count students per lunch type. rename_axis + reset_index(name=...) gives
# ['lunch', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
lunch_count = data['lunch'].value_counts().rename_axis('lunch').reset_index(name='count')
# Share of the whole sample
lunch_count['percent'] = lunch_count['count'] / lunch_count['count'].sum()
lunch_count

Unnamed: 0,lunch,count,percent
0,standard,645,0.645
1,free/reduced,355,0.355


In [17]:
 bars = alt.Chart(lunch_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("lunch:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('lunch:O',title = 'Lunch' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="lunch", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

(bars + text).properties(
    title = f"Distribution of Lunch",
    width = 600,
    height= 150,
).configure_title(
    fontSize = 20,
    font = "Arial",
).configure_view(
    strokeWidth = 0,)

In [18]:
# Count students by test-preparation-course status. rename_axis +
# reset_index(name=...) gives ['course', 'count'] on pandas 1.x and 2.x alike
# (the old rename mapping produced duplicate 'count' columns on pandas >= 2.0).
course_count = data['course'].value_counts().rename_axis('course').reset_index(name='count')
# Share of the whole sample
course_count['percent'] = course_count['count'] / course_count['count'].sum()
course_count

Unnamed: 0,course,count,percent
0,none,642,0.642
1,completed,358,0.358


In [19]:
 bars = alt.Chart(course_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("course:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('course:O',title = 'Preparation course' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="course", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

(bars + text).properties(
    title = f"Distribution of the Preparation Course",
    width = 600,
    height= 150,
).configure_title(
    fontSize = 20,
    font = "Arial",
).configure_view(
    strokeWidth = 0,)

## Students Performance

Comienzo a desarrollar gráficos para ver el rendimiento de los estudiantes ante las circunstancias a las que los diferentes grupos estuvieron expuestos; partimos de buscar el rendimiento de cada grupo según el puntaje obtenido en cada asignatura.

In [7]:
# Mean score per subject for each gender, one two-column frame per subject
gender_math, gender_reading, gender_writing = (
    data.groupby('gender')[subject].mean().reset_index()
    for subject in ('math', 'reading', 'writing')
)

In [8]:
 bars = alt.Chart(gender_math).mark_bar(stroke = "black").encode(
    x= alt.X("gender:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('gender', title = 'Gender' ), alt.Tooltip ('math:O',title = 'Math Score', format = ".4s" )],
    color=alt.Color(field="gender", type="nominal",legend=None ),
    y= alt.Y("math:Q", 
        title = "Math", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('math:Q', format = ".3s"))

g_math = (bars + text).properties(
    width = 300,
)
g_math

In [9]:
 bars = alt.Chart(gender_reading).mark_bar(stroke = "black").encode(
    x= alt.X("gender:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('gender', title = 'Gender' ), alt.Tooltip ('reading:O',title = 'Reading Score', format = ".4s" )],
    color=alt.Color(field="gender", type="nominal",legend=None ),
    y= alt.Y("reading:Q", 
        title = "Reading", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('reading:Q', format = ".3s"))

g_reading = (bars + text).properties(
    width = 300,
)
g_reading

In [10]:
 bars = alt.Chart(gender_writing).mark_bar(stroke = "black").encode(
    x= alt.X("gender:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('gender', title = 'Gender' ), alt.Tooltip ('writing:O',title = 'Writing Score', format = ".4s" )],
    color=alt.Color(field="gender", type="nominal",legend=None ),
    y= alt.Y("writing:Q", 
        title = "Writing", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('writing:Q', format = ".3s"))

g_writing = (bars + text).properties(
    width = 300,
)
g_writing

In [11]:
# All three subject charts side by side
(
    alt.hconcat(g_math, g_reading, g_writing)
    .properties(title='Performance by Gender')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [12]:
# Mean score per subject for each ethnicity group, one two-column frame per subject
ethnicity_math, ethnicity_reading, ethnicity_writing = (
    data.groupby('ethnicity')[subject].mean().reset_index()
    for subject in ('math', 'reading', 'writing')
)

In [13]:
 bars = alt.Chart(ethnicity_writing).mark_bar(stroke = "black").encode(
    x= alt.X("ethnicity:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('ethnicity', title = 'Ethnicity' ), alt.Tooltip ('writing:O',title = 'Writing Score', format = ".4s" )],
    color=alt.Color(field="ethnicity", type="nominal",legend=None ),
    y= alt.Y("writing:Q", 
        title = "Writing", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('writing:Q', format = ".3s"))

e_writing = (bars + text).properties(
    width = 300,)

e_writing

In [14]:
 bars = alt.Chart(ethnicity_reading).mark_bar(stroke = "black").encode(
    x= alt.X("ethnicity:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('ethnicity', title = 'Ethnicity' ), alt.Tooltip ('reading:O',title = 'Reading Score', format = ".4s" )],
    color=alt.Color(field="ethnicity", type="nominal",legend=None ),
    y= alt.Y("reading:Q", 
        title = "Reading", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('reading:Q', format = ".3s"))

e_reading = (bars + text).properties(
    width = 300,
)
e_reading

In [15]:
 bars = alt.Chart(ethnicity_math).mark_bar(stroke = "black").encode(
    x= alt.X("ethnicity:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('ethnicity', title = 'Ethnicity' ), alt.Tooltip ('math:O',title = 'Math Score', format = ".4s" )],
    color=alt.Color(field="ethnicity", type="nominal",legend=None ),
    y= alt.Y("math:Q", 
        title = "Math", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('math:Q', format = ".3s"))

e_math = (bars + text).properties(
    width = 300,
)
e_math

In [16]:
# All three subject charts side by side
(
    alt.hconcat(e_math, e_reading, e_writing)
    .properties(title='Performance by Ethnicity ')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [17]:
# Mean score per subject for each parental education level
pEducation_math = data.groupby('parental_education')['math'].mean().reset_index()
pEducation_reading = data.groupby('parental_education')['reading'].mean().reset_index()
pEducation_writing = data.groupby('parental_education')['writing'].mean().reset_index()
# Backward-compatible alias: this frame was originally bound only under the
# inconsistently capitalized name, which the reading chart cell still uses.
PEducation_reading = pEducation_reading

In [18]:
 bars = alt.Chart(pEducation_math).mark_bar(stroke = "black").encode(
    x= alt.X("parental_education:O", 
        title="",
        axis = alt.Axis(labelAngle=325, labelFontSize=11, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('parental_education', title = 'Parental Level of Lducation' ), alt.Tooltip ('math:O',title = 'Math Score', format = ".4s" )],
    color=alt.Color(field="parental_education", type="nominal",legend=None ),
    y= alt.Y("math:Q", 
        title = "Math", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('math:Q', format = ".3s")) 

pe_math = (bars + text).properties(
    width = 300,
)
pe_math

In [19]:
 bars = alt.Chart(PEducation_reading).mark_bar(stroke = "black").encode(
    x= alt.X("parental_education:O", 
        title="",
        axis = alt.Axis(labelAngle=325, labelFontSize=11, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('parental_education', title = 'Parental Level of Lducation' ), alt.Tooltip ('reading:O',title = 'Reading Score', format = ".4s" )],
    color=alt.Color(field="parental_education", type="nominal",legend=None ),
    y= alt.Y("reading:Q", 
        title = "Reading", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('reading:Q', format = ".3s"))

pe_reading = (bars + text).properties(
    width = 300,
)
pe_reading

In [20]:
 bars = alt.Chart(pEducation_writing).mark_bar(stroke = "black").encode(
    x= alt.X("parental_education:O", 
        title="",
        axis = alt.Axis(labelAngle=325, labelFontSize=11, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('parental_education', title = 'Parental Level of Lducation' ), alt.Tooltip ('writing:O',title = 'Writing Score', format = ".4s" )],
    color=alt.Color(field="parental_education", type="nominal",legend=None ),
    y= alt.Y("writing:Q", 
        title = "Writing", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('writing:Q', format = ".3s"))

pe_writing = (bars + text).properties(
    width = 300,
)
pe_writing

In [21]:
# Performance vs. parental level of education: all three subjects side by side
(
    alt.hconcat(pe_math, pe_reading, pe_writing)
    .properties(title="Performance according to the Parents' Level of Education")
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [22]:
# Mean score per subject for each lunch type, one two-column frame per subject
lunch_math, lunch_reading, lunch_writing = (
    data.groupby('lunch')[subject].mean().reset_index()
    for subject in ('math', 'reading', 'writing')
)

In [23]:
 bars = alt.Chart(lunch_math).mark_bar(stroke = "black").encode(
    x= alt.X("lunch:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('lunch', title = 'Lunch' ), alt.Tooltip ('math:O',title = 'Math Score', format = ".4s" )],
    color=alt.Color(field="lunch", type="nominal",legend=None ),
    y= alt.Y("math:Q", 
        title = "Math", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('math:Q', format = ".3s"))

l_math = (bars + text).properties(
    width = 300,
)
l_math

In [24]:
 bars = alt.Chart(lunch_reading).mark_bar(stroke = "black").encode(
    x= alt.X("lunch:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('lunch', title = 'Lunch' ), alt.Tooltip ('reading:O',title = 'Reading Score', format = ".4s" )],
    color=alt.Color(field="lunch", type="nominal",legend=None ),
    y= alt.Y("reading:Q", 
        title = "Reading", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('reading:Q', format = ".3s"))

l_reading = (bars + text).properties(
    width = 300,
)
l_reading

In [25]:
 bars = alt.Chart(lunch_writing).mark_bar(stroke = "black").encode(
    x= alt.X("lunch:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('lunch', title = 'Lunch' ), alt.Tooltip ('writing:O',title = 'Writing Score', format = ".4s" )],
    color=alt.Color(field="lunch", type="nominal",legend=None ),
    y= alt.Y("writing:Q", 
        title = "Writing", 
        axis = alt.Axis( grid = False, )),
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('writing:Q', format = ".3s")) 

l_writing = (bars + text).properties(
    width = 300,
)
l_writing

In [29]:
# All three subject charts side by side
(
    alt.hconcat(l_math, l_reading, l_writing)
    .properties(title='Performance - Lunch ')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [30]:
# Mean score per subject depending on whether the preparation course was taken
course_math, course_reading, course_writing = (
    data.groupby('course')[subject].mean().reset_index()
    for subject in ('math', 'reading', 'writing')
)

In [31]:
 bars = alt.Chart(course_math).mark_bar(stroke = "black").encode(
    x= alt.X("course:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('course', title = 'Test Preparation Course' ), alt.Tooltip ('math:O',title = 'Math Score', format = ".4s" )],
    color=alt.Color(field="course", type="nominal",legend=None ),
    y= alt.Y("math:Q", 
        title = "Math", 
        axis = alt.Axis( grid = False, )),#format='.0%',
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('math:Q', format = ".3s"))#format='.0%' 

c_math = (bars + text).properties(
    #title = f"Math Score",
    width = 300,
    #height= 150,
)
c_math

In [32]:
 bars = alt.Chart(course_reading).mark_bar(stroke = "black").encode(
    x= alt.X("course:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('course', title = 'Test Preparation Course' ), alt.Tooltip ('reading:O',title = 'Reading Score', format = ".4s" )],
    color=alt.Color(field="course", type="nominal",legend=None ),
    y= alt.Y("reading:Q", 
        title = "Reading", 
        axis = alt.Axis( grid = False, )),#format='.0%',
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('reading:Q', format = ".3s"))#format='.0%' 

c_reading = (bars + text).properties(
    #title = f"Reading Score",
    width = 300,
    #height= 150,
)
c_reading

In [33]:
 bars = alt.Chart(course_writing).mark_bar(stroke = "black").encode(
    x= alt.X("course:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('course', title = 'Test Preparation Course' ), alt.Tooltip ('writing:O',title = 'Writing Score', format = ".4s" )],
    color=alt.Color(field="course", type="nominal",legend=None ),
    y= alt.Y("writing:Q", 
        title = "Writing", 
        axis = alt.Axis( grid = False, )),#format='.0%',
 )

text = bars.mark_text(
    dy=-10,
    size=14,
    fill = "black",
).encode(text= alt.Text('writing:Q', format = ".3s"))#format='.0%' 

c_writing = (bars + text).properties(
    #title = f"Writing Score",
    width = 300,
    #height= 150,
)
c_writing

In [34]:
# All three subject charts side by side
(
    alt.hconcat(c_math, c_reading, c_writing)
    .properties(title='Performance with Test Preparation Course')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

## Group A vs Group E

En este caso me llamó mucho la atención la diferencia del promedio de acuerdo a la etnia, donde destaca el grupo A como el peor y el grupo E como el mejor, por lo que quise indagar un poco más en el background de cada grupo.

Preparamos datos para A y E

In [58]:
# Subset of students in ethnicity "group A"
data_a = data[data["ethnicity"] == "group A"]
# Gender counts within the group. rename_axis + reset_index(name=...) gives
# ['gender', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
a_gender_count = data_a['gender'].value_counts().rename_axis('gender').reset_index(name='count')
# Share within the group
a_gender_count['percent'] = a_gender_count['count'] / a_gender_count['count'].sum()
a_gender_count

Unnamed: 0,gender,count,percent
0,male,53,0.595506
1,female,36,0.404494


In [61]:
# Subset of students in ethnicity "group E"
data_e = data[data["ethnicity"] == "group E"]
# Gender counts within the group. rename_axis + reset_index(name=...) gives
# ['gender', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
e_gender_count = data_e['gender'].value_counts().rename_axis('gender').reset_index(name='count')
# Share within the group
e_gender_count['percent'] = e_gender_count['count'] / e_gender_count['count'].sum()
e_gender_count

Unnamed: 0,gender,count,percent
0,male,71,0.507143
1,female,69,0.492857


In [68]:
 bars = alt.Chart(a_gender_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "Group A", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("gender:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('gender:O',title = 'Gender' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="gender", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

g_gA = (bars + text).properties(
    width = 600,
    height= 150,
)

In [69]:
 bars = alt.Chart(e_gender_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "Group E", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("gender:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('gender:O',title = 'Gender' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="gender", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

g_gE = (bars + text).properties(
    width = 600,
    height= 150,
)

In [73]:
# Group A above group E for direct comparison
(
    alt.vconcat(g_gA, g_gE)
    .properties(title='Gender - Group A vs Group E ')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [74]:
# Parental education counts within group A. rename_axis + reset_index(name=...)
# gives ['parental_education', 'count'] on pandas 1.x and 2.x alike (the old
# rename mapping produced duplicate 'count' columns on pandas >= 2.0).
a_pEducation_count = (
    data_a['parental_education']
    .value_counts()
    .rename_axis('parental_education')
    .reset_index(name='count')
)
# Share within the group
a_pEducation_count['percent'] = a_pEducation_count['count'] / a_pEducation_count['count'].sum()
a_pEducation_count

Unnamed: 0,parental_education,count,percent
0,some high school,24,0.269663
1,some college,18,0.202247
2,high school,18,0.202247
3,associate's degree,14,0.157303
4,bachelor's degree,12,0.134831
5,master's degree,3,0.033708


In [75]:
# Parental education counts within group E. rename_axis + reset_index(name=...)
# gives ['parental_education', 'count'] on pandas 1.x and 2.x alike (the old
# rename mapping produced duplicate 'count' columns on pandas >= 2.0).
e_pEducation_count = (
    data_e['parental_education']
    .value_counts()
    .rename_axis('parental_education')
    .reset_index(name='count')
)
# Share within the group
e_pEducation_count['percent'] = e_pEducation_count['count'] / e_pEducation_count['count'].sum()
e_pEducation_count

Unnamed: 0,parental_education,count,percent
0,associate's degree,39,0.278571
1,some college,35,0.25
2,high school,22,0.157143
3,bachelor's degree,18,0.128571
4,some high school,18,0.128571
5,master's degree,8,0.057143


In [76]:
 bars = alt.Chart(a_pEducation_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "Group A", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("parental_education:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('parental_education:O',title = 'Parental education' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="parental_education", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

pe_gA = (bars + text).properties(
    width = 600,
    height= 150,
)

In [77]:
 bars = alt.Chart(e_pEducation_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "group E", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("parental_education:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('parental_education:O',title = 'Parental education' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="parental_education", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

pe_gE = (bars + text).properties(
    width = 600,
    height= 150,
)

In [78]:
# Group A above group E for direct comparison
(
    alt.vconcat(pe_gA, pe_gE)
    .properties(title='Parental Education Group A vs Group E ')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [84]:
# Lunch-type counts within group A. rename_axis + reset_index(name=...) gives
# ['lunch', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
a_lunch_count = data_a['lunch'].value_counts().rename_axis('lunch').reset_index(name='count')
# Share within the group
a_lunch_count['percent'] = a_lunch_count['count'] / a_lunch_count['count'].sum()
a_lunch_count

Unnamed: 0,lunch,count,percent
0,standard,53,0.595506
1,free/reduced,36,0.404494


In [86]:
# Lunch-type counts within group E. rename_axis + reset_index(name=...) gives
# ['lunch', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
e_lunch_count = data_e['lunch'].value_counts().rename_axis('lunch').reset_index(name='count')
# Share within the group
e_lunch_count['percent'] = e_lunch_count['count'] / e_lunch_count['count'].sum()
e_lunch_count

Unnamed: 0,lunch,count,percent
0,standard,99,0.707143
1,free/reduced,41,0.292857


In [87]:
 bars = alt.Chart(a_lunch_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "group A", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("lunch:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('lunch:O',title = 'Lunch' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="lunch", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

l_gA = (bars + text).properties(
    width = 600,
    height= 150,
)

In [88]:
 bars = alt.Chart(e_lunch_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "group E", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("lunch:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('lunch:O',title = 'Lunch' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="lunch", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

l_gE = (bars + text).properties(
    width = 600,
    height= 150,
)

In [89]:
# Group A above group E for direct comparison
(
    alt.vconcat(l_gA, l_gE)
    .properties(title='Lunch - Group A vs Group E ')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)

In [90]:
# Preparation-course counts within group A. rename_axis + reset_index(name=...)
# gives ['course', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
a_course_count = data_a['course'].value_counts().rename_axis('course').reset_index(name='count')
# Share within the group
a_course_count['percent'] = a_course_count['count'] / a_course_count['count'].sum()
a_course_count

Unnamed: 0,course,count,percent
0,none,58,0.651685
1,completed,31,0.348315


In [91]:
# Preparation-course counts within group E. rename_axis + reset_index(name=...)
# gives ['course', 'count'] on pandas 1.x and 2.x alike (the old rename mapping
# produced duplicate 'count' columns on pandas >= 2.0).
e_course_count = data_e['course'].value_counts().rename_axis('course').reset_index(name='count')
# Share within the group
e_course_count['percent'] = e_course_count['count'] / e_course_count['count'].sum()
e_course_count

Unnamed: 0,course,count,percent
0,none,80,0.571429
1,completed,60,0.428571


In [92]:
 bars = alt.Chart(a_course_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "group A", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("course:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('course:O',title = 'Preparation course' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="course", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

c_gA =(bars + text).properties(
    width = 600,
    height= 150,
)

In [93]:
 bars = alt.Chart(e_course_count).mark_bar(stroke = "white").encode(
    x= alt.X("percent:Q", 
        title = "group E", 
        axis = alt.Axis(format='.0%', grid = False, )),
    y= alt.Y("course:O", 
        title="",
        axis = alt.Axis(labelAngle=0, labelFontSize=14, labelColor= "#343a40", labelFont="Arial") ),    
    tooltip = [alt.Tooltip('count', title = 'Count' ), alt.Tooltip ('course:O',title = 'Preparation course' ) , alt.Tooltip('percent:Q', format='.1%', title = 'Percent' )],
    color=alt.Color(field="course", type="nominal",legend=None ))

text = bars.mark_text(
    dx=+20,
    size=16,
    fill = "black",
).encode(text= alt.Text('percent:Q', format='.0%'))

c_gE = (bars + text).properties(
    width = 600,
    height= 150,
)

In [95]:
# Group A above group E for direct comparison
(
    alt.vconcat(c_gA, c_gE)
    .properties(title='Test Preparation - Course Group A vs Group E')
    .configure_title(fontSize=18, font="Arial", anchor="middle")
    .configure_view(strokeWidth=0)
)