<a href="https://colab.research.google.com/github/SilvestreFer/pandas-data-analysis-project/blob/main/student_performance_pandas_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧮**STUDENT PERFORMANCE DATASET**

This notebook was created to practice fundamental Pandas operations using a small dataset containing students’ grades, ages, and approval status.

The analyses include exploring the dataset (using head(), tail(), and shape()), handling null values, adding and removing columns and rows, performing basic calculations (averages and sums), and inserting new data.

The main goal of this notebook is to strengthen data manipulation and exploration skills with Pandas through hands-on experimentation.

In [1]:
import pandas as pd

###**Loading the Data**

In [2]:
url = 'https://raw.githubusercontent.com/alura-cursos/pandas-conhecendo-a-biblioteca/main/desafios/alunos.csv'
students_df = pd.read_csv(url)
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20,7.5,True
1,Ana,18,,False
2,Cátia,27,2.5,False
3,Denis,18,5.0,False
4,Beto,21,10.0,True
5,Bruna,23,,False
6,Dara,21,7.0,True
7,Carlos,19,6.0,True
8,Alice,35,5.6,False
9,Vitor,28,,False


In [3]:
students_df.head(7)

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20,7.5,True
1,Ana,18,,False
2,Cátia,27,2.5,False
3,Denis,18,5.0,False
4,Beto,21,10.0,True
5,Bruna,23,,False
6,Dara,21,7.0,True


In [4]:
students_df.tail()

Unnamed: 0,Nome,Idade,Notas,Aprovado
13,Mirla,25,9.0,True
14,Paulo,37,,False
15,Mylena,29,7.0,True
16,Lucas,33,,False
17,Nadia,34,8.0,True


In [5]:
students_df.shape

(18, 4)

In [6]:
students_df.columns

Index(['Nome', 'Idade', 'Notas', 'Aprovado'], dtype='object')

In [7]:
students_df['Nome']

Unnamed: 0,Nome
0,Ary
1,Ana
2,Cátia
3,Denis
4,Beto
5,Bruna
6,Dara
7,Carlos
8,Alice
9,Vitor


In [8]:
students_df[['Idade', 'Notas']]

Unnamed: 0,Idade,Notas
0,20,7.5
1,18,
2,27,2.5
3,18,5.0
4,21,10.0
5,23,
6,21,7.0
7,19,6.0
8,35,5.6
9,28,


In [9]:
students_df.dtypes

Unnamed: 0,0
Nome,object
Idade,int64
Notas,float64
Aprovado,bool


In [10]:
students_df.describe()

Unnamed: 0,Idade,Notas
count,18.0,12.0
mean,25.5,6.8
std,6.070662,2.204953
min,18.0,2.5
25%,21.0,5.45
50%,24.5,7.0
75%,28.75,8.25
max,37.0,10.0


### **Calculating the mean (average) of the 'Age' column**

In [14]:
mean_age = students_df['Idade'].mean()
print(f'The mean age is {round(mean_age)}')

The mean age is 26


### Using **.describe()** and .loc ()

In [15]:
# Calculate a full set of descriptive statistics for all numeric columns.
statistics = students_df.describe()

# Access the 'mean' row and 'Age' column from the statistics DataFrame using .loc.
mean_age_loc = statistics.loc['mean', 'Idade']

# Print the result.
print(f'The mean age is {round(mean_age_loc)}')

The mean age is 26


### **Calculating the mean of the 'Grades' column**

In [None]:
# Using double brackets [['Grades']] returns a DataFrame (the result is a DataFrame of means).
mean_grades_df = students_df[['Notas']].mean()

# Print the result. Note that the output will show the column name and the mean value.
print(f'The mean grades is:\n{mean_grades_df}')

The mean grades is:
Notas    6.8
dtype: float64


###**Missing values**

In [16]:
students_df.isnull().sum()

Unnamed: 0,0
Nome,0
Idade,0
Notas,6
Aprovado,0


In [17]:
students_df.fillna(0, inplace=True)
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20,7.5,True
1,Ana,18,0.0,False
2,Cátia,27,2.5,False
3,Denis,18,5.0,False
4,Beto,21,10.0,True
5,Bruna,23,0.0,False
6,Dara,21,7.0,True
7,Carlos,19,6.0,True
8,Alice,35,5.6,False
9,Vitor,28,0.0,False


In [18]:
students_df.isnull().sum()

Unnamed: 0,0
Nome,0
Idade,0
Notas,0
Aprovado,0


### **Removing Students from the Dataset**

In [19]:
index_to_remove = students_df[students_df['Nome'].isin(['Alice', 'Carlos'])].index.tolist()
index_to_remove

[7, 8]

In [20]:
students_df.drop(index_to_remove, axis=0, inplace=True)

In [21]:
if 'Alice' in students_df['Nome'] or 'Carlos' in students_df['Nome']:
    print('Os alunos "Alice" e "Carlos" ainda estão na base de dados.')
else:
    print('Os alunos não encontrados na base de dados.')

Os alunos não encontrados na base de dados.


In [22]:
# Re-adding the students to apply a different removal method
students_df.loc[7, 'Nome'] = 'Alice'
students_df.loc[8, 'Nome'] = 'Carlos'
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
1,Ana,18.0,0.0,False
2,Cátia,27.0,2.5,False
3,Denis,18.0,5.0,False
4,Beto,21.0,10.0,True
5,Bruna,23.0,0.0,False
6,Dara,21.0,7.0,True
9,Vitor,28.0,0.0,False
10,Daniel,21.0,0.0,False
11,Igor,24.0,4.5,False


In [23]:
# Different removal method

students_df.drop([7, 8], axis=0, inplace=True)
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
1,Ana,18.0,0.0,False
2,Cátia,27.0,2.5,False
3,Denis,18.0,5.0,False
4,Beto,21.0,10.0,True
5,Bruna,23.0,0.0,False
6,Dara,21.0,7.0,True
9,Vitor,28.0,0.0,False
10,Daniel,21.0,0.0,False
11,Igor,24.0,4.5,False


In [24]:
# Different removal method

students_to_remove = students_df.query('Nome == "Alice" | Nome == "Carlos"').index
students_df.drop(students_to_remove, axis=0, inplace=True)
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
1,Ana,18.0,0.0,False
2,Cátia,27.0,2.5,False
3,Denis,18.0,5.0,False
4,Beto,21.0,10.0,True
5,Bruna,23.0,0.0,False
6,Dara,21.0,7.0,True
9,Vitor,28.0,0.0,False
10,Daniel,21.0,0.0,False
11,Igor,24.0,4.5,False


###**Filtering Approved Students**

In [25]:
approved_students_df = students_df[students_df['Aprovado'] == True]
approved_students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
4,Beto,21.0,10.0,True
6,Dara,21.0,7.0,True
12,Sthefanie,26.0,9.5,True
13,Mirla,25.0,9.0,True
15,Mylena,29.0,7.0,True
17,Nadia,34.0,8.0,True


### **Saving Approved Students to a CSV File**

In [26]:
approved_students_df.to_csv('lista_de_aprovados.csv', index=False)

In [27]:
pd.read_csv('lista_de_aprovados.csv')

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
1,Beto,21.0,10.0,True
2,Dara,21.0,7.0,True
3,Sthefanie,26.0,9.5,True
4,Mirla,25.0,9.0,True
5,Mylena,29.0,7.0,True
6,Nadia,34.0,8.0,True


### **Updating Grades from 7.0 to 8.0**

In [28]:
approved_students_df['Notas'].replace(7.0, 8.0)
approved_students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
4,Beto,21.0,10.0,True
6,Dara,21.0,7.0,True
12,Sthefanie,26.0,9.5,True
13,Mirla,25.0,9.0,True
15,Mylena,29.0,7.0,True
17,Nadia,34.0,8.0,True


In [31]:
# Saving the updated grades to the CSV file
approved_students_df.to_csv('lista_de_aprovados.csv', index=False)

In [32]:
pd.read_csv('lista_de_aprovados.csv')

Unnamed: 0,Nome,Idade,Notas,Aprovado
0,Ary,20.0,7.5,True
1,Beto,21.0,10.0,True
2,Dara,21.0,7.0,True
3,Sthefanie,26.0,9.5,True
4,Mirla,25.0,9.0,True
5,Mylena,29.0,7.0,True
6,Nadia,34.0,8.0,True


### **Adding Extra Credit Points**

The students participated in an extracurricular activity and earned extra points.  
These extra points correspond to **40% of their current grade**.  
A new column named **"Extra_points"** will be created to store these values.

In [33]:
students_df['Pontos_extras'] = students_df['Notas'].apply(lambda x: x* 0.4)
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado,Pontos_extras
0,Ary,20.0,7.5,True,3.0
1,Ana,18.0,0.0,False,0.0
2,Cátia,27.0,2.5,False,1.0
3,Denis,18.0,5.0,False,2.0
4,Beto,21.0,10.0,True,4.0
5,Bruna,23.0,0.0,False,0.0
6,Dara,21.0,7.0,True,2.8
9,Vitor,28.0,0.0,False,0.0
10,Daniel,21.0,0.0,False,0.0
11,Igor,24.0,4.5,False,1.8


### **Calculating Final Grades**

In [34]:
#Creating a new column named "Final_grades".

students_df['Notas_finais'] = students_df['Notas'] + students_df['Pontos_extras']
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado,Pontos_extras,Notas_finais
0,Ary,20.0,7.5,True,3.0,10.5
1,Ana,18.0,0.0,False,0.0,0.0
2,Cátia,27.0,2.5,False,1.0,3.5
3,Denis,18.0,5.0,False,2.0,7.0
4,Beto,21.0,10.0,True,4.0,14.0
5,Bruna,23.0,0.0,False,0.0,0.0
6,Dara,21.0,7.0,True,2.8,9.8
9,Vitor,28.0,0.0,False,0.0,0.0
10,Daniel,21.0,0.0,False,0.0,0.0
11,Igor,24.0,4.5,False,1.8,6.3


### **Calculating Final Approval Status**

Due to the extra points, some students who were previously not approved may now pass.  
Creating a new column named **"Final_approved"** with the following values:

- **True**: if the student's final grade is **greater than or equal to 6** (approved)  
- **False**: if the student's final grade is **less than 6** (not approved)


In [35]:
students_df['Aprovado_final'] = students_df['Notas_finais'].apply(lambda x: True if x >= 6 else False)
students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado,Pontos_extras,Notas_finais,Aprovado_final
0,Ary,20.0,7.5,True,3.0,10.5,True
1,Ana,18.0,0.0,False,0.0,0.0,False
2,Cátia,27.0,2.5,False,1.0,3.5,False
3,Denis,18.0,5.0,False,2.0,7.0,True
4,Beto,21.0,10.0,True,4.0,14.0,True
5,Bruna,23.0,0.0,False,0.0,0.0,False
6,Dara,21.0,7.0,True,2.8,9.8,True
9,Vitor,28.0,0.0,False,0.0,0.0,False
10,Daniel,21.0,0.0,False,0.0,0.0,False
11,Igor,24.0,4.5,False,1.8,6.3,True


### **Identifying Students Who Passed After Extra Points**


In [36]:
newly_approved_filter = (students_df['Aprovado'] == False) & (students_df['Aprovado_final'] == True)
students_df[newly_approved_filter]

Unnamed: 0,Nome,Idade,Notas,Aprovado,Pontos_extras,Notas_finais,Aprovado_final
3,Denis,18.0,5.0,False,2.0,7.0,True
11,Igor,24.0,4.5,False,1.8,6.3,True


In [43]:
# Updating approved students list

approved_students_df = students_df[students_df['Aprovado_final'] == True]
approved_students_df

Unnamed: 0,Nome,Idade,Notas,Aprovado,Pontos_extras,Notas_finais,Aprovado_final
0,Ary,20.0,7.5,True,3.0,10.5,True
3,Denis,18.0,5.0,False,2.0,7.0,True
4,Beto,21.0,10.0,True,4.0,14.0,True
6,Dara,21.0,7.0,True,2.8,9.8,True
11,Igor,24.0,4.5,False,1.8,6.3,True
12,Sthefanie,26.0,9.5,True,3.8,13.3,True
13,Mirla,25.0,9.0,True,3.6,12.6,True
15,Mylena,29.0,7.0,True,2.8,9.8,True
17,Nadia,34.0,8.0,True,3.2,11.2,True


In [44]:
# Saving updated list to CSV

approved_students_df.to_csv('lista_de_aprovados.csv', index=False)

In [45]:
pd.read_csv('lista_de_aprovados.csv')

Unnamed: 0,Nome,Idade,Notas,Aprovado,Pontos_extras,Notas_finais,Aprovado_final
0,Ary,20.0,7.5,True,3.0,10.5,True
1,Denis,18.0,5.0,False,2.0,7.0,True
2,Beto,21.0,10.0,True,4.0,14.0,True
3,Dara,21.0,7.0,True,2.8,9.8,True
4,Igor,24.0,4.5,False,1.8,6.3,True
5,Sthefanie,26.0,9.5,True,3.8,13.3,True
6,Mirla,25.0,9.0,True,3.6,12.6,True
7,Mylena,29.0,7.0,True,2.8,9.8,True
8,Nadia,34.0,8.0,True,3.2,11.2,True
