In [1]:
import pandas as pd
data = {
    'First Name': ['Alice', 'Bob', 'Charlie', 'David', 'Alice', 'Bob', 'Alice' , 'Alice' , 'Alice'],
    'Last Name' : ['Keza', 'Manzi', 'Twahirwa', 'Gasana', 'Keza', 'Manzi', 'Uwimana' , 'Uwimana' , 'Batamuriza'],
    'Subject': ['Math', 'Math', 'Science', 'Science', 'Math', 'Science', 'Math', 'Science', 'Math'],
    'Score': [85, 75, 90, 80, 95, 85, 60, 70, 90]
}

df = pd.DataFrame(data)

In [2]:
df.groupby(by='First Name').sum()

Unnamed: 0_level_0,Last Name,Subject,Score
First Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,KezaKezaUwimanaUwimanaBatamuriza,MathMathMathScienceMath,400
Bob,ManziManzi,MathScience,160
Charlie,Twahirwa,Science,90
David,Gasana,Science,80


In [3]:
df.groupby(by=['First Name','Last Name']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Subject,Score
First Name,Last Name,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,Batamuriza,Math,90
Alice,Keza,MathMath,180
Alice,Uwimana,MathScience,130
Bob,Manzi,MathScience,160
Charlie,Twahirwa,Science,90
David,Gasana,Science,80


# aggregation 

In [4]:
df.groupby('Subject').agg(MaxScore=('Score', 'max'), MinScore=('Score','min'))

Unnamed: 0_level_0,MaxScore,MinScore
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1
Math,95,60
Science,90,70


In [5]:
def add_bonus(marks):
    return sum(marks) + sum(marks) * 10/100


In [6]:
df.groupby(['First Name', 'Last Name'], as_index=False).agg(
    MaxScore = ('Score', 'max'),
    MinScore = ('Score', min),
    NumberOfSubject = ('Subject', 'count'),
    TotalScore = ('Score','sum'),
    ScoreWithBonus = ('Score', add_bonus)
)

  df.groupby(['First Name', 'Last Name'], as_index=False).agg(


Unnamed: 0,First Name,Last Name,MaxScore,MinScore,NumberOfSubject,TotalScore,ScoreWithBonus
0,Alice,Batamuriza,90,90,1,90,99.0
1,Alice,Keza,95,85,2,180,198.0
2,Alice,Uwimana,70,60,2,130,143.0
3,Bob,Manzi,85,75,2,160,176.0
4,Charlie,Twahirwa,90,90,1,90,99.0
5,David,Gasana,80,80,1,80,88.0


#Pivot

In [7]:
import pandas as pd
data = {
    'First Name': ['Alice', 'Bob', 'Charlie', 'David', 'Alice', 'Bob', 'Alice' , 'Alice' , 'Alice']*2,
    'Last Name' : ['Keza', 'Manzi', 'Twahirwa', 'Gasana', 'Keza', 'Manzi', 'Uwimana' , 'Uwimana' , 'Batamuriza']*2,
    'Subject': ['Math', 'Math', 'Science', 'Science', 'Math', 'Science', 'Math', 'Science', 'Math']*2,
    'Academic Year' : [2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023,2023,2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024],
    'Score': [85, 75, 90, 80, 95, 85, 60, 70, 85, 75, 90, 90, 73, 94, 90, 85, 60, 80]
}

df = pd.DataFrame(data)


In [8]:
df

Unnamed: 0,First Name,Last Name,Subject,Academic Year,Score
0,Alice,Keza,Math,2023,85
1,Bob,Manzi,Math,2023,75
2,Charlie,Twahirwa,Science,2023,90
3,David,Gasana,Science,2023,80
4,Alice,Keza,Math,2023,95
5,Bob,Manzi,Science,2023,85
6,Alice,Uwimana,Math,2023,60
7,Alice,Uwimana,Science,2023,70
8,Alice,Batamuriza,Math,2023,85
9,Alice,Keza,Math,2024,75


In [9]:
pivoted = df.pivot_table(index=['First Name', 'Last Name', 'Subject'], columns = 'Academic Year', values='Score')

In [10]:
pivoted.columns

Index([2023, 2024], dtype='int64', name='Academic Year')

In [11]:
pivoted.columns.name = None
pivoted.reset_index(inplace=True)

In [12]:
pivoted

Unnamed: 0,First Name,Last Name,Subject,2023,2024
0,Alice,Batamuriza,Math,85.0,80.0
1,Alice,Keza,Math,90.0,84.5
2,Alice,Uwimana,Math,60.0,85.0
3,Alice,Uwimana,Science,70.0,60.0
4,Bob,Manzi,Math,75.0,90.0
5,Bob,Manzi,Science,85.0,90.0
6,Charlie,Twahirwa,Science,90.0,90.0
7,David,Gasana,Science,80.0,73.0


In [13]:
def  check_change(change):
    if change == 0:
        return 'STABLE'
    elif change > 0:
        return 'INCREASE'
    else:
        return 'DECREASE'
    
pivoted['change'] = pivoted[2023] - pivoted[2024]
pivoted['Class Changes'] = pivoted['change'].apply(check_change)
pivoted

Unnamed: 0,First Name,Last Name,Subject,2023,2024,change,Class Changes
0,Alice,Batamuriza,Math,85.0,80.0,5.0,INCREASE
1,Alice,Keza,Math,90.0,84.5,5.5,INCREASE
2,Alice,Uwimana,Math,60.0,85.0,-25.0,DECREASE
3,Alice,Uwimana,Science,70.0,60.0,10.0,INCREASE
4,Bob,Manzi,Math,75.0,90.0,-15.0,DECREASE
5,Bob,Manzi,Science,85.0,90.0,-5.0,DECREASE
6,Charlie,Twahirwa,Science,90.0,90.0,0.0,STABLE
7,David,Gasana,Science,80.0,73.0,7.0,INCREASE


# Melt

In [14]:
pivoted['Change Lambda'] = pivoted.apply(lambda x: x[2024] - x[2023], axis=1)


In [15]:
pivoted

Unnamed: 0,First Name,Last Name,Subject,2023,2024,change,Class Changes,Change Lambda
0,Alice,Batamuriza,Math,85.0,80.0,5.0,INCREASE,-5.0
1,Alice,Keza,Math,90.0,84.5,5.5,INCREASE,-5.5
2,Alice,Uwimana,Math,60.0,85.0,-25.0,DECREASE,25.0
3,Alice,Uwimana,Science,70.0,60.0,10.0,INCREASE,-10.0
4,Bob,Manzi,Math,75.0,90.0,-15.0,DECREASE,15.0
5,Bob,Manzi,Science,85.0,90.0,-5.0,DECREASE,5.0
6,Charlie,Twahirwa,Science,90.0,90.0,0.0,STABLE,0.0
7,David,Gasana,Science,80.0,73.0,7.0,INCREASE,-7.0


In [16]:
pivoted.melt(id_vars=['First Name', 'Last Name', 'Subject'], value_vars=[2023, 2024],)

Unnamed: 0,First Name,Last Name,Subject,variable,value
0,Alice,Batamuriza,Math,2023,85.0
1,Alice,Keza,Math,2023,90.0
2,Alice,Uwimana,Math,2023,60.0
3,Alice,Uwimana,Science,2023,70.0
4,Bob,Manzi,Math,2023,75.0
5,Bob,Manzi,Science,2023,85.0
6,Charlie,Twahirwa,Science,2023,90.0
7,David,Gasana,Science,2023,80.0
8,Alice,Batamuriza,Math,2024,80.0
9,Alice,Keza,Math,2024,84.5


In [17]:
math_scores = pd.DataFrame({'Students Name': ['Keza', 'Manzi'],
                          'Score': [50, 80]})
sciences_scores = pd.DataFrame({'Students Name': ['Keza', 'Manzi'],
                                  'Data Science': [90, 80],
                                  'Eath Science':[75,84]
                                  })

In [18]:
math_scores

Unnamed: 0,Students Name,Score
0,Keza,50
1,Manzi,80


In [19]:
sciences_scores

Unnamed: 0,Students Name,Data Science,Eath Science
0,Keza,90,75
1,Manzi,80,84


In [20]:
melt_science = science_scores.melt(id_vars='Students Name', value_vars=['Data Science', 'Earth Science'], var_name='Class Name', value_name='Score')
melt_science

NameError: name 'science_scores' is not defined

In [None]:
pd.concat([melt_science, math_scores]).fillna('Math')

Unnamed: 0,Students Name,Class Name,Score
0,Keza,Data Science,90
1,Manzi,Data Science,80
2,Keza,Earth Science,75
3,Manzi,Earth Science,84
0,Keza,Math,50
1,Manzi,Math,80
