In [1]:
#This notebook demonstrates basic data manipulation using **Pandas**. Each step is explained below.
#Import Pandas and create DataFrame
#We import the pandas library and create a DataFrame containing employee data: Name, Age, Department, and Salary.

In [None]:
import pandas as pd

In [2]:
# Raw employee records, stored column-wise: every key is a column label and
# every list holds that column's values in row order (five employees).
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    Age=[24, 27, 22, 32, 29],
    Department=['HR', 'Finance', 'IT', 'Marketing', 'HR'],
    Salary=[45000, 54000, 50000, 62000, 47000],
)

In [3]:
# Build the DataFrame from the column-wise `data` dict; each dict key becomes
# a column. (Nothing is displayed by this cell - it only binds `df`.)
df = pd.DataFrame(data=data)

In [4]:
# Bare expression as the last line of the cell: the notebook renders `df`
# as this cell's output.
df

Unnamed: 0,Name,Age,Department,Salary
0,Alice,24,HR,45000
1,Bob,27,Finance,54000
2,Charlie,22,IT,50000
3,Diana,32,Marketing,62000
4,Eve,29,HR,47000


In [6]:
# Print the first five rows of the DataFrame as plain text.
print(df.head(n=5))

      Name  Age Department  Salary
0    Alice   24         HR   45000
1      Bob   27    Finance   54000
2  Charlie   22         IT   50000
3    Diana   32  Marketing   62000
4      Eve   29         HR   47000


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) restricted to
# the Age and Salary columns.
print(df.loc[:, ['Age', 'Salary']].describe())

             Age        Salary
count   5.000000      5.000000
mean   26.800000  51600.000000
std     3.962323   6730.527468
min    22.000000  45000.000000
25%    24.000000  47000.000000
50%    27.000000  50000.000000
75%    29.000000  54000.000000
max    32.000000  62000.000000


In [9]:
# Average salary of the employees in the HR department: select the Salary
# values of the rows whose Department equals 'HR' in one .loc lookup, then
# take their mean.
avg_hr_salary = df.loc[df['Department'] == 'HR', 'Salary'].mean()
print("Average HR Salary:", avg_hr_salary)

Average HR Salary: 46000.0


In [10]:
# Add a 'Bonus' column equal to 10% of each employee's Salary.
# This assigns onto `df` in place, so cells run after this one see the
# extra column.
df['Bonus'] = df['Salary'].mul(0.10)

In [11]:
# Bare expression as the last line of the cell: the notebook renders `df`
# as this cell's output (the 'Bonus' column is assigned in an earlier cell).
df

Unnamed: 0,Name,Age,Department,Salary,Bonus
0,Alice,24,HR,45000,4500.0
1,Bob,27,Finance,54000,5400.0
2,Charlie,22,IT,50000,5000.0
3,Diana,32,Marketing,62000,6200.0
4,Eve,29,HR,47000,4700.0


In [15]:
# Keep only the employees aged 25 to 30. Series.between is inclusive on
# both ends by default, matching `>= 25` and `<= 30`.
age_filter = df[df['Age'].between(25, 30)]
print(age_filter)

  Name  Age Department  Salary   Bonus
1  Bob   27    Finance   54000  5400.0
4  Eve   29         HR   47000  4700.0


In [16]:
# Average Salary per Department: one group per distinct Department value,
# reduced to the mean of that group's Salary column.
dept_avg_salary = (
    df
    .groupby(by='Department')['Salary']
    .mean()
)
print(dept_avg_salary)

Department
Finance      54000.0
HR           46000.0
IT           50000.0
Marketing    62000.0
Name: Salary, dtype: float64


In [22]:
# Order the rows from lowest to highest Salary (sort_values sorts ascending
# by default). The result is a new DataFrame; `df` itself keeps its order.
sorted_df = df.sort_values('Salary')
print(sorted_df)

      Name  Age Department  Salary   Bonus
0    Alice   24         HR   45000  4500.0
4      Eve   29         HR   47000  4700.0
2  Charlie   22         IT   50000  5000.0
1      Bob   27    Finance   54000  5400.0
3    Diana   32  Marketing   62000  6200.0


In [23]:
# Write the salary-sorted rows to a CSV file in the working directory.
# index=False leaves the row labels out of the file.
sorted_df.to_csv(path_or_buf="sorted_employees.csv", index=False)

In [24]:
# Load the saved CSV file back into a new DataFrame.
loaded_df = pd.read_csv(filepath_or_buffer="sorted_employees.csv")

In [26]:
# Print the DataFrame that was read back from sorted_employees.csv. The file
# was written with index=False, so the rows here carry a fresh 0..4 index
# rather than the original row labels.
print(loaded_df)

      Name  Age Department  Salary   Bonus
0    Alice   24         HR   45000  4500.0
1      Eve   29         HR   47000  4700.0
2  Charlie   22         IT   50000  5000.0
3      Bob   27    Finance   54000  5400.0
4    Diana   32  Marketing   62000  6200.0
