In [24]:
import pandas as pd

In [26]:
# Part A: DataFrame and Series Basics
# 1. Create a Pandas Series
series = pd.Series([10, 20, 30, 40, 50])
print("Series:", series)


Series: 0    10
1    20
2    30
3    40
4    50
dtype: int64


In [27]:
# 2. Create a DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 28, 35],
    'Department': ['HR', 'IT', 'Finance', 'IT'],
    'Salary': [50000, 70000, 60000, 80000]
}

In [28]:
df = pd.DataFrame(data)

In [29]:
# a. First two rows
df.head(2) # first 2 rows of the dataframe


Unnamed: 0,Name,Age,Department,Salary
0,Alice,25,HR,50000
1,Bob,30,IT,70000


In [30]:
# b. Column names and data types
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        4 non-null      object
 1   Age         4 non-null      int64 
 2   Department  4 non-null      object
 3   Salary      4 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 260.0+ bytes


In [31]:
# 3. Set "Name" column as index
df.set_index('Name', inplace=True)
print("DataFrame with 'Name' as index:", df)

DataFrame with 'Name' as index:          Age Department  Salary
Name                           
Alice     25         HR   50000
Bob       30         IT   70000
Charlie   28    Finance   60000
David     35         IT   80000


In [32]:
# Part B: Data Selection & Filtering
# 4a. Select “Department” and “Salary” columns
print("Department and Salary Columns:", df[['Department', 'Salary']])

Department and Salary Columns:         Department  Salary
Name                      
Alice           HR   50000
Bob             IT   70000
Charlie    Finance   60000
David           IT   80000


In [45]:

# 4b. Select rows where Age > 28
print("Rows with Age > 28:", df[df['Age'] > 28])

Rows with Age > 28:        Age Department  Salary   Bonus
Name                                 
Bob     30         IT   70000  7000.0
David   35         IT   80000  8000.0


In [46]:
# 4c. Department = IT and Salary > 60000
print("IT Department with Salary > 60000:", df[(df['Department'] == 'IT') & (df['Salary'] > 60000)])


IT Department with Salary > 60000:        Age Department  Salary   Bonus
Name                                 
Bob     30         IT   70000  7000.0
David   35         IT   80000  8000.0


In [66]:
df[['Department', 'Salary']]  # selected department and salary>60000
df['Salary']>60000

Name
Alice      False
Bob         True
Charlie    False
David       True
Name: Salary, dtype: bool

In [47]:
# 5. Add new column “Bonus” which is 10% of Salary
df['Bonus'] = df['Salary'] * 0.10
print("DataFrame with Bonus column:", df)

DataFrame with Bonus column:          Age       Department  Salary   Bonus
Name                                         
Alice     25  Human Resources   50000  5000.0
Bob       30               IT   70000  7000.0
Charlie   28          Finance   60000  6000.0
David     35               IT   80000  8000.0


In [57]:
# 6. Replace “HR” with “Human Resources”
df['Department'] = df['Department'].replace('HR', 'Human Resources')
print("Department column after replacement:\n", df['Department'])

Department column after replacement:
 Name
Alice      Human Resources
Bob                     IT
Charlie            Finance
David                   IT
Name: Department, dtype: object


In [58]:
# Part C: Aggregation and Sorting
# 7. Average salary of employees in “IT”
it_avg_salary = df[df['Department'] == 'IT']['Salary'].mean()
print("Average Salary in IT:", it_avg_salary)

Average Salary in IT: 75000.0


In [59]:
# 8. Sort DataFrame by Salary descending
df_sorted = df.sort_values(by='Salary', ascending=False)
print("DataFrame sorted by Salary:\n", df_sorted)

DataFrame sorted by Salary:
          Age       Department  Salary   Bonus
Name                                         
David     35               IT   80000  8000.0
Bob       30               IT   70000  7000.0
Charlie   28          Finance   60000  6000.0
Alice     25  Human Resources   50000  5000.0


In [60]:
# 9. Count employees in each Department
dept_count = df['Department'].value_counts()
print("Employee count per Department:", dept_count)

Employee count per Department: Department
IT                 2
Human Resources    1
Finance            1
Name: count, dtype: int64


In [61]:
# 10. Reset index
df_reset = df.reset_index()
print("Reset index:", df_reset)

Reset index:       Name  Age       Department  Salary   Bonus
0    Alice   25  Human Resources   50000  5000.0
1      Bob   30               IT   70000  7000.0
2  Charlie   28          Finance   60000  6000.0
3    David   35               IT   80000  8000.0


In [62]:
# Part D: Loading and Advance Aggregation

In [63]:
# 11. Load CSV, check shape and missing values
# Create a dummy CSV file .
dummy_df = df_reset  # loaded data
print("Shape of loaded DataFrame:", dummy_df.shape)
print("Missing values:", dummy_df.isnull().sum())

Shape of loaded DataFrame: (4, 5)
Missing values: Name          0
Age           0
Department    0
Salary        0
Bonus         0
dtype: int64


In [64]:
# 12. Group by Department and compute mean Salary and Age
grouped = dummy_df.groupby('Department')[['Salary', 'Age']].mean()
print("Grouped Mean Salary and Age by Department:", grouped)

Grouped Mean Salary and Age by Department:                   Salary   Age
Department                    
Finance          60000.0  28.0
Human Resources  50000.0  25.0
IT               75000.0  32.5
