In [1]:
import pandas as pd

# Sample employee records in column-oriented form: each key is a column
# name and each list holds that column's values for the five employees.
data = {
    "Employee ID": ["E-001", "E-002", "E-003", "E-004", "E-005"],
    "Name": ["Alice Smith", "Bob Johnson", "Charlie Lee", "David Brown", "Eva Green"],
    "Department": ["HR", "Engineering", "Marketing", "Finance", "IT"],
    "Gender": ["F", "M", "M", "M", "F"],
    "Age": [34, 45, 29, 40, 31],
    "Salary (USD)": [70000, 85000, 60000, 95000, 80000],
    "Years with Company": [5, 10, 3, 12, 7],
    "Location": ["New York", "San Francisco", "Los Angeles", "Chicago", "Austin"]
}

# Fraction of salary paid out as the annual bonus (10%).
BONUS_RATE = 0.10

# Create DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print("Original DataFrame:")
print(df)

# Displaying first few rows of the DataFrame to inspect
print("\nFirst few rows of the DataFrame:")
print(df.head())

# Checking the structure and basic information of the DataFrame.
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called bare: wrapping it in print() would append a stray "None".
print("\nDataFrame info:")
df.info()

# Generating summary statistics for numerical columns
print("\nSummary statistics:")
print(df.describe())

# Filtering Data: Selecting employees from the Engineering department
engineering_employees = df[df['Department'] == 'Engineering']
print("\nEmployees from the Engineering department:")
print(engineering_employees)

# Adding a New Column: Annual Bonus is BONUS_RATE times the salary.
# The column is derived only from 'Salary (USD)', so re-running this
# statement recomputes the same values.
df['Annual Bonus (USD)'] = df['Salary (USD)'] * BONUS_RATE
print("\nDataFrame with Annual Bonus column:")
print(df)

# Sorting Data: Sorting employees by age (ascending); df itself is not reordered
sorted_by_age = df.sort_values(by='Age')
print("\nEmployees sorted by age:")
print(sorted_by_age)

# Grouping Data: Grouping by department and calculating the average salary
average_salary_by_department = df.groupby('Department')['Salary (USD)'].mean()
print("\nAverage salary by department:")
print(average_salary_by_department)

# Aggregation: Calculating the total salary and average years with the company
total_salary = df['Salary (USD)'].sum()
average_years_with_company = df['Years with Company'].mean()
print("\nTotal salary of all employees:", total_salary)
print("Average years with the company:", average_years_with_company)


Original DataFrame:
  Employee ID         Name   Department Gender  Age  Salary (USD)  \
0       E-001  Alice Smith           HR      F   34         70000   
1       E-002  Bob Johnson  Engineering      M   45         85000   
2       E-003  Charlie Lee    Marketing      M   29         60000   
3       E-004  David Brown      Finance      M   40         95000   
4       E-005    Eva Green           IT      F   31         80000   

   Years with Company       Location  
0                   5       New York  
1                  10  San Francisco  
2                   3    Los Angeles  
3                  12        Chicago  
4                   7         Austin  

First few rows of the DataFrame:
  Employee ID         Name   Department Gender  Age  Salary (USD)  \
0       E-001  Alice Smith           HR      F   34         70000   
1       E-002  Bob Johnson  Engineering      M   45         85000   
2       E-003  Charlie Lee    Marketing      M   29         60000   
3       E-004  David 