In [10]:
import pandas as pd
import numpy as np

# 1. Load the data
data = {
    'EmployeeID': [101, 102, 103, 104, 105],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Department': ['HR', 'IT', 'Finance', 'IT', 'HR'],
    'Salary': [50000, 60000, 55000, np.nan, 52000],
    'JoiningDate': ['2020-01-15', '2019-03-22', '2021-07-10', '2022-05-30', '2020-11-05']
}

df = pd.DataFrame(data)

# 2. Clean the data
# Handle missing values
df['Salary'].fillna(df['Salary'].mean(), inplace=True)

# Convert 'JoiningDate' to datetime
df['JoiningDate'] = pd.to_datetime(df['JoiningDate'])

# 3. Exploratory Data Analysis (EDA)
print("Data Summary:")
print(df.describe())

print("\nData Types:")
print(df.dtypes)

# 4. Performance Analysis
# Average salary by department
avg_salary = df.groupby('Department')['Salary'].mean()
print("\nAverage Salary by Department:")
print(avg_salary)



Data Summary:
       EmployeeID        Salary          JoiningDate
count    5.000000      5.000000                    5
mean   103.000000  54250.000000  2020-10-21 19:12:00
min    101.000000  50000.000000  2019-03-22 00:00:00
25%    102.000000  52000.000000  2020-01-15 00:00:00
50%    103.000000  54250.000000  2020-11-05 00:00:00
75%    104.000000  55000.000000  2021-07-10 00:00:00
max    105.000000  60000.000000  2022-05-30 00:00:00
std      1.581139   3766.629793                  NaN

Data Types:
EmployeeID              int64
Name                   object
Department             object
Salary                float64
JoiningDate    datetime64[ns]
dtype: object

Average Salary by Department:
Department
Finance    55000.0
HR         51000.0
IT         57125.0
Name: Salary, dtype: float64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].mean(), inplace=True)
