In [0]:
employees_dirty = [
    {"id": "001", "name": "   nilesh ", "age": "29", "department": "Data", "salary": "75000", "joining_date": "2019-04-10"},
    {"id": "002", "name": "Ravi", "age": "NaN", "department": "data", "salary": "72000", "joining_date": "2018-06-20"},
    {"id": "003", "name": "Sneha", "age": 27, "department": "", "salary": None, "joining_date": "2020-11-15"},
    {"id": "004", "name": "Anjali", "age": -25, "department": "HR", "salary": "sixty thousand", "joining_date": "2017-03-01"},
    {"id": "005", "name": "Kiran", "age": 35, "department": "Finance", "salary": "90000", "joining_date": ""},
    {"id": "006", "name": "Ravi", "age": "30", "department": "DATA", "salary": "72000", "joining_date": "2018-06-20"},  # duplicate
    {"id": "007", "name": "", "age": 40, "department": "Finance", "salary": 88000, "joining_date": "NaN"},
    {"id": "008", "name": "Suresh", "age": None, "department": "IT", "salary": "100000", "joining_date": "2016-09-12"},
]


In [0]:
def CleanData(employees_dirty):
    clean = []  # new list to store cleaned records

    for emp in employees_dirty:
        record = emp.copy()  # make a copy so we donâ€™t change original

        # --- Clean Name ---
        name = record.get("name", "").strip().capitalize()
        record["name"] = name if name != "" else "Unknown"

        # --- Clean Department ---
        dept = record.get("department", "").strip().upper()
        record["department"] = dept if dept != "" else "UNASSIGNED"

        # --- Clean Age ---
        try:
            age = int(record.get("age", 0))
            record["age"] = age if age > 0 else None
        except:
            record["age"] = None

        # --- Clean Salary ---
        salary = record.get("salary", None)
        try:
            record["salary"] = int(salary)
        except:
            # handle cases like "sixty thousand" or None
            record["salary"] = None

        # --- Clean Joining Date ---
        join_date = record.get("joining_date", "").strip()
        record["joining_date"] = join_date if join_date not in ["", "NaN", None] else "Unknown"

        # Add to cleaned list
        clean.append(record)

    # --- Remove Duplicates (based on 'id') ---
    unique = []
    seen_ids = set()
    for emp in clean:
        if emp["id"] not in seen_ids:
            unique.append(emp)
            seen_ids.add(emp["id"])

    return unique



In [0]:
data=CleanData(employees_dirty)
for Cleaned_data in data:
  print(Cleaned_data)

{'id': '001', 'name': 'Nilesh', 'age': 29, 'department': 'DATA', 'salary': 75000, 'joining_date': '2019-04-10'}
{'id': '002', 'name': 'Ravi', 'age': None, 'department': 'DATA', 'salary': 72000, 'joining_date': '2018-06-20'}
{'id': '003', 'name': 'Sneha', 'age': 27, 'department': 'UNASSIGNED', 'salary': None, 'joining_date': '2020-11-15'}
{'id': '004', 'name': 'Anjali', 'age': None, 'department': 'HR', 'salary': None, 'joining_date': '2017-03-01'}
{'id': '005', 'name': 'Kiran', 'age': 35, 'department': 'FINANCE', 'salary': 90000, 'joining_date': 'Unknown'}
{'id': '006', 'name': 'Ravi', 'age': 30, 'department': 'DATA', 'salary': 72000, 'joining_date': '2018-06-20'}
{'id': '007', 'name': 'Unknown', 'age': 40, 'department': 'FINANCE', 'salary': 88000, 'joining_date': 'Unknown'}
{'id': '008', 'name': 'Suresh', 'age': None, 'department': 'IT', 'salary': 100000, 'joining_date': '2016-09-12'}


In [0]:
def calculate_avg_salary(Cleaned_data):
  total=0
  count=0
  for i in Cleaned_data:
    if i["salary"] is not None:
      total+=i["salary"]
      count+=1
  return total /count if count > 0 else 0




In [0]:
avg=calculate_avg_salary(data)
print(avg)

82833.33333333333


In [0]:
def oldest_youngest(data):
    valid_employees = [emp for emp in data if emp.get("age") is not None]

    if not valid_employees:
        return None, None  # Return None for both if no valid ages are found

    oldest = max(valid_employees, key=lambda emp: emp["age"])
    youngest = min(valid_employees, key=lambda emp: emp["age"])

    return oldest, youngest

In [0]:
new_old_young=oldest_youngest(data)


In [0]:
print(new_old_young)

({'id': '007', 'name': 'Unknown', 'age': 40, 'department': 'FINANCE', 'salary': 88000, 'joining_date': 'Unknown'}, {'id': '003', 'name': 'Sneha', 'age': 27, 'department': 'UNASSIGNED', 'salary': None, 'joining_date': '2020-11-15'})


In [0]:
def salary_min_max(data):
    valid_employees = [emp for emp in data if emp.get("salary") is not None]

    if not valid_employees:
        return None, None  # Return None for both if no valid ages are found

    maximum = max(valid_employees, key=lambda emp: emp["salary"])
    minimum = min(valid_employees, key=lambda emp: emp["salary"])

    return maximum, minimum

In [0]:
cal=salary_min_max(data)
print(cal)

({'id': '008', 'name': 'Suresh', 'age': None, 'department': 'IT', 'salary': 100000, 'joining_date': '2016-09-12'}, {'id': '002', 'name': 'Ravi', 'age': None, 'department': 'DATA', 'salary': 72000, 'joining_date': '2018-06-20'})


In [0]:
def find_critical(data):
  critical={}
  for dept in data:
    dept1=dept["department"]
    if dept1 not in critical:
      critical[dept1]={"total_salary":0}
  return critical


In [0]:
dept2=find_critical(data)

In [0]:
print(dept2)

{'DATA': {'total_salary': 0}, 'UNASSIGNED': {'total_salary': 0}, 'HR': {'total_salary': 0}, 'FINANCE': {'total_salary': 0}, 'IT': {'total_salary': 0}}


In [0]:
def FindMissingInfo(cleaned_data):
    missing = []  # list to store employees with missing info
    for emp in cleaned_data:
        # if either age or salary is missing
        if emp.get("age") is None or emp.get("salary") is None:
            missing.append(emp["name"])

    return missing


In [0]:
miss=FindMissingInfo(data)

In [0]:
print(miss)

['Ravi', 'Sneha', 'Anjali', 'Suresh']


In [0]:
print(data)

[{'id': '001', 'name': 'Nilesh', 'age': 29, 'department': 'DATA', 'salary': 75000, 'joining_date': '2019-04-10'}, {'id': '002', 'name': 'Ravi', 'age': None, 'department': 'DATA', 'salary': 72000, 'joining_date': '2018-06-20'}, {'id': '003', 'name': 'Sneha', 'age': 27, 'department': 'UNASSIGNED', 'salary': None, 'joining_date': '2020-11-15'}, {'id': '004', 'name': 'Anjali', 'age': None, 'department': 'HR', 'salary': None, 'joining_date': '2017-03-01'}, {'id': '005', 'name': 'Kiran', 'age': 35, 'department': 'FINANCE', 'salary': 90000, 'joining_date': 'Unknown'}, {'id': '006', 'name': 'Ravi', 'age': 30, 'department': 'DATA', 'salary': 72000, 'joining_date': '2018-06-20'}, {'id': '007', 'name': 'Unknown', 'age': 40, 'department': 'FINANCE', 'salary': 88000, 'joining_date': 'Unknown'}, {'id': '008', 'name': 'Suresh', 'age': None, 'department': 'IT', 'salary': 100000, 'joining_date': '2016-09-12'}]


In [0]:
def search_name(data,name):
  found_list=[]
  for emp in data:
    if emp.get("name","").lower()==name.lower():
      found_list.append(emp)
  return found_list



In [0]:
data=CleanData(employees_dirty)   #we are calling cleanedData function coz we need updated cleaneddata for that reasone

In [0]:
# Step 2: Take name input from user
name = input("Enter employee name to search: ")

Enter employee name to search:  SURESH

In [0]:
result = search_name(data, name)

In [0]:
print(result)

[{'id': '008', 'name': 'Suresh', 'age': None, 'department': 'IT', 'salary': 100000, 'joining_date': '2016-09-12'}]
