In [1]:
import numpy as np
import pandas as pd

# Fix the global NumPy seed so the sampled ages and salaries repeat on every run.
np.random.seed(42)

# Ten synthetic employees: sequential ids 1..10, ages drawn from [18, 60) and
# salaries drawn from [30000, 90000). Ages are drawn before salaries; keep that
# order, because the values produced from the seed depend on it.
ids = np.arange(1, 11)
ages = np.random.randint(low=18, high=60, size=10)
salaries = np.random.randint(low=30000, high=90000, size=10)
departments = np.array([
    "HR", "IT", "Finance", "IT", "HR",
    "Sales", "Finance", "IT", "Sales", "HR",
])

# Column order in the frame (and in the CSV) follows the keyword order here.
DF = pd.DataFrame(dict(id=ids, age=ages, salary=salaries, dept=departments))

# index=False keeps the row index out of the file, so it holds only the four columns.
DF.to_csv("employees.csv", index=False)
print("Sample Data Created and Saved as employees.csv", DF, sep="\n")

Sample Data Created and Saved as employees.csv
   id  age  salary     dept
0   1   56   77191       HR
1   2   46   74131       IT
2   3   32   46023  Finance
3   4   25   71090       IT
4   5   38   31685       HR
5   6   56   30769    Sales
6   7   36   89735  Finance
7   8   40   86101       IT
8   9   28   32433    Sales
9  10   28   35311       HR


# NumPy Questions (Q1-Q5)

In [6]:
# Q1. Create NumPy arrays from ages and salaries data
# np.array copies its input, so these arrays are independent of `ages` / `salaries`.
ages_ary, salaries_ary = np.array(ages), np.array(salaries)

# For each array, show the values followed by its dtype, rank, shape and element count.
print("Ages Array is:", ages_ary)
print(
    "Ages datatype:", ages_ary.dtype,
    "|| ndim:", ages_ary.ndim,
    "|| shape:", ages_ary.shape,
    "|| size:", ages_ary.size,
)
print("\nSalaries Array is:", salaries_ary)
print(
    "Salaries datatype:", salaries_ary.dtype,
    "|| ndim:", salaries_ary.ndim,
    "|| shape:", salaries_ary.shape,
    "|| size:", salaries_ary.size,
)

Ages Array is: [56 46 32 25 38 56 36 40 28 28]
Ages datatype: int32 || ndim: 1 || shape: (10,) || size: 10

Salaries Array is: [77191 74131 46023 71090 31685 30769 89735 86101 32433 35311]
Salaries datatype: int32 || ndim: 1 || shape: (10,) || size: 10


In [None]:
# Q2. Find highest, lowest, average salary and average age
# Same reductions as the np.max / np.min / np.mean functions, written as array methods.
highest_salary = salaries_ary.max()
lowest_salary = salaries_ary.min()
average_salary = salaries_ary.mean()
average_age = ages_ary.mean()

# Extremes are printed as-is; the two averages are rounded to two decimals.
print(f"Highest Salary: ${highest_salary}")
print(f"Lowest Salary: ${lowest_salary}")
print(f"Average Salary: ${average_salary:.2f}")
print(f"Average Age: {average_age:.2f}")

In [7]:
# Q3. Select the ages greater than 30 and count how many there are
# Build the boolean mask once and reuse it for both the selection and the count.
older_mask = ages_ary > 30
more_than_30 = ages_ary[older_mask]
# Summing a boolean mask counts its True entries.
count_30plus = np.sum(older_mask)

print("Ages greater than 30 are :", more_than_30)
print(f"Employees older than 30 are : {count_30plus}")

Ages greater than 30 are : [56 46 32 38 56 36 40]
Employees older than 30 are : 7


In [None]:
# Q4. Create new array with ages increased by 5 years
# Element-wise add with a broadcast scalar; the result is a new array and
# ages_ary itself is left unchanged.
new_ages = np.add(ages_ary, 5)

print("Original ages:", ages_ary)
print("Modified ages:", new_ages)

In [None]:
# Q5. Calculate total salary expense and salary range
# Total is the sum of all salaries; range is the gap between the largest and smallest.
total_salaries = salaries_ary.sum()
salary_diff = salaries_ary.max() - salaries_ary.min()

print(f"Total Salary Expense: ${total_salaries}")
print(f"Salary Range (Max - Min): ${salary_diff}")

# Pandas Questions (Q6-Q10)

In [9]:
# Q6. Load CSV file and display basic info
# Read back the file written by the data-creation cell.
df = pd.read_csv("employees.csv")

print("First 5 rows:")
print(df.head(5))
print("\nDataFrame Info:")
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() -- doing so emits a stray "None" line after the report.
df.info()
print("\nSummary Statistics:")
# describe() summarises the numeric columns (count, mean, std, min, quartiles, max).
print(df.describe())

First 5 rows:
   id  age  salary     dept
0   1   56   77191       HR
1   2   46   74131       IT
2   3   32   46023  Finance
3   4   25   71090       IT
4   5   38   31685       HR

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10 non-null     int64 
 1   age     10 non-null     int64 
 2   salary  10 non-null     int64 
 3   dept    10 non-null     object
dtypes: int64(3), object(1)
memory usage: 452.0+ bytes
None

Summary Statistics:
             id       age        salary
count  10.00000  10.00000     10.000000
mean    5.50000  38.50000  57446.900000
std     3.02765  11.16791  24357.262884
min     1.00000  25.00000  30769.000000
25%     3.25000  29.00000  33152.500000
50%     5.50000  37.00000  58556.500000
75%     7.75000  44.50000  76426.000000
max    10.00000  56.00000  89735.000000


In [10]:
# Q7. Display specific columns and last 3 rows
# Label-based selection of every row for the three numeric columns.
print("ID, Age, and Salary columns:")
print(df.loc[:, ['id', 'age', 'salary']])
# Positional slice of the final three rows (all columns).
print("\nLast 3 rows:")
print(df.iloc[-3:])

ID, Age, and Salary columns:
   id  age  salary
0   1   56   77191
1   2   46   74131
2   3   32   46023
3   4   25   71090
4   5   38   31685
5   6   56   30769
6   7   36   89735
7   8   40   86101
8   9   28   32433
9  10   28   35311

Last 3 rows:
   id  age  salary   dept
7   8   40   86101     IT
8   9   28   32433  Sales
9  10   28   35311     HR


In [11]:
# Q8. Filter IT department employees
# Keep only the rows whose dept is exactly 'IT'; the original row labels are retained.
it_emp = df.loc[df['dept'].eq('IT')]
# Row count of the filtered frame.
it_count = it_emp.shape[0]

print("IT Department Employees:")
print(it_emp)
print(f"\nTotal number of IT employees: {it_count}")

IT Department Employees:
   id  age  salary dept
1   2   46   74131   IT
3   4   25   71090   IT
7   8   40   86101   IT

Total number of IT employees: 3


In [None]:
# Q9. Sort by salary (descending) and show top 3
# sort_values returns a new frame ordered from highest to lowest salary; df is untouched.
sorted_df = df.sort_values(by='salary', ascending=False)
# Take the first three rows of the sorted frame, with the columns in a fixed order.
top_3_paid = sorted_df.loc[:, ['id', 'age', 'salary', 'dept']].iloc[:3]

print("Top 3 highest-paid employees:")
print(top_3_paid)

In [None]:
# Q10. Replace salaries > 80000 with 80000 and calculate new average
# The cap is applied to a copy (df_capped) instead of being written into df.
# Mutating df in place made this cell non-repeatable: on a second run the
# "Original average salary" line reported the already-capped mean, and every
# earlier display of df no longer matched its contents.
SALARY_CAP = 80000

original_average_salary = df['salary'].mean()

# clip(upper=...) replaces every salary above the cap with the cap and leaves
# the rest unchanged; assign() returns a new frame with that column swapped in.
df_capped = df.assign(salary=df['salary'].clip(upper=SALARY_CAP))
new_average_salary = df_capped['salary'].mean()

print("Original average salary:", original_average_salary)
print(f"New average salary after replacement: ${new_average_salary:.2f}")