In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a reproducible synthetic employee table.
# All draws below consume the same seeded global NumPy stream, so they must
# stay in this exact order or every column's values would change.
n_samples = 100
np.random.seed(42)

data = {}
# randint's upper bound is exclusive: ages fall in 18..59.
data['age'] = np.random.randint(low=18, high=60, size=n_samples)
data['salary'] = np.random.randint(low=30000, high=120000, size=n_samples)
data['department'] = np.random.choice(['IT', 'HR', 'Finance', 'Marketing'], size=n_samples)
# NOTE(review): a normal draw is unbounded, so negative "years" can occur
# (the recorded output shows a minimum of -0.8) -- confirm that is acceptable.
data['years_experience'] = np.random.normal(loc=5, scale=2, size=n_samples).round(1)
data['is_manager'] = np.random.choice([0, 1], size=n_samples)

# Dict insertion order fixes the column order of the resulting frame.
df = pd.DataFrame(data)

Q1. View data structure

In [4]:
# Show the whole frame through the notebook's rich display; the rendered
# output elides the middle rows (the "..." row) rather than listing all of them.
display(df)

Unnamed: 0,age,salary,department,years_experience,is_manager
0,56,38392,IT,-0.8,0
1,46,60535,Marketing,3.4,1
2,32,108603,HR,5.0,1
3,25,82256,HR,4.2,1
4,38,119135,HR,4.1,1
...,...,...,...,...,...
95,59,82662,IT,4.0,0
96,56,42688,HR,4.4,1
97,58,55342,Marketing,6.0,0
98,45,67157,HR,11.4,0


Q2. Get DataFrame Info and Summary Stats

In [5]:
# Structural overview: index range, per-column non-null counts and dtypes.
# info() prints its report directly instead of returning a value.
df.info()

# Summary statistics for the numeric columns. The frame is echoed as the
# cell's final expression so the notebook renders it as a table.
summary_stats = df.describe()
summary_stats

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               100 non-null    int32  
 1   salary            100 non-null    int32  
 2   department        100 non-null    object 
 3   years_experience  100 non-null    float64
 4   is_manager        100 non-null    int32  
dtypes: float64(1), int32(3), object(1)
memory usage: 2.4+ KB


Unnamed: 0,age,salary,years_experience,is_manager
count,100.0,100.0,100.0,100.0
mean,37.91,77809.16,4.823,0.47
std,12.219454,26058.643576,2.237822,0.501614
min,18.0,30206.0,-0.8,0.0
25%,26.75,55141.0,3.475,0.0
50%,38.0,80932.0,4.7,0.0
75%,46.25,98107.25,6.0,1.0
max,59.0,119474.0,11.4,1.0


Q3. Do Simple NumPy Operations

In [8]:
# Basic array construction.
# NumPy is imported once as `np` in the notebook's import cell, so no
# re-import is needed in this cell.
arr1 = np.array([1, 2, 3])   # 1-D array built from a Python list
arr2 = np.zeros((2, 3))      # 2 rows x 3 columns, filled with float zeros
print(arr1)
print(arr2)

[1 2 3]
[[0. 0. 0.]
 [0. 0. 0.]]


In [13]:
# Element-wise arithmetic and universal functions on small integer arrays.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
c = np.add(a, b)        # element-wise sum of the two arrays
d = np.multiply(a, 2)   # every element of `a` scaled by 2

# Each row pairs a label with the array to print next to it; the ufuncs are
# all applied to the summed array `c`.
for _label, _values in (
    ("Element-wise addition:", c),
    ("Scalar multiplication:", d),
    ("Sqrt:", np.sqrt(c)),
    ("Exponent:", np.exp(c)),
    ("Log:", np.log(c)),
    ("Sine:", np.sin(c)),
):
    print(_label, _values)

Element-wise addition: [5 7 9]
Scalar multiplication: [2 4 6]
Sqrt: [2.23606798 2.64575131 3.        ]
Exponent: [ 148.4131591  1096.63315843 8103.08392758]
Log: [1.60943791 1.94591015 2.19722458]
Sine: [-0.95892427  0.6569866   0.41211849]


Q4. Filtering and Indexing Rows

In [15]:
# Row filter: keep only the employees whose salary is strictly above 50000.
filtered = df.loc[df['salary'].gt(50000)]
print(filtered)

# Positional selection: the first five rows, restricted to the columns at
# positions 0 and 1 ('age' and 'salary').
selected = df.head(5).iloc[:, [0, 1]]
print(selected)


    age  salary department  years_experience  is_manager
1    46   60535  Marketing               3.4           1
2    32  108603         HR               5.0           1
3    25   82256         HR               4.2           1
4    38  119135         HR               4.1           1
5    56   65222    Finance               2.8           0
..  ...     ...        ...               ...         ...
94   22  100467         IT               9.2           0
95   59   82662         IT               4.0           0
97   58   55342  Marketing               6.0           0
98   45   67157         HR              11.4           0
99   24   97863         HR               5.2           1

[83 rows x 5 columns]
   age  salary
0   56   38392
1   46   60535
2   32  108603
3   25   82256
4   38  119135


Q5. Adding a Column

In [16]:
# Derive a bonus equal to 10% of salary. The column is written onto `df`
# itself because the grouping cell further down aggregates df['bonus'].
df['bonus'] = df['salary'].mul(0.1)
display(df.head(5))

Unnamed: 0,age,salary,department,years_experience,is_manager,bonus
0,56,38392,IT,-0.8,0,3839.2
1,46,60535,Marketing,3.4,1,6053.5
2,32,108603,HR,5.0,1,10860.3
3,25,82256,HR,4.2,1,8225.6
4,38,119135,HR,4.1,1,11913.5


Q6. Grouping and Aggregation

In [17]:
# Per-department totals, written with named aggregation
# (output_column=(source_column, function)); column names and order match
# the source columns.
# Note: 'count' tallies rows per group, so the `is_manager` column here is
# the department headcount, not the number of managers.
grouped = df.groupby('department').agg(
    salary=('salary', 'sum'),
    bonus=('bonus', 'sum'),
    is_manager=('is_manager', 'count'),
)
print("Total salary, total bonus, and count of employees by department:\n", grouped, "\n")

Total salary, total bonus, and count of employees by department:
              salary     bonus  is_manager
department                               
Finance     1994993  199499.3          24
HR          1396938  139693.8          19
IT          1592335  159233.5          21
Marketing   2796650  279665.0          36 

