# Task

Generated a Synthetic Dataset with columns of age, salary, department, years_experience, and is_manager.

In [2]:
import numpy as np
import pandas as pd

In [3]:
np.random.seed(42) # For reproducibility
n_samples = 100 # Number of samples

data = {
    'age': np.random.randint(18, 60, size=n_samples),
    'salary': np.random.randint(30000, 120000, size=n_samples),
    'department': np.random.choice(['IT', 'HR', 'Finance', 'Marketing'], size=n_samples),
    'years_experience': np.round(np.random.normal(5, 2, size=n_samples), 1),
    'is_manager': np.random.choice([0, 1], size=n_samples)
}
df = pd.DataFrame(data)

Q1. View data structure

In [4]:
df.head()


Unnamed: 0,age,salary,department,years_experience,is_manager
0,56,38392,IT,-0.8,0
1,46,60535,Marketing,3.4,1
2,32,108603,HR,5.0,1
3,25,82256,HR,4.2,1
4,38,119135,HR,4.1,1


Q2. Get DataFrame Info and Summary Stats

In [5]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               100 non-null    int64  
 1   salary            100 non-null    int64  
 2   department        100 non-null    object 
 3   years_experience  100 non-null    float64
 4   is_manager        100 non-null    int64  
dtypes: float64(1), int64(3), object(1)
memory usage: 4.0+ KB


In [6]:
df.describe()


Unnamed: 0,age,salary,years_experience,is_manager
count,100.0,100.0,100.0,100.0
mean,37.91,77809.16,4.823,0.47
std,12.219454,26058.643576,2.237822,0.501614
min,18.0,30206.0,-0.8,0.0
25%,26.75,55141.0,3.475,0.0
50%,38.0,80932.0,4.7,0.0
75%,46.25,98107.25,6.0,1.0
max,59.0,119474.0,11.4,1.0


Q3. Do Simple Numpy Operations

In [11]:
ages = df['age'].values
salaries = df['salary'].values


In [14]:
ages[:5], salaries[:5]


(array([56, 46, 32, 25, 38]), array([ 38392,  60535, 108603,  82256, 119135]))

In [17]:
np.mean(salaries)

np.float64(77809.16)

Q4. Filtering and Indexing Rows

In [18]:
df[df['age'] > 40]


Unnamed: 0,age,salary,department,years_experience,is_manager
0,56,38392,IT,-0.8,0
1,46,60535,Marketing,3.4,1
5,56,65222,Finance,2.8,0
10,41,40965,IT,9.5,1
11,53,54538,Marketing,5.3,0
12,57,100592,Finance,6.1,1
13,41,38110,HR,2.0,1
17,41,112948,Marketing,6.7,1
18,47,36910,Marketing,7.6,0
19,55,30206,Finance,2.3,0


In [19]:
df.iloc[:5]

Unnamed: 0,age,salary,department,years_experience,is_manager
0,56,38392,IT,-0.8,0
1,46,60535,Marketing,3.4,1
2,32,108603,HR,5.0,1
3,25,82256,HR,4.2,1
4,38,119135,HR,4.1,1


Q5. Adding a Column

In [22]:
df['bonus'] = df['salary'] * 0.10



Q6. Grouping and Aggregation

In [23]:
df.groupby('department')['salary'].mean()


Unnamed: 0_level_0,salary
department,Unnamed: 1_level_1
Finance,83124.708333
HR,73523.052632
IT,75825.47619
Marketing,77684.722222
