In [None]:
#TASK
# Build a small synthetic employee dataset that every later cell works on.
# The imports live here so the notebook survives "Restart Kernel -> Run All".
import numpy as np
import pandas as pd

np.random.seed(42)  # For reproducibility (seeds NumPy's legacy global RNG)
n_samples = 100  # Number of samples

# The columns are drawn from the global RNG in the order written below, so
# reordering the entries would change the generated values.
data = {
    # Integer ages in [18, 60).
    'age': np.random.randint(18, 60, size=n_samples),
    # Integer salaries in [30000, 120000).
    'salary': np.random.randint(30000, 120000, size=n_samples),
    # One of four department labels, uniformly sampled.
    'department': np.random.choice(['IT', 'HR', 'Finance', 'Marketing'], size=n_samples),
    # Normal(mean=5, sd=2) rounded to one decimal. The distribution is
    # unbounded, so negative values can occur (the first row is -0.8);
    # they are intentionally left unclipped to keep the data unchanged.
    'years_experience': np.round(np.random.normal(5, 2, size=n_samples), 1),
    # Binary manager indicator (0 or 1).
    'is_manager': np.random.choice([0, 1], size=n_samples)
}
df = pd.DataFrame(data)

In [None]:
# Q1. View data structure
# Dimensions as a (rows, columns) tuple.
frame_shape = df.shape
print(frame_shape)

# Column labels as a plain Python list.
column_names = list(df.columns)
print(column_names)

# Storage dtype of every column.
column_dtypes = df.dtypes
print(column_dtypes)

# Rich (HTML) preview of the first five rows.
preview = df.head()
display(preview)


(100, 5)
['age', 'salary', 'department', 'years_experience', 'is_manager']
age                   int64
salary                int64
department           object
years_experience    float64
is_manager            int64
dtype: object


Unnamed: 0,age,salary,department,years_experience,is_manager
0,56,38392,IT,-0.8,0
1,46,60535,Marketing,3.4,1
2,32,108603,HR,5.0,1
3,25,82256,HR,4.2,1
4,38,119135,HR,4.1,1


In [None]:
# Q2. Get DataFrame info and summary stats
# Index range, non-null counts, dtypes and memory usage (printed to stdout).
df.info()

# Descriptive statistics for the numeric columns.
numeric_summary = df.describe()
display(numeric_summary)

# Frequency table for each discrete column, most frequent value first.
for discrete_col in ('department', 'is_manager'):
    display(df[discrete_col].value_counts())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               100 non-null    int64  
 1   salary            100 non-null    int64  
 2   department        100 non-null    object 
 3   years_experience  100 non-null    float64
 4   is_manager        100 non-null    int64  
dtypes: float64(1), int64(3), object(1)
memory usage: 4.0+ KB


Unnamed: 0,age,salary,years_experience,is_manager
count,100.0,100.0,100.0,100.0
mean,37.91,77809.16,4.823,0.47
std,12.219454,26058.643576,2.237822,0.501614
min,18.0,30206.0,-0.8,0.0
25%,26.75,55141.0,3.475,0.0
50%,38.0,80932.0,4.7,0.0
75%,46.25,98107.25,6.0,1.0
max,59.0,119474.0,11.4,1.0


Unnamed: 0_level_0,count
department,Unnamed: 1_level_1
Marketing,36
Finance,24
IT,21
HR,19


Unnamed: 0_level_0,count
is_manager,Unnamed: 1_level_1
0,53
1,47


In [None]:
# Q3. Do simple NumPy operations
# Pull the salary column out as a NumPy array.
arr_salary = df['salary'].to_numpy()

# Mean and sample standard deviation (ddof=1 -> divide by n - 1).
salary_mean = arr_salary.mean()
salary_std = arr_salary.std(ddof=1)

# Standardise every salary: distance from the mean in units of the sample std.
salary_z = (arr_salary - salary_mean) / salary_std

# Years of experience per 1,000 units of salary, element-wise.
experience_years = df['years_experience'].to_numpy()
salary_thousands = arr_salary / 1000.0
ratio = experience_years / salary_thousands

# Report the scalars and the first five entries of each derived array.
report_lines = (
    ("Mean salary:", salary_mean),
    ("Sample std salary:", salary_std),
    ("First 5 z-scores:", salary_z[:5]),
    ("First 5 exp/salary(1k):", ratio[:5]),
)
for label, value in report_lines:
    print(label, value)


Mean salary: 77809.16
Sample std salary: 26058.64357568222
First 5 z-scores: [-1.51263284 -0.66289559  1.18171308  0.17064741  1.5858784 ]
First 5 exp/salary(1k): [-0.02083767  0.05616585  0.04603924  0.05106011  0.03441474]


In [None]:
# Q4. Filtering and indexing rows
# Build the row filter from three named conditions, then AND them together.
in_it_or_finance = df['department'].isin(['IT', 'Finance'])
earns_over_80k = df['salary'] > 80000
has_5_plus_years = df['years_experience'] >= 5
mask = in_it_or_finance & earns_over_80k & has_5_plus_years

# Label-based selection: matching rows, explicit list of the original columns.
base_columns = ['age','salary','department', 'years_experience','is_manager']
filtered = df.loc[mask, base_columns]
display(filtered.head())

# Position-based selection: first five rows, first three columns.
top_left_corner = df.iloc[:5, :3]
display(top_left_corner)

# Managers earning at least 90,000, shown highest salary first.
well_paid_managers = (df['is_manager']==1) & (df['salary']>=90000)
subset = df.loc[well_paid_managers, ['department','salary','years_experience']]
highest_paid_first = subset.sort_values('salary', ascending=False)
display(highest_paid_first.head())



Unnamed: 0,age,salary,department,years_experience,is_manager
6,36,107373,Finance,6.7,1
12,57,100592,Finance,6.1,1
21,38,117897,Finance,5.8,0
24,39,80015,Finance,8.0,1
25,42,84268,Finance,5.8,0


Unnamed: 0,age,salary,department
0,56,38392,IT
1,46,60535,Marketing
2,32,108603,HR
3,25,82256,HR
4,38,119135,HR


Unnamed: 0,department,salary,years_experience
79,Marketing,119474,4.3
4,HR,119135,4.1
82,IT,115616,9.3
8,Marketing,114651,5.8
17,Marketing,112948,6.7


In [None]:
# Q5. Adding a column
# `assign` returns a new frame, so the previous object is not modified in place.
# Each callable receives the frame built so far, which lets the second column
# reuse `salary_k` created just above it.
df = df.assign(
    # Salary expressed in thousands, one decimal place.
    salary_k=lambda frame: (frame['salary'] / 1000.0).round(1),
    # Years of experience divided by salary in thousands, three decimals.
    exp_per_year_pay_k=lambda frame: (frame['years_experience'] / frame['salary_k']).round(3),
    # 1 when the employee has at least 7 years of experience, otherwise 0.
    senior_flag=lambda frame: (frame['years_experience'] >= 7).astype(int),
)
display(df.head())

Unnamed: 0,age,salary,department,years_experience,is_manager,salary_k,exp_per_year_pay_k,senior_flag
0,56,38392,IT,-0.8,0,38.4,-0.021,0
1,46,60535,Marketing,3.4,1,60.5,0.056,0
2,32,108603,HR,5.0,1,108.6,0.046,0
3,25,82256,HR,4.2,1,82.3,0.051,0
4,38,119135,HR,4.1,1,119.1,0.034,0


In [None]:
# Q6. Grouping and aggregation
# --- Per-department summary ------------------------------------------------
dept_summary_spec = {
    'avg_age': pd.NamedAgg(column='age', aggfunc='mean'),
    'avg_salary': pd.NamedAgg(column='salary', aggfunc='mean'),
    'median_exp': pd.NamedAgg(column='years_experience', aggfunc='median'),
    # is_manager is 0/1, so its sum is the number of managers.
    'managers': pd.NamedAgg(column='is_manager', aggfunc='sum'),
    'count': pd.NamedAgg(column='age', aggfunc='count'),
}
by_department = df.groupby('department', observed=True)
agg_dept = by_department.agg(**dept_summary_spec).round(2)
display(agg_dept)

# --- Per-department, per-manager-status summary ----------------------------
by_department_and_role = df.groupby(['department','is_manager'], observed=True)
agg_dept_mgr = by_department_and_role.agg(
    avg_salary=pd.NamedAgg(column='salary', aggfunc='mean'),
    n=pd.NamedAgg(column='salary', aggfunc='count'),
).round(2)
display(agg_dept_mgr)

# --- Same mean salaries in wide form: departments as rows, is_manager as columns
pivot = df.pivot_table(
    values='salary',
    index='department',
    columns='is_manager',
    aggfunc='mean',
).round(2)
display(pivot)


Unnamed: 0_level_0,avg_age,avg_salary,median_exp,managers,count
department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Finance,39.42,83124.71,4.25,10,24
HR,37.16,73523.05,4.2,10,19
IT,36.81,75825.48,4.3,13,21
Marketing,37.94,77684.72,5.3,14,36


Unnamed: 0_level_0,Unnamed: 1_level_0,avg_salary,n
department,is_manager,Unnamed: 2_level_1,Unnamed: 3_level_1
Finance,0,84325.07,14
Finance,1,81444.2,10
HR,0,70923.44,9
HR,1,75862.7,10
IT,0,80391.0,8
IT,1,73015.92,13
Marketing,0,72388.91,22
Marketing,1,86006.71,14


is_manager,0,1
department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,84325.07,81444.2
HR,70923.44,75862.7
IT,80391.0,73015.92
Marketing,72388.91,86006.71
