In [2]:
import os

import numpy as np
import pandas as pd

In [3]:
# Load dataset
# The CSV location can be overridden with the EMPLOYEE_SALARIES_CSV environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original local path is used unchanged.
file_path = os.environ.get(
    "EMPLOYEE_SALARIES_CSV",
    r"C:\Users\ASHISH\Downloads\Employee_Salaries.csv",
)
df = pd.read_csv(file_path)

In [12]:
# Creating Arrays
# 2-D array with one row per row of `df` and three columns, in this order:
# Base_Salary, Overtime_Pay, Longevity_Pay.
arr = df[['Base_Salary', 'Overtime_Pay', 'Longevity_Pay']].to_numpy()
base_salary = arr[:, 0]  # view on the Base_Salary column, reused throughout this cell

# Fixed Type Arrays
dtype_arr = arr.astype(np.float64)

# Array Indexing and Slicing
first_5_salaries = arr[:5]        # first 5 rows, all three columns
selected_salaries = arr[:5, 0]    # first 5 Base Salaries

# Reshaping Arrays
# arr already has 3 columns, so this keeps the same (rows, 3) layout.
reshaped_arr = arr.reshape(-1, 3)

# Concatenation and Splitting
split_arr = np.array_split(arr, 3)  # three row-wise parts (sizes may differ by one)
concatenated_arr = np.concatenate([arr, arr], axis=0)  # arr stacked on top of itself

# Universal Functions
log_salaries = np.log1p(base_salary)   # log(1 + Base Salary)
sqrt_salaries = np.sqrt(base_salary)   # square root of Base Salary
# Values are capped at 700 before exponentiating, presumably to keep np.exp
# from overflowing; every salary above 700 therefore maps to the same exp(700).
exp_salaries = np.exp(np.clip(base_salary, None, 700))
sin_salaries = np.sin(base_salary)     # sine transformation

# Aggregations (all over the Base_Salary column)
mean_salary = np.mean(base_salary)
median_salary = np.median(base_salary)
sum_salary = np.sum(base_salary)
std_salary = np.std(base_salary)
min_salary = np.min(base_salary)
max_salary = np.max(base_salary)
var_salary = np.var(base_salary)

# Broadcasting Rules
scaled_salaries = base_salary * 1.1  # increase salary by 10%
normalized_salaries = (base_salary - mean_salary) / std_salary  # z-score

# Comparisons and Boolean Arrays
high_salary = base_salary > 100000
filtered_salaries = arr[high_salary]       # rows whose Base Salary exceeds 100000
low_salary = base_salary < 50000
equal_salaries = base_salary == arr[:, 1]  # Base Salary equal to Overtime Pay

# Masks and Fancy Indexing
# The ascending sort order is computed once and reused; only the five needed
# row indices are used to index arr, instead of reordering the whole array twice.
sorted_indices = np.argsort(base_salary)
top_5_salaries = arr[sorted_indices[-5:]]
bottom_5_salaries = arr[sorted_indices[:5]]

# A seeded generator makes the random sample identical on every run; the
# sample size is capped so arrays with fewer than 5 rows do not raise.
SEED = 42
rng = np.random.default_rng(SEED)
sample_size = min(5, arr.shape[0])
random_indices = rng.choice(arr.shape[0], size=sample_size, replace=False)
random_salaries = arr[random_indices]

# Fast Sorting
# `sorted_indices` (the argsort of Base Salary) was already computed above.
sorted_salaries = np.sort(base_salary)
reverse_sorted_salaries = sorted_salaries[::-1]  # descending view of sorted_salaries

# Structured Arrays
# One record per row of df: Department as a unicode string of up to 50
# characters and Base_Salary as a 64-bit float.
structured_arr = np.array(
    list(zip(df['Department'], df['Base_Salary'])),
    dtype=[('Department', 'U50'), ('Base_Salary', 'f8')],
)

print("### UNIT 1: NumPy Operations ###")
print(f"Mean Salary: {mean_salary}")
print(f"Median Salary: {median_salary}")
print(f"Variance of Salary: {var_salary}")
print(f"Top 5 Salaries:\n{top_5_salaries}")
print(f"Bottom 5 Salaries:\n{bottom_5_salaries}")
print(f"Random Salaries:\n{random_salaries}\n")

### UNIT 1: NumPy Operations ###
Mean Salary: 90312.16574420367
Median Salary: 87328.0
Variance of Salary: 975895427.6852129
Top 5 Salaries:
[[246000.        0.        0.  ]
 [246162.47      0.        0.  ]
 [258000.        0.        0.  ]
 [258000.        0.        0.  ]
 [292000.        0.        0.  ]]
Bottom 5 Salaries:
[[11147.24       0.         0.    ]
 [18257.5        0.         0.    ]
 [18257.5        0.         0.    ]
 [18896.5125     0.         0.    ]
 [18896.5125     0.         0.    ]]
Random Salaries:
[[6.35000000e+04 0.00000000e+00 0.00000000e+00]
 [1.75273070e+05 0.00000000e+00 0.00000000e+00]
 [2.25513425e+04 1.65900000e+01 0.00000000e+00]
 [1.32919682e+05 0.00000000e+00 0.00000000e+00]
 [8.79210000e+04 1.17481000e+04 0.00000000e+00]]



In [11]:
### UNIT 2: Pandas Operations ###

SEED = 42  # fixed seed so the random sample below is the same on every run

# Series and DataFrame Objects
series_obj = pd.Series(df['Base_Salary'])
df_obj = pd.DataFrame(df)

# Data Indexing and Selection
# NOTE: these selections are taken before the derived columns below are added
# to `df`, so on a fresh run they do not contain those columns.
df_subset = df[['Department', 'Base_Salary']]
df_filtered = df[df['Base_Salary'] > 100000]
# Seeded, and capped at the number of rows so small frames do not raise.
df_sample = df.sample(n=min(10, len(df)), random_state=SEED)

# Universal Functions
# Vectorized arithmetic replaces the element-wise .apply(lambda ...) calls.
df['Base_Salary_Squared'] = df['Base_Salary'] ** 2
df['Salary_Log'] = np.log1p(df['Base_Salary'])
df['Salary_Cubed'] = df['Base_Salary'] ** 3

# Index Alignment and Operations
# Missing Longevity_Pay is treated as 0 so it does not turn the sum into NaN.
df['Salary_Adjusted'] = df['Base_Salary'] + df['Longevity_Pay'].fillna(0)
# +1 in the denominator avoids dividing by zero when Overtime_Pay is 0.
df['Salary_Ratio'] = df['Base_Salary'] / (df['Overtime_Pay'] + 1)
df['Salary_Difference'] = df['Base_Salary'] - df['Overtime_Pay']

# Handling Missing Data
# Each line builds a new frame; `df` itself is not modified here.
df_filled = df.fillna("Unknown")   # the string is used for missing values in every column
df_dropped = df.dropna()           # drops rows that contain any missing value
df_fill_mean = df.fillna(df.mean(numeric_only=True))      # numeric columns: column mean
df_fill_median = df.fillna(df.median(numeric_only=True))  # numeric columns: column median

# Hierarchical Indexing
df_hier = df.set_index(['Department', 'Division'])  # two-level (Department, Division) index

print("### UNIT 2: Pandas Operations ###")
print("Filtered Data where Salary > 100000:")
print(df_filtered.head())
print("\nSample Data:")
print(df_sample)


### UNIT 2: Pandas Operations ###
Filtered Data where Salary > 100000:
  Department            Department_Name                        Division  \
0        ABS  Alcohol Beverage Services           ABS 85 Administration   
1        ABS  Alcohol Beverage Services           ABS 85 Administration   
2        ABS  Alcohol Beverage Services           ABS 85 Administration   
8        ABS  Alcohol Beverage Services  ABS 85 Administrative Services   
9        ABS  Alcohol Beverage Services  ABS 85 Administrative Services   

  Gender  Base_Salary  Overtime_Pay  Longevity_Pay Grade  Base_Salary_Squared  \
0      M    175873.00           0.0           0.00    M2         3.093131e+10   
1      M    145613.36           0.0           0.00    M3         2.120325e+10   
2      F    136970.00           0.0           0.00    M3         1.876078e+10   
8      F    149464.15           0.0        9021.82    18         2.233953e+10   
9      M    117424.00           0.0           0.00   N25         1.378840

In [10]:
### UNIT 3: Combining and Analyzing Data ###

# Combining Datasets
df_copy = df.copy()
df_concat = pd.concat([df, df_copy], axis=0)               # original index labels are kept (and repeat)
df_appended = pd.concat([df, df_copy], ignore_index=True)  # same rows with a fresh 0..n-1 index

# Merge
# Joining `df` to a full copy of itself on the non-unique 'Department' key pairs
# every row with every other row of the same department, so the result grows
# with the square of each department's size. Instead, each row is merged with
# a single summary row for its department; validate='many_to_one' makes pandas
# raise if the right-hand side ever contains duplicate keys.
dept_mean_salary = (
    df.groupby('Department', as_index=False, dropna=False)['Base_Salary']
    .mean()
    .rename(columns={'Base_Salary': 'Dept_Mean_Salary'})
)
df_merged = df.merge(
    dept_mean_salary,
    on='Department',
    how='inner',
    validate='many_to_one',
)

# Grouping: the groupby is built once and reused for every statistic.
salary_by_department = df.groupby('Department')[['Base_Salary']]
df_grouped = salary_by_department.mean()
df_grouped_median = salary_by_department.median()
df_grouped_std = salary_by_department.std()
df_grouped_max = salary_by_department.max()
df_grouped_min = salary_by_department.min()

# Pivot Table
# Mean Base_Salary with one row per Department and one column per Gender value.
pivot_table = df.pivot_table(
    values='Base_Salary',
    index='Department',
    columns='Gender',
    aggfunc='mean',
)

# Sorting (all descending)
df_sorted = df.sort_values(by='Base_Salary', ascending=False)
df_sorted_overtime = df.sort_values(by='Overtime_Pay', ascending=False)
df_sorted_longevity = df.sort_values(by='Longevity_Pay', ascending=False)

print("### UNIT 3: Combining and Analyzing Data ###")
print("Grouped Data by Department - Mean Salary:")
print(df_grouped.head())
print("\nPivot Table:")
print(pivot_table.head())
print("\nSorted Data by Salary:")
print(df_sorted.head())


### UNIT 3: Combining and Analyzing Data ###
Grouped Data by Department - Mean Salary:
              Base_Salary
Department               
ABS          64853.219839
BOA         104482.526933
BOE          85517.136856
CAT         129680.870360
CCL         105754.565093

Pivot Table:
Gender                  F              M
Department                              
ABS          70582.401671   63350.483621
BOA         104482.526933            NaN
BOE          88702.861074   81481.886180
CAT         121618.066147  151565.624652
CCL         107665.896680  102222.756726

Sorted Data by Salary:
     Department                  Department_Name  \
821         CEX  Offices of the County Executive   
502         CAT         County Attorney's Office   
8509        POL             Department of Police   
579         CCL                   County Council   
1677        DGS   Department of General Services   

                                          Division Gender  Base_Salary  \
821   CEX 15 Chief 