## Dataset: Annual salaries (in thousands of dollars) of 20 employees at a tech company:
[45, 48, 50, 52, 55, 58, 60, 62, 65, 68, 70, 72, 75, 78, 80, 85, 90, 95, 150, 500]

# Calculate:
### Range
### Variance 
### Standard deviation 
### Q2 (median)
### IQR (Interquartile Range)
### Calculate the lower and upper fences for outlier detection
### Identify any outliers using the IQR method
### How many outliers are there and what are their values?


In [4]:
import numpy as np
import pandas as pd

# Annual salaries in thousands of dollars, one entry per employee.
salaries = [
    45, 48, 50, 52, 55, 58, 60, 62, 65, 68,
    70, 72, 75, 78, 80, 85, 90, 95, 150, 500,
]

# Keep both an ndarray and a Series view of the data; only the ndarray is
# used for the statistics computed in this cell.
data_np = np.array(salaries)
data_pd = pd.Series(salaries)

# ---- Sample size, extremes and range ----
n = data_np.size
min_salary = data_np.min()
max_salary = data_np.max()
data_range = max_salary - min_salary

print(f"Number of employees: {n}")
print(f"Minimum Salary: {min_salary}K")
print(f"Maximum Salary: {max_salary}K")
print(f"Range: {data_range}")

# ---- Spread: variance and standard deviation ----
# ddof=0 treats the 20 salaries as the whole population (divide by n).
# Use ddof=1 instead if they should be treated as a sample (divide by n - 1).
variance = data_np.var(ddof=0)
std_dev = data_np.std(ddof=0)

print(f"Variance: {variance:.2f}")
print(f"Standard Deviation: {std_dev:.2f}")

# ---- Median (Q2) ----
q2 = np.median(data_np)
print(f"Median (Q2): {q2}")

# ---- Quartiles and interquartile range ----
# Both quartiles come from one percentile call using NumPy's default
# (linear interpolation) method.
q1, q3 = np.percentile(data_np, [25, 75])
iqr = q3 - q1

print(f"First Quartile (Q1): {q1}")
print(f"Third Quartile (Q3): {q3}")
print(f"Interquartile Range (IQR): {iqr}")

# ---- Outlier detection (1.5 * IQR rule) ----
# Fences sit 1.5 IQRs outside the quartiles; anything strictly beyond a
# fence is reported as an outlier.
fence_margin = 1.5 * iqr
lower_fence = q1 - fence_margin
upper_fence = q3 + fence_margin

print(f"Lower Fence: {lower_fence}")
print(f"Upper Fence: {upper_fence}")

within_fences = (data_np >= lower_fence) & (data_np <= upper_fence)
outliers = data_np[~within_fences]
print(f"Outliers: {outliers}")
print(f"Number of Outliers: {outliers.size}")

Number of employees: 20
Minimum Salary: 45K
Maximum Salary: 500K
Range: 455
Variance: 9232.49
Standard Deviation: 96.09
Median (Q2): 69.0
First Quartile (Q1): 57.25
Third Quartile (Q3): 81.25
Interquartile Range (IQR): 24.0
Lower Fence: 21.25
Upper Fence: 117.25
Outliers: [150 500]
Number of Outliers: 2
