In [8]:
import numpy as np
import pandas as pd
# Seed NumPy's global (legacy) random generator so that every random draw
# made after this point is reproducible from run to run.
SEED = 42
np.random.seed(SEED)

# Number of records to generate for the synthetic dataset.
n = 150

In [10]:
# Build a synthetic dataset of n records: sequential ids, random integer ages,
# log-normally distributed incomes and normally distributed scores, with
# missing values injected into the income and score columns.
#
# NOTE: for a given seed, the generated values depend on the ORDER of the
# np.random calls below, so the statements must not be reordered.
ids = np.arange(1, n + 1)                     # 1..n inclusive
ages = np.random.randint(18, 60, size=n)      # integers in [18, 60) -> 18..59
incomes = np.random.lognormal(mean=10, sigma=0.6, size=n)

# Blank out 12 distinct income entries (replace=False => no index is repeated).
nan_idx = np.random.choice(n, size=12, replace=False)
incomes[nan_idx] = np.nan

# Scores are drawn from N(70, 10) and clamped into [0, 100]; afterwards
# 8 distinct entries are replaced by NaN.
scores = np.clip(np.random.normal(loc=70, scale=10, size=n), 0, 100)
score_nan_idx = np.random.choice(n, size=8, replace=False)
scores[score_nan_idx] = np.nan

df = pd.DataFrame({
    "id": ids,
    "age": ages,
    "income": incomes,
    "score": scores
})

print("Synthetic Dataset (first 10 rows):")
# The original call was missing its closing parenthesis (SyntaxError).
print(df.head(10))

Synthetic Dataset (first 10 rows):
   id  age        income      score
0   1   56  16028.823956  71.035681
1   2   46  29970.248739  72.676360
2   3   32  23347.531254  56.576872
3   4   25  39386.838621  36.359045
4   5   38  14454.602337  81.235418
5   6   56           NaN  91.432052
6   7   36  17408.869699  66.574111
7   8   40   9153.494804  72.493145
8   9   28  26309.152627  73.245183
9  10   28  25761.416403  86.752932


In [14]:

# Summary statistics of income. skipna=True ignores the NaN incomes.
std_income = df["income"].std(skipna=True)
avg_income = df["income"].mean(skipna=True)

# z-score per row: (income - mean) / std. Rows with a NaN income get NaN.
df["income_zscore"] = df["income"].sub(avg_income).div(std_income)

# Keep the rows whose |z| exceeds 3; NaN z-scores compare False and are
# therefore never selected.
outlier_rows = df.loc[df["income_zscore"].abs().gt(3)]

print("Income values with corresponding z-scores:\n", df.loc[:, ["income", "income_zscore"]])
print("\nTotal Outliers Found:", len(outlier_rows))
print("Details of Outlier Records:\n", outlier_rows)

Income values with corresponding z-scores:
            income  income_zscore
0    16028.823956      -0.505136
1    29970.248739       0.082064
2    23347.531254      -0.196879
3    39386.838621       0.478682
4    14454.602337      -0.571440
..            ...            ...
145  14898.857206      -0.552729
146  79727.229279       2.177780
147  32220.282141       0.176833
148   6534.913882      -0.905010
149  24633.746452      -0.142705

[150 rows x 2 columns]

Total Outliers Found: 2
Details of Outlier Records:
       id  age         income      score  income_zscore
91    92   52  112656.821625  78.075556       3.564743
121  122   31  222267.945415  87.185994       8.181459


In [17]:
# Standardise the income column and report rows beyond three standard
# deviations from the mean.
avg_income = df["income"].mean(skipna=True)   # NaN incomes are skipped
std_income = df["income"].std(skipna=True)

# Centre first, then scale; a NaN income propagates to a NaN z-score.
centered_income = df["income"] - avg_income
df["income_zscore"] = centered_income / std_income

# |z| > 3 written as two one-sided comparisons; NaN fails both, so rows
# with a missing income are never flagged.
beyond_threshold = (df["income_zscore"] > 3) | (df["income_zscore"] < -3)
outlier_rows = df[beyond_threshold]

print("Income values with corresponding z-scores:\n", df[["income", "income_zscore"]])
print("\nTotal Outliers Found:", len(outlier_rows.index))
print("Details of Outlier Records:\n", outlier_rows)

Income values with corresponding z-scores:
            income  income_zscore
0    16028.823956      -0.505136
1    29970.248739       0.082064
2    23347.531254      -0.196879
3    39386.838621       0.478682
4    14454.602337      -0.571440
..            ...            ...
145  14898.857206      -0.552729
146  79727.229279       2.177780
147  32220.282141       0.176833
148   6534.913882      -0.905010
149  24633.746452      -0.142705

[150 rows x 2 columns]

Total Outliers Found: 2
Details of Outlier Records:
       id  age         income      score  income_zscore
91    92   52  112656.821625  78.075556       3.564743
121  122   31  222267.945415  87.185994       8.181459


In [5]:
# Bucket ages into half-open intervals and summarise each bucket.
bins = [18, 25, 35, 45, 60]
labels = ["[18-25)", "[25-35)", "[35-45)", "[45-60)"]

# right=False makes every bin include its left edge and exclude its right
# edge, which is what the "[a-b)" labels advertise. Ages outside [18, 60)
# would be assigned NaN by pd.cut.
df["age_bin"] = pd.cut(df["age"], bins=bins, labels=labels, right=False)

# "age_bin" is categorical. observed=False is passed explicitly so that every
# bin (even an empty one) appears in the result: this matches the behaviour the
# original call relied on, avoids the FutureWarning newer pandas emits when the
# argument is omitted, and keeps the output stable if the default changes.
group = df.groupby("age_bin", observed=False).agg(
    count_obs=("id", "count"),          # number of non-null ids per bin
    mean_income=("income", "mean"),     # NaN incomes are skipped
    median_score=("score", "median")    # NaN scores are skipped
).reset_index().sort_values("age_bin")

print("\nProblem 3 Results (by Age Bin):")
print(group)



Problem 3 Results (by Age Bin):
   age_bin  count_obs   mean_income  median_score
0  [18-25)         25  24182.233725     67.349189
1  [25-35)         36  32561.317191     69.271574
2  [35-45)         36  26829.279945     69.024289
3  [45-60)         53  27440.062782     71.655773


In [36]:

# 2x3 integer matrix holding 1..6 in row-major order:
#   [[1, 2, 3],
#    [4, 5, 6]]
A = np.arange(1, 7).reshape(2, 3)

print("\nProblem 4 Results:")
print("Array A:\n", A)
print("Shape:", np.shape(A))  # (rows, columns)
print("Size:", np.size(A))    # total number of elements



Problem 4 Results:
Array A:
 [[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Size: 6


In [38]:
# Reshaped views of A: rows and columns swapped, then collapsed to 1-D.
print("Transpose:\n", A.T)
print("Flatten:", A.flatten())

# Negative indices count from the end of an axis.
print("Last row (A[-1]):", A[-1])
print("Last element (A[-1,-1]):", A[-1, -1])

# Deliberately request a column that does not exist to show the error NumPy
# raises. Only IndexError is caught so that any unrelated failure still
# surfaces, and the result is not bound to `_`, which would shadow IPython's
# "last output" variable.
try:
    A[:, -10]
except IndexError as e:
    print("Error while slicing with invalid index:", e)


Transpose:
 [[1 4]
 [2 5]
 [3 6]]
Flatten: [1 2 3 4 5 6]
Last row (A[-1]): [4 5 6]
Last element (A[-1,-1]): 6
Error while slicing with invalid index: index -10 is out of bounds for axis 1 with size 3


In [40]:
# Length-3 vector [10, 20, 30]; adding it to A applies it to each row of A
# through NumPy broadcasting.
row_vec = np.arange(10, 40, 10)
print("\nBroadcasting (A + [10,20,30]):\n", np.add(A, row_vec))



Broadcasting (A + [10,20,30]):
 [[11 22 33]
 [14 25 36]]


In [42]:
# 3x2 integer matrix: the 2x2 identity stacked on top of a row of ones, i.e.
#   [[1, 0],
#    [0, 1],
#    [1, 1]]
B = np.vstack([np.eye(2, dtype=int), np.ones((1, 2), dtype=int)])

# For 2-D operands the @ operator computes the same matrix product as A.dot(B).
print("\nDot Product (A.dot(B)):\n", A @ B)


Dot Product (A.dot(B)):
 [[ 4  5]
 [10 11]]


In [44]:
# Random 3x3 matrix with entries drawn from [0, 1), plus the identity, so each
# diagonal entry is raised by 1 (presumably to keep M comfortably invertible).
# The values depend on the current state of NumPy's global random generator.
M = np.eye(3) + np.random.rand(3, 3)

# Inverse and determinant are independent of each other; both come from M.
invM = np.linalg.inv(M)
detM = np.linalg.det(M)

print("\nMatrix M:\n", M)
print("\nDeterminant of M:", detM)
print("\nInverse of M:\n", invM)


Matrix M:
 [[1.68111785 0.18143835 0.52516338]
 [0.70904626 1.10687692 0.56731222]
 [0.25656278 0.96292688 1.48354565]]

Determinant of M: 1.8871821801079591

Inverse of M:
 [[ 0.58066586  0.1253307  -0.25347793]
 [-0.48026698  1.25015895 -0.30805376]
 [ 0.21130778 -0.83311683  0.91784587]]
