In [1]:
import numpy as np
import pandas as pd

# Part 1: Advanced NumPy

In [2]:
# Axis operations: axis=0 collapses the rows (one total per column),
# axis=1 collapses the columns (one total per row).
arr = np.array([[1, 2, 3], [4, 5, 6]])
print("Array:\n", arr)
print("Sum along axis 0 (columns):", np.sum(arr, axis=0))
print("Sum along axis 1 (rows):", np.sum(arr, axis=1))

# Stacking: vstack makes each 1-D input a row of a 2-D array,
# hstack joins the 1-D inputs end to end into one longer 1-D array.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
v_stack = np.vstack((a, b))
h_stack = np.hstack((a, b))
print("\nVertical Stack:\n", v_stack)
print("Horizontal Stack:\n", h_stack)

# Splitting arrays: an integer second argument requests that many
# equal-sized pieces (6 elements -> 3 pieces of 2); the result is a list.
data = np.array([10, 20, 30, 40, 50, 60])
split_data = np.split(data, 3)
print("\nSplit Data:", split_data)

# Matrix multiplication: for 2-D operands the `@` operator (np.matmul) is
# the preferred spelling; np.dot returns the same product here but has
# different semantics for scalars and higher-dimensional inputs.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
product = A @ B
print("\nMatrix A:\n", A)
print("Matrix B:\n", B)
print("Matrix Multiplication:\n", product)


Array:
 [[1 2 3]
 [4 5 6]]
Sum along axis 0 (columns): [5 7 9]
Sum along axis 1 (rows): [ 6 15]

Vertical Stack:
 [[1 2 3]
 [4 5 6]]
Horizontal Stack:
 [1 2 3 4 5 6]

Split Data: [array([10, 20]), array([30, 40]), array([50, 60])]

Matrix A:
 [[1 2]
 [3 4]]
Matrix B:
 [[5 6]
 [7 8]]
Matrix Multiplication:
 [[19 22]
 [43 50]]


# Part 2: Intro to Pandas


In [4]:
# A Series is a labelled 1-D array; building it from a dict uses the keys
# as the index labels and the values as the data.
s = pd.Series({'a': 10, 'b': 20, 'c': 30})
print("\nSeries:\n", s)

# A DataFrame is built column by column from a mapping of
# column name -> list of values (all lists must have the same length).
columns = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'Score': [85, 90, 95],
}
df = pd.DataFrame(columns)
print("\nDataFrame:\n", df)

# Column access by label yields a Series; .iloc[1] selects the row at
# integer position 1 (the second row) and also yields a Series.
name_column = df['Name']
second_row = df.iloc[1]
print("\nNames:", name_column)
print("Row 1:\n", second_row)




Series:
 a    10
b    20
c    30
dtype: int64

DataFrame:
       Name  Age  Score
0    Alice   25     85
1      Bob   30     90
2  Charlie   35     95

Names: 0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Row 1:
 Name     Bob
Age       30
Score     90
Name: 1, dtype: object


# Part 3: Simple Data Analysis


In [5]:
# Summary statistics, computed first and then reported
mean_age = df['Age'].mean()
top_score = df['Score'].max()
print("\nAverage Age:", mean_age)
print("Max Score:", top_score)

# Derive a boolean column: True where Score is strictly greater than 80
passed_mask = df['Score'] > 80
df['Passed'] = passed_mask
print("\nUpdated DataFrame:\n", df)


Average Age: 30.0
Max Score: 95

Updated DataFrame:
       Name  Age  Score  Passed
0    Alice   25     85    True
1      Bob   30     90    True
2  Charlie   35     95    True


# Part 4: Mini ML Prep


In [6]:
# Simulate a missing value.
# 'Score' is an integer column at this point, and integers cannot hold NaN.
# Writing a missing value straight into it relies on pandas silently
# upcasting the column, which is deprecated since pandas 2.1 (FutureWarning)
# and scheduled to raise in a later release. Casting to float first makes
# the dtype change explicit; the resulting frame is the same.
df['Score'] = df['Score'].astype('float64')
df.loc[1, 'Score'] = np.nan
print("\nWith Missing Value:\n", df)

# Handling missing values: mean() skips NaN by default, so the gap is
# filled with the mean of the scores that are still present.
df['Score'] = df['Score'].fillna(df['Score'].mean())
print("\nFilled Missing Value:\n", df)

# Feature and label split: X holds the numeric input columns,
# y holds the boolean target column.
X = df[['Age', 'Score']]
y = df['Passed']
print("\nFeatures:\n", X)
print("Labels:\n", y)


With Missing Value:
       Name  Age  Score  Passed
0    Alice   25   85.0    True
1      Bob   30    NaN    True
2  Charlie   35   95.0    True

Filled Missing Value:
       Name  Age  Score  Passed
0    Alice   25   85.0    True
1      Bob   30   90.0    True
2  Charlie   35   95.0    True

Features:
    Age  Score
0   25   85.0
1   30   90.0
2   35   95.0
Labels:
 0    True
1    True
2    True
Name: Passed, dtype: bool
