# Section 5 — Data Science with Python
This notebook introduces **NumPy**, **Pandas**, **Visualization with Matplotlib**, and **Basic Statistics** using practical, self‑contained examples.

## 1) NumPy Basics — Arrays & Vectorization

In [None]:

import numpy as np

# Three common ways to build a 1-D array.
a = np.array([1, 2, 3, 4])   # from a Python list
b = np.arange(0, 10, 2)      # start, stop (excluded), step
c = np.linspace(0, 1, num=5)  # 5 evenly spaced points, both ends included

for label, arr in (("a:", a), ("b:", b), ("c:", c)):
    print(label, arr)

# Vectorized operations: arithmetic is applied element-wise, no Python loop needed.
doubled = a * 2
shifted = a + 10
squared = a ** 2
print("a * 2:", doubled)
print("a + 10:", shifted)
print("a ** 2:", squared)

# Shapes and reshape: lay the integers 0..11 out as 3 rows by 4 columns.
m = np.reshape(np.arange(12), (3, 4))
print("Matrix m:\n", m)
m_shape, m_ndim = m.shape, m.ndim
print("m.shape:", m_shape, "m.ndim:", m_ndim)


### Broadcasting

In [None]:

# x is a 3x4 block of ones; y has 4 entries, matching x's last axis.
x = np.ones(shape=(3, 4))
y = np.array([1, 2, 3, 4])

# Broadcasting stretches y across each of the 3 rows of x before adding.
broadcast_sum = x + y
print("x + y (broadcasting):\n", broadcast_sum)


### Linear Algebra (dot, transpose, inverse)

In [None]:

# A small 2x2 linear system: coefficient matrix M and right-hand side v.
M = np.array([
    [2.0, 1.0],
    [1.0, 3.0],
])
v = np.array([4.0, 5.0])

# Matrix-vector product
product = np.dot(M, v)
print("M dot v:", product)

# Transpose (rows become columns)
transposed = M.transpose()
print("M.T:\n", transposed)

# Explicit matrix inverse
Minv = np.linalg.inv(M)
print("Inverse(M):\n", Minv)

# Solve M x = v directly, without going through the inverse
sol = np.linalg.solve(M, v)
print("Solve Mx=v -> x:", sol)


## 2) Pandas — DataFrames & Series

In [None]:

import pandas as pd

# Small illustrative sales table: one row per (city, month) observation.
# "Montreal" appears twice (January and February) so later group-by examples
# have something to aggregate.
data = {
    "city": ["Montreal", "Toronto", "Vancouver", "Calgary", "Montreal"],
    "sales": [120, 200, 150, 130, 170],
    "month": pd.to_datetime(["2025-01-01", "2025-01-01", "2025-01-01", "2025-01-01", "2025-02-01"]),
}
df = pd.DataFrame(data)
print(df)

print("\nInfo:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

print("\nDescribe:")
print(df.describe())


### Selection & Filtering

In [None]:

# Column selection: a single label yields a Series, a list of labels a DataFrame.
city_series = df["city"]
city_sales_frame = df[["city", "sales"]]
print(city_series.head())
print(city_sales_frame.head())

# Boolean filtering: build the row mask first, then index with it.
high_sales_mask = df["sales"] > 150
print("\nSales > 150:")
print(df[high_sales_mask])

# .loc selects by label/condition: rows where city is Montreal, two named columns.
is_montreal = df["city"] == "Montreal"
print("\nloc (rows by condition, specific columns):")
print(df.loc[is_montreal, ["city", "sales"]])

# .iloc selects by integer position: first two rows, first two columns.
print("\niloc (by positions):")
print(df.iloc[:2, :2])


### GroupBy & Aggregation

In [None]:

# Per-city summary of the sales column: row count, total and mean.
sales_by_city = df.groupby("city")["sales"]
grouped = sales_by_city.agg(["count", "sum", "mean"])
print(grouped)

# Total sales per (month, city) pair; as_index=False keeps the
# grouping keys as ordinary columns instead of a MultiIndex.
multi = (
    df.groupby(["month", "city"], as_index=False)["sales"]
    .sum()
)
print("\nBy month & city:")
print(multi)


### Merge / Join

In [None]:

# Per-city sales targets to join onto the sales table.
target_columns = {
    "city": ["Montreal", "Toronto", "Vancouver", "Ottawa"],
    "target": [160, 210, 140, 100],
}
targets = pd.DataFrame(target_columns)

# Left join on "city": every row of df is kept, and a city that has no
# entry in targets gets NaN in the "target" column.
merged = df.merge(targets, on="city", how="left")
print(merged)


### Datetime Handling

In [None]:

# Derive calendar fields from the datetime column through the .dt accessor.
month_col = df["month"]
df["year"] = month_col.dt.year
df["month_num"] = month_col.dt.month
df["month_name"] = month_col.dt.strftime("%b")  # abbreviated month name

date_columns = ["month", "year", "month_num", "month_name"]
print(df[date_columns])

# Resample example: monthly sum over a longer, synthetic daily series.
# 60 consecutive days starting 2025-01-01 span January, February and 1 March.
dates = pd.date_range("2025-01-01", periods=60, freq="D")

# Draw the daily values from a locally seeded generator so the totals are the
# same on every Restart & Run All, without touching NumPy's global RNG state.
# integers(50, 200) draws from 50..199 (upper bound excluded).
rng = np.random.default_rng(42)
sales = pd.Series(rng.integers(50, 200, size=60), index=dates)

# Pass the month-end offset object rather than a string alias: the "M" alias
# is deprecated in pandas 2.2+ (renamed "ME"), while the offset object is
# accepted by both older and newer pandas versions.
monthly = sales.resample(pd.offsets.MonthEnd()).sum()
print("\nMonthly totals:")
print(monthly)


## 3) Visualization with Matplotlib

In [None]:

import matplotlib.pyplot as plt

# Line plot (monthly totals), drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots()
monthly.plot(ax=ax)
ax.set_title("Monthly Sales Totals")
ax.set_xlabel("Month")
ax.set_ylabel("Sales")
plt.show()

# Bar plot (by city): total sales per city, one bar each.
city_totals = df.groupby("city")["sales"].sum()
fig, ax = plt.subplots()
city_totals.plot.bar(ax=ax)
ax.set_title("Total Sales by City")
ax.set_xlabel("City")
ax.set_ylabel("Total Sales")
plt.show()


## 4) Basic Statistics

In [None]:

# Descriptive statistics of the sales column.
sales_col = df["sales"]
print("Mean sales:", sales_col.mean())
print("Median sales:", sales_col.median())
# pandas' Series.std() is the sample standard deviation (ddof=1 by default).
print("Std dev:", sales_col.std())
lowest, highest = sales_col.min(), sales_col.max()
print("Min/Max:", lowest, highest)
# Quartiles: 25th, 50th (median) and 75th percentiles.
quartiles = sales_col.quantile([0.25, 0.5, 0.75])
print("Quantiles:", quartiles)

# Correlation example: pair sales with made-up ad-spend figures.
# assign() returns a new frame, so df itself is left untouched.
print("\nCorrelation (dummy example)")
df2 = df.assign(ad_spend=[50, 80, 40, 30, 60])
corr_matrix = df2[["sales", "ad_spend"]].corr()
print(corr_matrix)

# Monte Carlo style simulation: draw normally distributed "sales" figures
# (mean 150, standard deviation 20) and check the sample moments.
np.random.seed(42)  # fix the global seed so the draws repeat from run to run
n_draws = 10000
sim_samples = np.random.normal(150, 20, n_draws)
print("\nSimulated sales ~ N(150, 20):")
sample_mean = sim_samples.mean()
sample_std = sim_samples.std()
print("Mean:", sample_mean, "Std:", sample_std)
