## Section 1: Unit 1
### Task 1: Collect Dataset & Visualize

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Path to the input CSV. 'your_dataset.csv' is a placeholder: point it at a real
# file, otherwise pd.read_csv raises FileNotFoundError.
DATA_PATH = 'your_dataset.csv'
df = pd.read_csv(DATA_PATH)
# A bare `df.head()` in the middle of a cell is evaluated and discarded (only the
# last expression of a cell is rendered), so show the preview explicitly.
# `display` is the built-in provided by the IPython/Jupyter kernel.
display(df.head())

# Identify numerical and categorical columns
numerical_cols = df.select_dtypes(include='number').columns
categorical_cols = df.select_dtypes(include='object').columns

print('Numerical Columns:', numerical_cols)
print('Categorical Columns:', categorical_cols)

# Visualize numerical distributions: one histogram (with KDE overlay) per column
for col in numerical_cols:
    sns.histplot(df[col], kde=True, color='skyblue')
    plt.title(f'Distribution of {col}')
    plt.show()

# Visualize categorical distributions: one bar chart of category counts per column
for col in categorical_cols:
    sns.countplot(x=col, data=df, palette='Set2')
    plt.title(f'Distribution of {col}')
    plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'your_dataset.csv'

### Task 2: Compare Mean & Median for Skewed Datasets

In [None]:
# Print the mean next to the median for every numeric column; a large gap
# between the two hints at a skewed distribution.
for col in numerical_cols:
    series = df[col]
    mean_val, median_val = series.mean(), series.median()
    print(f'{col}: Mean={mean_val}, Median={median_val}')

# Note: for skewed distributions the median is the more robust summary,
# because outliers pull the mean but barely move the median.

### Task 3: Mini-Report on Sampling Strategy
- **Simple Random Sampling:** Equal chance for each student.
- **Stratified Sampling:** Proportional representation by department/year/gender.
- **Cluster Sampling:** Surveying full classes/batches.
- **Recommended:** Stratified sampling for campus surveys ensures fair representation.

## Unit 2: Probability

### Task 1: Binomial Probability

In [None]:
from scipy.stats import binom

# Scenario: 10 products, each defective with probability 10%;
# what is the chance that exactly 2 of them are defective?
n = 10    # number of products (trials)
p = 0.1   # defect probability per product
k = 2     # number of defective products of interest
prob = binom(n, p).pmf(k)
print('Binomial Probability:', prob)

### Task 2: Z-score Calculation

In [None]:
# Standardise the observation x against a distribution with mean mu and
# standard deviation sigma: z counts how many sigmas x lies from mu.
x = 85
mu = 80
sigma = 5
z = (x - mu) / sigma
print('Z-score:', z)

### Task 3: Normal Approximation to Binomial

In [None]:
from scipy.stats import norm

# Mean and standard deviation of the Binomial(n, p) distribution being approximated.
mu_norm, sigma_norm = n*p, (n*p*(1-p))**0.5
# A density value (norm.pdf) is not a probability. For a discrete count, P(X = k)
# corresponds to the normal area over [k - 0.5, k + 0.5] (continuity correction).
prob_norm = norm.cdf(k + 0.5, mu_norm, sigma_norm) - norm.cdf(k - 0.5, mu_norm, sigma_norm)
print('Normal Approximation Probability:', prob_norm)

## Section 2: Unit 3

### Task 1: 95% Confidence Interval for Proportion

In [None]:
import statsmodels.api as sm

# 45 successes observed in 100 trials. alpha=0.05 requests the 95% level and
# method='normal' selects the normal-approximation interval.
success = 45
n_total = 100
ci_low, ci_high = sm.stats.proportion_confint(
    count=success, nobs=n_total, alpha=0.05, method='normal'
)
print(f'95% CI for proportion: ({ci_low:.3f}, {ci_high:.3f})')

### Task 2: Hypothesis Test for Population Mean

In [None]:
from scipy import stats

# One-sample t-test: does the mean of these observations differ from 10?
sample = [10, 12, 9, 11, 13]
one_sample = stats.ttest_1samp(a=sample, popmean=10)
t_stat = one_sample.statistic
p_val = one_sample.pvalue
print('t-statistic:', t_stat, 'p-value:', p_val)

### Task 3: One-way ANOVA

In [None]:
# Three independent groups of three observations each.
group1, group2, group3 = [10, 12, 11], [14, 15, 16], [13, 14, 15]

# One-way ANOVA: tests whether all group means are equal.
anova = stats.f_oneway(group1, group2, group3)
f_stat, p_val = anova.statistic, anova.pvalue
print('ANOVA F-statistic:', f_stat, 'p-value:', p_val)

## Unit 4: t-tests

### Task 1: Paired t-test (Before & After)

In [None]:
# Paired measurements: ttest_rel matches before[i] with after[i].
before, after = [50, 55, 60], [52, 57, 63]
paired = stats.ttest_rel(before, after)
t_stat = paired.statistic
p_val = paired.pvalue
print('Paired t-test: t-statistic:', t_stat, 'p-value:', p_val)

### Task 2: Independent Two-sample t-test

In [None]:
# Outputs from two separate machines, compared with an independent
# two-sample t-test (called with its default settings).
machine1 = [10, 12, 11]
machine2 = [14, 15, 13]
t_stat, p_val = stats.ttest_ind(a=machine1, b=machine2)
print('Independent t-test: t-statistic:', t_stat, 'p-value:', p_val)

## Additional Python Tutorial Tasks

In [None]:
# Random Sampling
# Draw a reproducible simple random sample (fixed seed) of up to 10 rows;
# capping at len(df) avoids a ValueError on datasets with fewer than 10 rows.
sample_df = df.sample(n=min(10, len(df)), random_state=42)
# numeric_only=True: averaging object (string) columns raises TypeError on
# pandas >= 2.0, and a mean is only meaningful for the numeric columns anyway.
sample_df.mean(numeric_only=True)

In [None]:
# Manual Binomial Probability Function
from math import comb
def binomial_prob(n, k, p):
    """Return P(X = k) for X ~ Binomial(n, p), using the closed-form formula.

    Computes C(n, k) * p**k * (1 - p)**(n - k).

    Args:
        n: Number of trials (non-negative integer).
        k: Number of successes (non-negative integer).
        p: Success probability of a single trial, within [0, 1].

    Returns:
        The probability of exactly k successes; 0.0 when k > n.

    Raises:
        ValueError: If p lies outside [0, 1], or (from math.comb) if n or k
            is negative.
    """
    if not 0 <= p <= 1:
        raise ValueError('p must be between 0 and 1')
    ways = comb(n, k)  # 0 exactly when k > n
    if ways == 0:
        # Short-circuit: with k > n the exponent n - k is negative, and
        # (1 - p) ** (n - k) would raise ZeroDivisionError for p == 1.
        return 0.0
    return ways*(p**k)*((1-p)**(n-k))

print('Manual Binomial Probability:', binomial_prob(10, 2, 0.1))

In [None]:
# Normal Distribution Curve
# numpy is imported here because this cell runs before the later cell that
# imports it; without this line a fresh Restart & Run All fails with
# NameError on `np`.
import numpy as np

# Evaluate the normal density on 100 evenly spaced points within 4 sigma of mu.
x_vals = np.linspace(mu-4*sigma, mu+4*sigma, 100)
y_vals = norm.pdf(x_vals, mu, sigma)
plt.plot(x_vals, y_vals)
plt.title('Normal Distribution Curve')
plt.xlabel('x')
plt.ylabel('Probability density')
plt.show()

In [None]:
# Central Limit Theorem - Sampling Distribution
import numpy as np

# A sampling distribution of the mean is defined for ONE variable. Calling
# np.mean on a multi-column frame either pools unrelated columns into a single
# number or returns a per-column Series (depending on the pandas version), so
# resample a single numeric column instead.
clt_col = numerical_cols[0]
# One shared RandomState keeps the whole experiment reproducible while still
# producing a different resample on every iteration.
rng = np.random.RandomState(42)
sample_means = [
    df[clt_col].sample(10, replace=True, random_state=rng).mean()
    for _ in range(1000)
]
sns.histplot(sample_means, kde=True)
plt.title('Sampling Distribution of Sample Means')
plt.xlabel(f'Mean of {clt_col} (samples of size 10)')
plt.show()

### End-to-End Case Study
1. Load dataset → 2. EDA → 3. Select test → 4. Perform inferential analysis → 5. Report conclusion

Reuse the cells from the sections above to carry out each step of this case study on your own dataset.