# In [None]:
# 1. Types of Data (Qualitative & Quantitative)

# Theoretical Explanation:
# Qualitative Data: Descriptive, non-numeric (e.g., Colors, Car brands).
# Quantitative Data: Numeric, measurable (e.g., Height, Number of students).

# Practical Example
# One categorical sample (labels) and one numeric sample (measurements),
# defined side by side to contrast the two kinds of data.
qualitative_data, quantitative_data = ["Red", "Blue", "Black"], [10, 20, 30]

# Echo both samples so the difference is visible in the output.
print("Qualitative Data:", qualitative_data)
print("Quantitative Data:", quantitative_data)


# 2. Measures of Central Tendency: Mean, Median, Mode

# Theoretical Explanation:
# Mean: Average value, best used when data is roughly symmetric with no extreme outliers.
# Median: Middle value, useful when data has outliers.
# Mode: Most frequent value, useful for categorical data.

import numpy as np
from scipy import stats

# Small sample with one repeated value (20) so the mode is well defined.
# `data` is reused by the later dispersion, box-plot and IQR sections.
data = [10, 20, 20, 30, 40]

mean_value = np.mean(data)
median_value = np.median(data)
# SciPy >= 1.11 returns a scalar `.mode` for 1-D input, whereas older
# releases return a length-1 array. Indexing the scalar with [0] raises
# IndexError, so normalise to a 1-D array first; this works on both.
mode_value = np.atleast_1d(stats.mode(data).mode)[0]

print("Mean:", mean_value)
print("Median:", median_value)
print("Mode:", mode_value)


# 3. Concept of Dispersion: Variance & Standard Deviation

# Theoretical Explanation:
# Variance: Measures data spread from the mean.
# Standard Deviation: Square root of variance, shows data spread.

# Sample statistics: ddof=1 divides by (n - 1) instead of n.
variance_value = np.asarray(data).var(ddof=1)
std_dev_value = np.asarray(data).std(ddof=1)

print("Variance:", variance_value)
print("Standard Deviation:", std_dev_value)


# 4. Box Plot & Data Distribution

# Theoretical Explanation:
# Box plot shows data distribution, median, quartiles, and outliers.

import matplotlib.pyplot as plt

# Draw on the current axes (created on demand), then display the figure.
box_axes = plt.gca()
box_axes.boxplot(data)
box_axes.set_title("Box Plot")
plt.show()


# 5. Role of Random Sampling

# Theoretical Explanation:
# Random sampling ensures unbiased selection for accurate analysis.
# Example: Selecting 100 students randomly for a study.


# 6. Skewness & Its Types

# Theoretical Explanation:
# Positive Skew: Right tail longer (e.g., Income distribution).
# Negative Skew: Left tail longer (e.g., Scores on an easy exam).

# The single large value (10) stretches the right tail, so the result is positive.
skewed_data = [1, 2, 2, 3, 4, 10]
# bias=True is SciPy's default (population skewness); spelled out for clarity.
skewness_value = stats.skew(skewed_data, bias=True)
print("Skewness Value:", skewness_value)


# 7. Interquartile Range (IQR) & Outliers

# Theoretical Explanation:
# IQR = Q3 - Q1, middle 50% of data.
# Outliers are values beyond 1.5 × IQR outside Q1 and Q3.

# First and third quartiles in a single call; their gap is the IQR.
Q1, Q3 = np.percentile(data, [25, 75])
IQR = Q3 - Q1

# Tukey fences: anything more than 1.5 * IQR beyond a quartile is an outlier.
upper_bound = Q3 + 1.5 * IQR
lower_bound = Q1 - 1.5 * IQR

outliers = [x for x in data if x > upper_bound or x < lower_bound]

print("IQR:", IQR)
print("Outliers:", outliers)


# 8. Binomial Distribution Example

# Theoretical Explanation:
# Used when there are fixed trials, two outcomes (Success/Failure), and constant probability.
# Example: Tossing a coin 10 times.

from scipy.stats import binom

# n trials, success probability p, and the number of successes k of interest.
n, p, k = 10, 0.5, 5

# P(X = k) for X ~ Binomial(n, p).
binomial_prob = binom.pmf(k, n=n, p=p)
print("Binomial Probability of 5 successes in 10 trials:", binomial_prob)


# 9. Normal Distribution & Empirical Rule

# Theoretical Explanation:
# Bell-shaped curve.
# 68% of data within 1 std deviation, 95% within 2, 99.7% within 3.

# Standard normal: mean 0, standard deviation 1.
mu = 0
sigma = 1
# 100 evenly spaced points covering four standard deviations either side of the mean.
x = np.linspace(start=-4, stop=4, num=100)
pdf = stats.norm.pdf(x, loc=mu, scale=sigma)

# Plot the density curve on the current axes, then display the figure.
curve_axes = plt.gca()
curve_axes.plot(x, pdf)
curve_axes.set_title("Normal Distribution")
plt.show()


# 10. Poisson Process Example

# Theoretical Explanation:
# Models the number of independent events in a fixed interval of time or space, given a constant average rate.
# Example: Calls received per hour in a call center.

from scipy.stats import poisson

# Average rate of events per interval, and the event count k to evaluate.
lambda_val, k = 4, 6

# P(X = k) for X ~ Poisson(lambda_val); SciPy names the rate parameter `mu`.
poisson_prob = poisson.pmf(k, mu=lambda_val)
print("Poisson Probability of 6 calls:", poisson_prob)


# 11. Random Variables: Discrete vs. Continuous

# Theoretical Explanation:
# Discrete: Takes specific values (e.g., Number of students).
# Continuous: Can take any value in a range (e.g., Height).


# 12. Covariance & Correlation Example

# Theoretical Explanation:
# Covariance: Measures direction of relationship.
# Correlation: Measures both strength and direction on a standardized scale (-1 to 1).

# Paired observations: each test score corresponds to the study hours at the same index.
study_hours = [2, 3, 4, 5, 6]
test_scores = [50, 55, 60, 65, 70]

# 2x2 sample covariance matrix: variances on the diagonal, covariance off it.
cov_matrix = np.cov(study_hours, y=test_scores)
# The off-diagonal entry of the correlation matrix is the Pearson coefficient.
correlation_value = np.corrcoef(study_hours, test_scores)[0][1]

print("Covariance Matrix:\n", cov_matrix)
print("Correlation Coefficient:", correlation_value)
