# In [None]:
import numpy as np
from scipy.stats import bernoulli, binom, norm, uniform
import matplotlib.pyplot as plt

# Q1: What are the Probability Mass Function (PMF) and Probability Density Function (PDF)? Explain with an example.
# PMF: A Probability Mass Function is used for discrete random variables, giving the probability that a discrete random variable is exactly equal to some value.
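# PDF: A Probability Density Function is used for continuous random variables. It gives the relative likelihood (density) of the variable near a given value; the probability of any single exact value is 0, and the probability of an interval is the area under the PDF over that interval.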

# Example of PMF: tossing a fair coin (Bernoulli distribution).
# `p` is the probability of the outcome coded k=1, so with p=p_heads the
# outcome k=1 is heads and k=0 is tails.
p_heads = 0.5
# Evaluated at k=[0, 1], so the array is ordered [P(tails), P(heads)].
bernoulli_pmf = bernoulli.pmf(k=[0, 1], p=p_heads)
# Label the values in the same order as the array.
print("Bernoulli PMF for tails (k=0) and heads (k=1): ", bernoulli_pmf)

# Example of PDF: standard normal density evaluated on an evenly spaced
# grid of 1000 points over [-3, 3].
x = np.linspace(start=-3, stop=3, num=1000)
pdf_values = norm.pdf(x, loc=0, scale=1)

# Draw the curve, then label the current axes in a single call.
plt.plot(x, pdf_values)
plt.gca().set(
    title="Normal Distribution PDF",
    xlabel="Value",
    ylabel="Density",
)
plt.show()

# Q2: What is the Cumulative Distribution Function (CDF)? Explain with an example. Why is the CDF used?
# CDF: The Cumulative Distribution Function gives the probability that a random variable will take a value less than or equal to a given value.
# CDF is used to find probabilities for intervals and to understand the distribution of data.

# Example: CDF of the standard normal, evaluated on the grid `x` that the
# PDF example defined earlier in this script.
cdf_values = norm.cdf(x, loc=0, scale=1)

# Draw the curve, then label the current axes in a single call.
plt.plot(x, cdf_values)
plt.gca().set(
    title="Normal Distribution CDF",
    xlabel="Value",
    ylabel="Cumulative Probability",
)
plt.show()

# Q3: What are some examples of situations where the normal distribution might be used as a model?
# Examples: Heights of people, test scores, measurement errors.
# The mean (μ) controls the center of the distribution, while the standard deviation (σ) controls the spread.

# Q4: Explain the importance of Normal Distribution. Give a few real-life examples of Normal Distribution.
# Importance: The normal distribution is important because of the Central Limit Theorem, which states that the suitably normalized sum (or mean) of many independent, identically distributed random variables with finite variance tends towards a normal distribution.
# Real-life examples: IQ scores, human heights, measurement errors.

# Q5: What is Bernoulli Distribution? Give an Example. What is the difference between Bernoulli Distribution and Binomial Distribution?
# Bernoulli Distribution: It represents a single trial with two possible outcomes (success or failure).
# Example: Flipping a coin.
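# Difference: The Bernoulli distribution represents a single trial, while the binomial distribution represents the number of successes in n independent Bernoulli trials that share the same success probability (Bernoulli is the binomial with n = 1).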

# Q6. Consider a dataset with a mean of 50 and a standard deviation of 10. If we assume that the dataset is normally distributed, what is the probability that a randomly selected observation will be greater than 60?
# Parameters of the assumed normal distribution and the cut-off of interest.
mean, std_dev = 50, 10
threshold = 60
# Standardise the cut-off: how many standard deviations it lies above the mean.
z_score = (threshold - mean) / std_dev
# P(Z > z) is the survival function. norm.sf evaluates the upper tail directly
# rather than forming 1 - cdf, which can lose precision when the tail is small.
probability = norm.sf(z_score)
print("Probability that an observation is greater than 60: ", probability)

# Q7: Explain uniform Distribution with an example.
# Uniform Distribution: All outcomes are equally likely within a certain interval.
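# Example: Rolling a fair six-sided die (a discrete uniform distribution); the code below instead samples a continuous uniform distribution on the interval [0, 10].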

# Example of uniform distribution: draw 1000 samples from the continuous
# uniform distribution with loc=0 and scale=10, then histogram them.
uniform_data = uniform.rvs(loc=0, scale=10, size=1000)

# Ten bins with outlined bars; label the current axes in a single call.
plt.hist(uniform_data, bins=10, edgecolor='black')
plt.gca().set(
    title="Uniform Distribution Histogram",
    xlabel="Value",
    ylabel="Frequency",
)
plt.show()

# Q8: What is the z score? State the importance of the z score.
# Z-score: A Z-score is the number of standard deviations a data point is from the mean. It standardizes different datasets for comparison.

# Q9: What is Central Limit Theorem? State the significance of the Central Limit Theorem.
# Central Limit Theorem: It states that the sampling distribution of the sample mean will approach a normal distribution as the sample size becomes large, regardless of the original distribution of the data.
# Significance: It allows for making inferences about population parameters using sample statistics.

# Q10: State the assumptions of the Central Limit Theorem.
# 1. The samples must be independent and drawn from the same population (identically distributed).
# 2. The sample size should be sufficiently large (n > 30 is a common rule of thumb).
# 3. The population must have a finite variance.
