[Reference](https://medium.com/@ccpythonprogramming/python-fundamental-math-for-data-science-633c8cb9992c)

# 1. Probability in Spam Detection

In [1]:
# spam_detection.py
# Naive Bayes posterior for an email that contains both "buy" and "free".

# Priors: how likely an email is spam / not spam before looking at its words.
p_spam = 0.4
p_not_spam = 0.6

# Per-keyword likelihoods, P(keyword | class).
p_keyword_buy_given_spam, p_keyword_buy_given_not_spam = 0.7, 0.2
p_keyword_free_given_spam, p_keyword_free_given_not_spam = 0.8, 0.3

# Joint likelihood of seeing both keywords within each class. Multiplying the
# per-keyword values treats the keywords as conditionally independent.
p_keywords_given_spam = p_keyword_buy_given_spam * p_keyword_free_given_spam
p_keywords_given_not_spam = p_keyword_buy_given_not_spam * p_keyword_free_given_not_spam

# Bayes' theorem: posterior = (likelihood * prior) / evidence.
spam_term = p_keywords_given_spam * p_spam
not_spam_term = p_keywords_given_not_spam * p_not_spam
p_keywords = spam_term + not_spam_term  # evidence: P(keywords) over both classes
p_spam_given_keywords = spam_term / p_keywords

print(f"Probability that the email is spam given 'buy' and 'free': {p_spam_given_keywords:.2f}")

Probability that the email is spam given 'buy' and 'free': 0.86


# 2. Descriptive Statistics in Customer Analytics

In [2]:
import numpy as np

# Customer spending data (in dollars)
spending = [100, 150, 200, 250, 300, 350, 400]

# Summarise the centre and spread of the data.
spending_values = np.asarray(spending)
mean_spending = spending_values.mean()
median_spending = np.median(spending_values)
# NumPy's default is the population standard deviation (ddof=0).
std_dev_spending = spending_values.std()

print(f"Mean Spending: ${mean_spending:.2f}")
print(f"Median Spending: ${median_spending:.2f}")
print(f"Standard Deviation in Spending: ${std_dev_spending:.2f}")

Mean Spending: $250.00
Median Spending: $250.00
Standard Deviation in Spending: $100.00


# 3. Linear Regression for Sales Forecasting

In [3]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Data: Months and Sales (in thousands of dollars)
# The month numbers are reshaped into a single-column 2-D feature matrix.
months = np.arange(1, 6).reshape(-1, 1)
sales = np.array([10, 12, 15, 20, 22])  # Sales in $1000s

# Create the linear regression model and fit it to our data in one step
# (fit() returns the fitted estimator itself).
model = LinearRegression().fit(months, sales)

# Predict sales for the 6th month
next_month = [[6]]
predicted_sales = model.predict(next_month)
print(f"Predicted Sales for month 6: ${predicted_sales[0]:.2f}k")

Predicted Sales for month 6: $25.40k


# 4. Matrix Algebra in Machine Learning

In [4]:
import numpy as np

# Input matrix: one row per sample, one column per feature (2 samples x 3 features)
X = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Weight matrix: one row per input feature, one column per output node (3 x 2)
W = np.array([
    [0.1, 0.2],
    [0.3, 0.4],
    [0.5, 0.6],
])

# Matrix product of inputs and weights: (2 x 3) . (3 x 2) -> (2 x 2)
output = X.dot(W)
print(f"Output Matrix:\n{output}")

Output Matrix:
[[2.2 2.8]
 [4.9 6.4]]


# 5. Calculus in Machine Learning: Gradient Descent

In [5]:
# Gradient Descent algorithm
def gradient_descent(derivative_func, initial_x, learning_rate, epochs):
    """Minimise a function by repeatedly stepping against its derivative.

    Args:
        derivative_func: Callable returning the derivative (gradient) at ``x``.
        initial_x: Starting point. It is never modified, even when it is a
            mutable object such as a NumPy array.
        learning_rate: Step-size multiplier applied to each gradient.
        epochs: Number of update steps to perform; ``0`` returns ``initial_x``.

    Returns:
        The value of ``x`` after ``epochs`` update steps.
    """
    x = initial_x
    for _ in range(epochs):
        grad = derivative_func(x)
        # Rebind instead of using ``x -= ...``: the in-place form would write
        # through to the caller's ``initial_x`` when it is a mutable array
        # (and fails outright for integer arrays).
        x = x - learning_rate * grad  # Update rule
    return x

# Derivative of f(x) = x^2, which is f'(x) = 2x
def derivative(x):
    """Return the slope of f(x) = x^2 at ``x``, i.e. ``2 * x``."""
    slope = 2 * x
    return slope

# Minimize the function starting from x=10
descent_settings = {"initial_x": 10, "learning_rate": 0.1, "epochs": 100}
min_x = gradient_descent(derivative, **descent_settings)
print(f"Value of x that minimizes the function: {min_x:.2f}")

Value of x that minimizes the function: 0.00


# 6. Hypothesis Testing in A/B Testing

In [6]:
from scipy import stats

# Conversion rates for two groups (A and B)
group_A = [10, 12, 14, 16, 18]
group_B = [20, 22, 24, 26, 28]

# Threshold below which the p-value counts as statistically significant
SIGNIFICANCE_LEVEL = 0.05

# Perform an independent t-test (SciPy defaults: two-sided, equal variances assumed)
test_result = stats.ttest_ind(group_A, group_B)
t_stat, p_value = test_result.statistic, test_result.pvalue

print(f"T-statistic: {t_stat:.2f}")
print(f"P-value: {p_value:.5f}")

# A p-value under the threshold means we reject the null hypothesis
# (i.e., a significant difference exists)
is_significant = p_value < SIGNIFICANCE_LEVEL
if not is_significant:
    print("No statistically significant difference between Group A and Group B.")
else:
    print("There is a statistically significant difference between Group A and Group B.")

T-statistic: -5.00
P-value: 0.00105
There is a statistically significant difference between Group A and Group B.
