# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

# Q1: Simple Linear Regression vs. Multiple Linear Regression

# --- Simple Linear Regression demo ---
# Synthetic data set: 100 samples of a single feature scaled to [0, 10), with
# a target that follows y = 2x plus Gaussian noise scaled by 2.
np.random.seed(0)  # fixed seed keeps the example reproducible
X_simple = 10 * np.random.rand(100, 1)
y_simple = 2 * X_simple + 2 * np.random.randn(100, 1)

# Fit the one-feature model (fit() returns the estimator itself).
# y_simple is kept as a (100, 1) column, so coef_ and intercept_ are arrays.
model_simple = LinearRegression().fit(X_simple, y_simple)

# Scatter the raw samples and overlay the model's predictions.
fig_simple, ax_simple = plt.subplots(figsize=(8, 6))
ax_simple.scatter(X_simple, y_simple, color='blue', label='Data points')
ax_simple.plot(
    X_simple,
    model_simple.predict(X_simple),
    color='red',
    label='Fitted line',
)
ax_simple.set_xlabel('X')
ax_simple.set_ylabel('y')
ax_simple.set_title('Simple Linear Regression')
ax_simple.legend()
plt.show()

# --- Multiple Linear Regression demo ---
# Synthetic data set: 100 samples of three features scaled to [0, 10). The
# target is 3*x0 + 2*x1 - x2 plus Gaussian noise scaled by 2.
np.random.seed(0)  # re-seed so this example is reproducible on its own
X_multiple = 10 * np.random.rand(100, 3)
x0, x1, x2 = X_multiple.T  # one 1-D view per feature column
y_multiple = 3 * x0 + 2 * x1 - x2 + 2 * np.random.randn(100)

# Fit the three-feature model (fit() returns the estimator itself).
model_multiple = LinearRegression().fit(X_multiple, y_multiple)

# Q1 Summary: text printed later as the answer to Q1.
simple_regression = """
Simple Linear Regression involves a single predictor variable and aims to model the relationship between that predictor and the target variable. For example, predicting house prices based on square footage alone.
Multiple Linear Regression involves multiple predictor variables and models the relationship between them and the target variable. For example, predicting house prices based on square footage, number of bedrooms, and location.
"""

# Q2: Assumptions of Linear Regression

# Answer text for Q2, assembled one output line per list entry. The empty
# first and last entries reproduce the leading and trailing newline of the
# printed answer. (In practice the checks listed here need visualizations and
# statistical tests; this block only stores the explanation.)
assumptions = "\n".join([
    "",
    "1. Linearity: The relationship between predictors and the target is linear.",
    "2. Independence: Observations are independent of each other.",
    "3. Homoscedasticity: The residuals have constant variance.",
    "4. Normality of Residuals: Residuals are normally distributed.",
    "",
    "You can check these assumptions by:",
    "- Plotting residuals vs. fitted values to check for homoscedasticity.",
    "- Using Q-Q plots or Shapiro-Wilk test to check residual normality.",
    "- Checking for multicollinearity using VIF (Variance Inflation Factor).",
    "",
])

# Q3: Interpreting Slope and Intercept

# Pull the two fitted parameters out of the simple model. The indexing below
# treats intercept_ as a 1-D array and coef_ as a 2-D array, taking the first
# entry of each.
intercept, slope = model_simple.intercept_[0], model_simple.coef_[0, 0]

# Answer text for Q3 with the fitted values rendered to two decimals.
example_interpretation = f"""
In the simple linear regression model: y = {intercept:.2f} + {slope:.2f} * X
- The intercept (b0) is {intercept:.2f}. It represents the expected value of y when X = 0.
- The slope (b1) is {slope:.2f}. It indicates the change in y for a one-unit change in X.
For example, if we predict house prices, the intercept is the price when the size is zero, and the slope represents how much the price increases for each additional square foot.
"""

# Q4: Gradient Descent

# Answer text for Q4, assembled one output line per list entry. The empty
# first and last entries reproduce the leading and trailing newline of the
# printed answer.
gradient_descent = "\n".join([
    "",
    (
        "Gradient Descent is an optimization algorithm used to minimize the "
        "cost function by iteratively moving in the direction of the steepest "
        "descent. In machine learning, it is used to find the optimal "
        "parameters (weights) for the model."
    ),
    "The process involves:",
    "1. Initializing parameters randomly.",
    "2. Calculating the gradient of the cost function.",
    "3. Updating parameters in the opposite direction of the gradient.",
    "4. Repeating until convergence.",
    "",
])

# Q5: Multiple Linear Regression Model

# Answer text for Q5: a single paragraph wrapped in a leading and a trailing
# newline, built from adjacent string literals to keep source lines short.
multiple_linear_regression = (
    "\n"
    "Multiple Linear Regression involves modeling the relationship between "
    "multiple predictor variables and a target variable. Unlike Simple Linear "
    "Regression, which uses one predictor, Multiple Linear Regression accounts "
    "for the effects of multiple predictors simultaneously."
    "\n"
)

# Q6: Multicollinearity in Multiple Linear Regression

# Example to detect multicollinearity
# X_multiple is a plain NumPy array, and add_constant() returns the same kind
# of object it is given. Wrap the features in a DataFrame with explicit column
# names first so the result has the `.columns` / `.values` attributes that the
# VIF table below relies on (an ndarray would raise AttributeError here).
feature_names = [f"X{i + 1}" for i in range(X_multiple.shape[1])]
X_with_const = add_constant(pd.DataFrame(X_multiple, columns=feature_names))

# One VIF per design-matrix column, including the added constant column.
vif = pd.DataFrame()
vif['Variable'] = X_with_const.columns
vif['VIF'] = [
    variance_inflation_factor(X_with_const.values, i)
    for i in range(X_with_const.shape[1])
]

# Answer text for Q6 (printed together with the VIF table).
multicollinearity_summary = """
Multicollinearity occurs when predictor variables are highly correlated with each other, which can lead to unstable estimates of coefficients.
- Detection: Use VIF (Variance Inflation Factor). VIF values greater than 10 indicate significant multicollinearity.
- Addressing: Remove highly correlated predictors, combine predictors, or use techniques like Principal Component Analysis (PCA).
"""

# Q7: Polynomial Regression Model

# Example of Polynomial Regression
# Expand the single feature with degree-2 polynomial terms, then fit an
# ordinary linear model on the expanded feature matrix.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X_simple)
model_poly = LinearRegression()
model_poly.fit(X_poly, y_simple)

# Plot Polynomial Regression
# X_simple is in random order and a line plot joins points in the order they
# are given, so the fitted curve is drawn over rows sorted by x. Without the
# sort the "curve" doubles back on itself instead of forming one smooth line.
# X_poly is reused rather than transforming X_simple a second time.
sort_idx = np.argsort(X_simple[:, 0])
plt.figure(figsize=(8, 6))
plt.scatter(X_simple, y_simple, color='blue', label='Data points')
plt.plot(
    X_simple[sort_idx],
    model_poly.predict(X_poly[sort_idx]),
    color='green',
    label='Polynomial fit',
)
plt.xlabel('X')
plt.ylabel('y')
plt.title('Polynomial Regression')
plt.legend()
plt.show()

# Q7 Summary: text printed later as the answer to Q7.
polynomial_regression = """
Polynomial Regression extends Linear Regression by including polynomial terms of the predictor variables. This allows for modeling non-linear relationships.
For example, predicting house prices with a quadratic term for square footage.
"""

# Q8: Advantages and Disadvantages of Polynomial Regression

# Answer text for Q8, assembled one output line per list entry. Empty entries
# are the blank separator lines, plus the leading and trailing newline of the
# printed answer.
polynomial_advantages_disadvantages = "\n".join([
    "",
    "Advantages:",
    "1. Can capture non-linear relationships between predictors and the target.",
    "2. Allows for more flexibility in modeling.",
    "",
    "Disadvantages:",
    "1. Can lead to overfitting, especially with high-degree polynomials.",
    "2. More complex models can be harder to interpret.",
    "",
    "Use Polynomial Regression when you suspect a non-linear relationship between predictors and the target variable.",
    "",
])

# Display results
# Each entry pairs a heading with the objects printed beneath it, in order.
# Every heading after the first starts with a newline so the sections are
# separated by a blank line in the output.
report_sections = [
    ("Q1: Simple vs Multiple Linear Regression", [simple_regression]),
    ("\nQ2: Assumptions of Linear Regression", [assumptions]),
    ("\nQ3: Interpreting Slope and Intercept", [example_interpretation]),
    ("\nQ4: Gradient Descent", [gradient_descent]),
    ("\nQ5: Multiple Linear Regression Model", [multiple_linear_regression]),
    # Q6 prints the explanation followed by the VIF table.
    ("\nQ6: Multicollinearity in Multiple Linear Regression", [multicollinearity_summary, vif]),
    ("\nQ7: Polynomial Regression Model", [polynomial_regression]),
    ("\nQ8: Advantages and Disadvantages of Polynomial Regression", [polynomial_advantages_disadvantages]),
]

for heading, bodies in report_sections:
    print(heading)
    for body in bodies:
        print(body)
