# In [None]:
# Import necessary libraries
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
import pandas as pd

# Q1: What is Min-Max scaling, and how is it used in data preprocessing? Provide an example to illustrate its application.

# Markdown-formatted answer to Q1, kept as a plain string; nothing in the
# visible code prints or otherwise reads it.
min_max_scaling_explanation = """
**Min-Max Scaling**: Min-Max scaling (also known as normalization) transforms features to a fixed range, usually [0, 1]. It rescales each feature to ensure that its minimum value becomes 0 and its maximum value becomes 1.

*How It Works:*
1. For each feature, subtract the minimum value of the feature.
2. Divide the result by the range (maximum value - minimum value) of the feature.

*Example:*
Let's apply Min-Max scaling to the dataset [1, 5, 10, 15, 20].

*Formula:* X_scaled = (X - X_min) / (X_max - X_min)

*Applying the formula:*
- Min value (X_min) = 1
- Max value (X_max) = 20
- Scaled value = (X - 1) / (20 - 1)

The scaled values are: [0, 0.2105, 0.4737, 0.7368, 1.0]
"""

# Q2: What is the Unit Vector technique in feature scaling, and how does it differ from Min-Max scaling? Provide an example to illustrate its application.

# Markdown-formatted answer to Q2, kept as a plain string; nothing in the
# visible code prints or otherwise reads it.
unit_vector_explanation = """
**Unit Vector Scaling**: Unit Vector Scaling (also known as normalization or vector normalization) scales the feature values so that the length (or magnitude) of the vector is 1. It converts the feature values into a unit vector.

*How It Works:*
1. Compute the Euclidean norm (or length) of the feature vector.
2. Divide each feature value by the norm.

*Difference from Min-Max Scaling:*
- Min-Max Scaling rescales data to a specific range, while Unit Vector Scaling ensures that each feature vector has a length of 1, without altering the distribution of the values within the vector.

*Example:*
Let's apply Unit Vector Scaling to the dataset [2, 3, 6].

*Formula:* X_normalized = X / ||X||

- Compute the norm: ||X|| = sqrt(2^2 + 3^2 + 6^2) = sqrt(49) = 7
- Scaled values: [2/7, 3/7, 6/7] = [0.2857, 0.4286, 0.8571]
"""

# Q3: What is PCA (Principal Component Analysis), and how is it used in dimensionality reduction? Provide an example to illustrate its application.

# Markdown-formatted answer to Q3, kept as a plain string; nothing in the
# visible code prints or otherwise reads it.
# The worked example keeps a single component: retaining both components of a
# 2D dataset would not reduce its dimensionality.
pca_explanation = """
**PCA (Principal Component Analysis)**: PCA is a dimensionality reduction technique that transforms data into a new coordinate system where the greatest variances lie on the first axes (principal components).

*How It Works:*
1. Standardize the data.
2. Compute the covariance matrix.
3. Compute the eigenvalues and eigenvectors of the covariance matrix.
4. Sort the eigenvectors by eigenvalues in descending order.
5. Select the top k eigenvectors to form a new feature subspace.

*Example:*
Let's apply PCA to a 2D dataset with features [X1, X2].

1. Standardize the data.
2. Compute covariance matrix.
3. Calculate eigenvalues and eigenvectors.
4. Project the data onto the top principal component(s) (e.g., keep only the first principal component to reduce the data from 2D to 1D).
"""

# Q4: What is the relationship between PCA and Feature Extraction, and how can PCA be used for Feature Extraction? Provide an example to illustrate this concept.

# Markdown-formatted answer to Q4, kept as a plain string; nothing in the
# visible code prints or otherwise reads it.
pca_feature_extraction_explanation = """
**PCA and Feature Extraction:**
- **PCA** is a technique used for feature extraction, which involves creating new features (principal components) from the original features.
- **Feature Extraction** with PCA reduces the dimensionality by projecting data onto a new feature space formed by the principal components.

*Example:*
1. Apply PCA to a dataset with 5 features.
2. PCA may reduce these to 2 principal components that capture the majority of the variance.
3. The new 2D feature space can be used for further analysis, simplifying the model while retaining most of the original information.
"""

# Q5: You are working on a project to build a recommendation system for a food delivery service. The dataset contains features such as price, rating, and delivery time. Explain how you would use Min-Max scaling to preprocess the data.

# Markdown-formatted answer to Q5, kept as a plain string; nothing in the
# visible code prints or otherwise reads it.
min_max_scaling_recommendation_explanation = """
**Using Min-Max Scaling for Recommendation System:**
1. **Apply Min-Max Scaling:** Normalize features like price, rating, and delivery time to the range [0, 1].
2. **Fit the Scaler:** Use MinMaxScaler from sklearn to fit the scaling parameters on the training data.
3. **Transform Data:** Transform the dataset to scale the features between 0 and 1.

*Benefits:*
- Ensures all features contribute equally to the model.
- Improves convergence and performance of machine learning algorithms.
"""

# Q6: You are working on a project to build a model to predict stock prices. The dataset contains many features, such as company financial data and market trends. Explain how you would use PCA to reduce the dimensionality of the dataset.

# Markdown-formatted answer to Q6, kept as a plain string; nothing in the
# visible code prints or otherwise reads it.
pca_stock_prices_explanation = """
**Using PCA for Stock Price Prediction:**
1. **Standardize the Data:** Ensure features have zero mean and unit variance.
2. **Apply PCA:** Use PCA to reduce the dimensionality of the dataset by selecting principal components that explain the most variance.
3. **Transform Data:** Project the data onto the principal components.

*Benefits:*
- Reduces computational complexity.
- Helps in visualizing high-dimensional data.
- Can improve model performance by removing noise and redundant features.
"""

# Q7: For a dataset containing the following values: [1, 5, 10, 15, 20], perform Min-Max scaling to transform the values to a range of -1 to 1.

def min_max_scaling_range(data, new_min=-1, new_max=1):
    """Linearly rescale *data* so its minimum maps to *new_min* and its maximum to *new_max*.

    Args:
        data: Sequence (or array-like) of numeric values.
        new_min: Lower bound of the target range.
        new_max: Upper bound of the target range.

    Returns:
        A list with one scaled entry per element of ``data``. An empty input
        yields an empty list. When every value is identical the range is zero
        and the formula is undefined, so each entry is mapped to ``new_min``
        instead of producing NaN.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        # np.min/np.max raise on empty input; there is nothing to scale.
        return []

    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant input: avoid the 0/0 division and pin everything to new_min.
        return list(np.full_like(values, new_min))

    scaled = (values - lowest) / span * (new_max - new_min) + new_min
    return list(scaled)

# Q7 input values, scaled with the function's default target range of [-1, 1].
data = [1, 5, 10, 15, 20]
scaled_data_range = min_max_scaling_range(data)
# A bare expression here is not the last statement of the cell, so it would
# never be displayed; print the result explicitly (rounded for readability).
print([round(float(value), 4) for value in scaled_data_range])

# Q8: For a dataset containing the following features: [height, weight, age, gender, blood pressure], perform Feature Extraction using PCA. How many principal components would you choose to retain, and why?

def pca_feature_extraction(data, n_components, *, standardize=True):
    """Fit PCA on *data* and return the explained-variance ratio per component.

    Args:
        data: Feature table accepted by ``PCA.fit`` (e.g. a numeric DataFrame
            or 2-D array with one row per sample).
        n_components: Forwarded unchanged to ``PCA(n_components=...)``.
        standardize: When True (default), features are standardized with
            ``StandardScaler`` before fitting so that columns measured on
            larger scales do not dominate the components. Pass False to fit
            on the raw values.

    Returns:
        The fitted model's ``explained_variance_ratio_`` array.
    """
    # PCA centers the data but does not rescale it, so standardize first.
    features = StandardScaler().fit_transform(data) if standardize else data
    pca = PCA(n_components=n_components)
    pca.fit(features)
    return pca.explained_variance_ratio_

# Example data: five samples of the Q8 features.
data = pd.DataFrame({
    'height': [150, 160, 170, 180, 190],
    'weight': [50, 60, 70, 80, 90],
    'age': [20, 25, 30, 35, 40],
    'gender': [0, 1, 0, 1, 0],  # 0 = Female, 1 = Male
    'blood pressure': [120, 130, 140, 150, 160],
})

# Standardize before PCA: the columns are on very different scales (gender is
# 0/1, blood pressure is 120-160), and unscaled PCA would be driven by the
# largest-scale columns.
standardized_data = StandardScaler().fit_transform(data)

# Fit PCA to the standardized example data.
# In this toy data height, weight, age and blood pressure are exact linear
# functions of one another, so two components are expected to capture
# essentially all of the variance.
pca = PCA(n_components=2)  # Example choice for components
pca.fit(standardized_data)
explained_variance = pca.explained_variance_ratio_
explained_variance
