# **Introduction to ML**

In [2]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.linear_model import LogisticRegression
from lime.lime_tabular import LimeTabularExplainer

# Supervised Learning: Simple Perceptron Example

In [4]:
def sign(x):
    """Map a number to a class label in {-1, 1}.

    Returns 1 when ``x`` is strictly positive and -1 otherwise, so an input
    of exactly 0 is mapped to -1.
    """
    if x > 0:
        return 1
    return -1

def train_perceptron(X, y, learning_rate, epochs):
    """Fit perceptron weights with a mistake-driven update rule.

    Args:
        X: 2-D NumPy array of shape (N, D) holding one sample per row.
        y: Indexable collection of N target labels, read as ``y[i]`` for row i.
            The update multiplies by the label, so labels are presumably
            meant to be -1 or 1 (the values ``sign`` produces) -- confirm
            against callers.
        learning_rate: Step size applied to every weight correction.
        epochs: Maximum number of passes over the training samples.

    Returns:
        1-D array of D + 1 weights; index 0 is the bias weight.
    """
    n_samples, n_features = X.shape
    # Prepend a constant-1 column so the bias is learned as weights[0].
    augmented = np.hstack((np.ones((n_samples, 1)), X))
    weights = np.zeros(n_features + 1)

    for _ in range(epochs):
        mistakes = 0
        for i, row in enumerate(augmented):
            target = y[i]
            if sign(np.dot(row, weights)) != target:
                mistakes += 1
                # Nudge the weights toward the correct side for this sample.
                weights = weights + learning_rate * target * row
        # A full pass without corrections means no sample was misclassified,
        # so further passes would not change the weights.
        if not mistakes:
            break
    return weights

def predict_perceptron(X_new, weights):
    """Classify samples with trained perceptron weights.

    Args:
        X_new: Array-like of shape (M, D) with one sample per row. A single
            sample may also be passed as a 1-D sequence of D features.
        weights: Array-like of D + 1 weights whose first entry is the bias
            weight (the layout produced by ``train_perceptron``).

    Returns:
        1-D integer array of M labels: 1 where the net input is strictly
        positive and -1 otherwise (the same threshold rule as ``sign``).

    Raises:
        ValueError: If ``weights`` is not 1-D or the number of features in
            ``X_new`` does not equal ``len(weights) - 1``.
    """
    samples = np.asarray(X_new, dtype=float)
    if samples.ndim == 1:
        # Treat a flat vector as one sample instead of failing inside hstack.
        samples = samples.reshape(1, -1)
    weights = np.asarray(weights, dtype=float)
    if weights.ndim != 1 or samples.ndim != 2 or samples.shape[1] + 1 != weights.shape[0]:
        raise ValueError(
            "X_new must have shape (M, D) and weights must have length D + 1; "
            f"got X_new shape {samples.shape} and weights shape {weights.shape}"
        )

    # Prepend the constant-1 bias column, then score every row in one product.
    biased = np.hstack((np.ones((samples.shape[0], 1)), samples))
    net_inputs = biased @ weights
    return np.where(net_inputs > 0, 1, -1)

# Example Usage of Perceptron
# Five 2-feature training samples with their -1 / 1 labels.
X_train_perceptron = np.array([[2, 1], [3, 2], [1, 0.5], [4, 3], [1.5, 2]])
y_train_perceptron = np.array([-1, 1, -1, 1, -1])

# Training hyper-parameters: update step size and maximum number of passes.
learning_rate_val = 0.1
epochs_val = 10
final_weights_perceptron = train_perceptron(
    X=X_train_perceptron,
    y=y_train_perceptron,
    learning_rate=learning_rate_val,
    epochs=epochs_val,
)

# Unseen samples to classify with the learned weights.
X_new_data_perceptron = np.array([[2.5, 1.5], [0.5, 0.2], [3.5, 2.8]])
new_predictions_perceptron = predict_perceptron(
    X_new=X_new_data_perceptron,
    weights=final_weights_perceptron,
)

# Ensembles: Bagging (Random Forest Idea) Example

In [5]:
def demonstrate_bagging():
    """Compare one decision tree with a bagged ensemble of trees on synthetic data.

    Builds 100 random samples with 5 features whose label is 1 when the first
    two features sum to more than 1.0, holds out 30% of them for testing, then
    fits a depth-5 decision tree and a 10-estimator ``BaggingClassifier`` of
    the same tree on the training portion.

    Side effects:
        Seeds NumPy's global random generator with 42.

    Returns:
        tuple[float, float]: ``(single_tree_accuracy, bagging_accuracy)``,
        both measured on the held-out test split.
    """
    np.random.seed(42)

    # Only the first two of the five features determine the label.
    X_ensemble = np.random.rand(100, 5)
    y_ensemble = (X_ensemble[:, 0] + X_ensemble[:, 1] > 1.0).astype(int)

    X_train_ensemble, X_test_ensemble, y_train_ensemble, y_test_ensemble = train_test_split(
        X_ensemble, y_ensemble, test_size=0.3, random_state=42
    )

    def _holdout_accuracy(model):
        # Fit on the training split and score on the held-out split.
        model.fit(X_train_ensemble, y_train_ensemble)
        return accuracy_score(y_test_ensemble, model.predict(X_test_ensemble))

    single_tree_accuracy = _holdout_accuracy(
        DecisionTreeClassifier(max_depth=5, random_state=42)
    )
    bagging_accuracy = _holdout_accuracy(
        BaggingClassifier(
            estimator=DecisionTreeClassifier(max_depth=5, random_state=42),
            n_estimators=10,
            random_state=42,
        )
    )
    # Hand the scores back so the comparison is visible to the caller.
    return single_tree_accuracy, bagging_accuracy


demonstrate_bagging()

# Deep Learning Architectures - Placeholder for complex models

In [6]:
def conceptual_deep_neural_network():
    """Placeholder for a deep-learning architecture example.

    Not implemented yet: the function takes no arguments, does no work and
    returns None.
    """
    return None

# Convolutional Neural Networks (CNNs) - Conceptual Layers
def conceptual_cnn_layers():
    """Placeholder for a conceptual walk-through of CNN layers.

    Not implemented yet: the function takes no arguments, does no work and
    returns None.
    """
    return None

# Recurrent Neural Networks (RNNs) and Transformers - Conceptual
def conceptual_rnn_transformer():
    """Placeholder for a conceptual RNN / Transformer example.

    Not implemented yet: the function takes no arguments, does no work and
    returns None.
    """
    return None

In [None]:
# Reinforcement Learning - Q-Learning Placeholder
def conceptual_q_learning():
    """Placeholder for a reinforcement-learning (Q-learning) example.

    Not implemented yet: the function takes no arguments, does no work and
    returns None.
    """
    return None

# Generative Models: GANs - Conceptual Placeholder
def conceptual_gan():
    """Placeholder for a generative-model (GAN) example.

    Not implemented yet: the function takes no arguments, does no work and
    returns None.
    """
    return None

In [7]:
# Section 6
# Ethical Considerations and Interpretability in ML: LIME Example
def demonstrate_lime_interpretability():
    """Explain one logistic-regression prediction with LIME on toy student data.

    Fits a ``LogisticRegression`` on five hand-written samples with the
    features ExamScore, StudyHours and SleepHours and a 0/1 target, then asks
    a ``LimeTabularExplainer`` to explain the model's top predicted label for
    one new student.

    Returns:
        The explanation object returned by ``explain_instance`` for the new
        student, covering all three features.
    """
    feature_names = ["ExamScore", "StudyHours", "SleepHours"]
    # Passed to LIME as display names for the two target values (0 and 1).
    class_names = ["Fail", "Pass"]

    X_lime = np.array([
        [10, 2, 8],
        [5, 8, 3],
        [9, 3, 7],
        [6, 7, 4],
        [11, 1, 9]
    ])
    y_lime = np.array([1, 0, 1, 0, 1])

    model = LogisticRegression(random_state=42)
    model.fit(X_lime, y_lime)

    # LIME explains a prediction from randomly perturbed copies of the
    # instance; seeding the explainer keeps the explanation reproducible.
    explainer = LimeTabularExplainer(
        training_data=X_lime,
        feature_names=feature_names,
        class_names=class_names,
        mode='classification',
        random_state=42
    )

    new_student_data = np.array([7, 5, 5])

    explanation = explainer.explain_instance(
        data_row=new_student_data,
        predict_fn=model.predict_proba,
        num_features=len(feature_names),
        top_labels=1
    )
    # Return the explanation so callers can inspect or render it.
    return explanation


demonstrate_lime_interpretability()








# Section 6.1
# Bias and Fairness in Algorithms - Conceptual Example of Data Mitigation
def demonstrate_data_fairness_mitigation():
    """Build a biased toy loan dataset and a relabelled "fairer" counterpart.

    In the biased frame all four 'M' applicants are approved while only one of
    the four 'F' applicants is. The fairer frame keeps the same applicants and
    sets ``LoanApproved`` to 1 for every row. This is a conceptual
    illustration of editing labels, not a real mitigation algorithm.

    Returns:
        tuple[pandas.DataFrame, pandas.DataFrame]:
        ``(data_biased, data_fairer)``, two independent frames with the
        columns Age, Income, Gender and LoanApproved.
    """
    data_biased = pd.DataFrame({
        'Age': [30, 25, 35, 40, 28, 32, 29, 38],
        'Income': [50000, 40000, 60000, 70000, 45000, 55000, 48000, 65000],
        'Gender': ['M', 'F', 'M', 'M', 'F', 'F', 'M', 'F'],
        'LoanApproved': [1, 0, 1, 1, 0, 1, 1, 0]
    })

    # Derive the fairer frame from the biased one so the applicant columns
    # cannot drift apart; assign() returns a new frame and only the label
    # column changes.
    data_fairer = data_biased.assign(LoanApproved=1)

    return data_biased, data_fairer

demonstrate_data_fairness_mitigation()

In [1]:
# Machine Learning: Simple Ideas
# Section 1.0 Machine Learning

# This section talks about the general concept of Machine Learning.
# No specific Python packages are directly mentioned here, as it's a high-level introduction.

## Data-Driven Models
# This subsection explains the difference between data-driven and model-driven approaches.
# In Python, we often work with data using libraries like 'pandas' for data manipulation
# and 'numpy' for numerical operations.

import pandas as pd # For working with data tables
import numpy as np  # For numerical operations, like arrays and calculations

#### Example: Creating a simple dataset
# Toy table: two numeric feature columns plus one string class label per row.
data = dict(
    feature1=[10, 20, 15, 25, 30],
    feature2=[1, 2, 1.5, 2.5, 3],
    label=['cat', 'dog', 'cat', 'dog', 'cat'],
)
df = pd.DataFrame(data=data)
# print(df) # To show the data frame, commented out for brevity


In [2]:

## Feature Space
# This subsection describes what features are and how they are represented.
# Key packages: 'numpy' for vector operations, 'scipy.spatial.distance' for distance calculations.

### Equation 2.1: Feature vector representation
# In Python, a feature vector is simply a list or a NumPy array.
# Three components, so d = 3: x_1j = 1.0, x_2j = 2.5, x_3j = 3.0.
feature_vector_example = np.array((1.0, 2.5, 3.0))

### Equation 2.2: Euclidean distance
from scipy.spatial.distance import euclidean # Part of SciPy, a scientific computing library

# The points differ by 3 along the first axis and by 4 along the second.
point1, point2 = np.array([1, 2]), np.array([4, 6])
euclidean_distance = euclidean(u=point1, v=point2)
# print(f"Euclidean Distance: {euclidean_distance}") # Should be 5.0

### Equation 2.3: Hamming distance
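# For binary vectors, the Hamming distance is the number of positions where the XOR of the two vectors is 1, i.e. where their elements differ.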
def hamming_distance(vec1, vec2):
    """Count the positions at which two equal-shaped vectors differ.

    Args:
        vec1: Array-like of values (NumPy array, list, tuple, ...).
        vec2: Array-like with the same shape as ``vec1``.

    Returns:
        int: Number of positions where the two inputs are not equal.

    Raises:
        ValueError: If the inputs do not have the same shape.
    """
    # Convert first: comparing two plain lists with != yields one bool for the
    # whole list rather than an element-wise result.
    first, second = np.asarray(vec1), np.asarray(vec2)
    if first.shape != second.shape:
        raise ValueError(
            f"hamming_distance requires equal shapes, got {first.shape} and {second.shape}"
        )
    return int(np.count_nonzero(first != second))

# The vectors agree in the first position and differ in the other two.
binary_vec1, binary_vec2 = np.array([1, 0, 1]), np.array([1, 1, 0])
ham_dist = hamming_distance(vec1=binary_vec1, vec2=binary_vec2)
# print(f"Hamming Distance: {ham_dist}") # Should be 2

### Equation 2.4: Cosine similarity
from numpy.linalg import norm # For calculating vector norms (lengths)

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    The value is the dot product of the inputs divided by the product of
    their norms. If either input has zero norm the ratio is undefined, and
    0.0 is returned instead of dividing by zero.
    """
    numerator = np.dot(vec1, vec2)
    magnitudes = (norm(vec1), norm(vec2))
    # Guard clause: a zero-length vector has no direction to compare.
    if any(magnitude == 0 for magnitude in magnitudes):
        return 0.0
    return numerator / (magnitudes[0] * magnitudes[1])

# The two vectors share exactly one non-zero component (the first).
vec_a, vec_b = np.array([1, 1, 0]), np.array([1, 0, 1])
cos_sim = cosine_similarity(vec1=vec_a, vec2=vec_b)
# print(f"Cosine Similarity: {cos_sim}") # Example result

### Equation 2.5: Joint probability for independent variables
# This is a conceptual example in Python.
# It shows how probabilities multiply for independent events.
# Marginal probabilities of three events that are assumed to be independent.
p_x1, p_x2, p_x3 = 0.5, 0.4, 0.3
# Under independence the joint probability is the product of the marginals.
joint_probability = p_x1 * p_x2 * p_x3
# print(f"Joint Probability (independent): {joint_probability}")

#### Feature Selection and Feature Extraction
# Libraries like 'sklearn.feature_selection' and 'sklearn.decomposition' are key here.
from sklearn.feature_selection import SelectKBest, chi2 # For feature selection
from sklearn.decomposition import PCA # For Principal Component Analysis (feature extraction)

# Example for Feature Selection (conceptual)
# X_data = np.array([[10, 0.5, 2], [12, 0.6, 1], [8, 0.4, 3], [15, 0.7, 0]])
# y_labels = np.array([0, 1, 0, 1])
# selector = SelectKBest(chi2, k=2)
# X_new = selector.fit_transform(X_data, y_labels)
# print(f"Selected features:\n{X_new}")

# Example for Feature Extraction (conceptual)
# pca = PCA(n_components=2)
# X_reduced = pca.fit_transform(X_data)
# print(f"PCA reduced features:\n{X_reduced}")

## Supervised and Unsupervised Learning
# This section distinguishes between supervised and unsupervised learning.
# 'sklearn' (Scikit-learn) is the primary library for both.

### Supervised Learning
# 'sklearn.linear_model' and 'sklearn.svm' each provide both classifiers and regressors (e.g. LogisticRegression, LinearRegression, SVC, SVR).
from sklearn.model_selection import train_test_split # For splitting data
from sklearn.linear_model import LogisticRegression # An example classifier

# Example: Prepare data for supervised learning
X = df.loc[:, ['feature1', 'feature2']]  # Features
y = df.loc[:, 'label']  # Labels
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Train a supervised model (fit() returns the estimator itself, so the calls chain).
model_supervised = LogisticRegression().fit(X_train, y_train)
# print(f"Supervised Model Score: {model_supervised.score(X_test, y_test)}")

### Unsupervised Learning
# 'sklearn.cluster' for clustering, 'sklearn.manifold' for low-dimensional embedding.
from sklearn.cluster import KMeans # An example clustering algorithm

# Example: Prepare data for unsupervised learning (no labels needed for training)
X_unsupervised = df.loc[:, ['feature1', 'feature2']]

# Train an unsupervised model: only the feature columns are passed, no labels.
# fit() returns the estimator itself, so construction and training chain.
model_unsupervised = KMeans(n_clusters=2, random_state=42, n_init=10).fit(X_unsupervised)
# print(f"Cluster Labels: {model_unsupervised.labels_}")

#### Active Learning Strategies (Conceptual in Python)
# Active learning often involves a loop where the model makes predictions,
# uncertainty is calculated, and then new data points are selected for labeling.
# No direct 'sklearn' module for active learning, but tools can be combined.

# from modAL.models import ActiveLearner # A dedicated library for active learning
# from sklearn.ensemble import RandomForestClassifier

# # Example: Uncertainty sampling - selecting data with low confidence
# # This often involves getting prediction probabilities from a model.
# # For instance, if model_supervised is a classifier:
# probabilities = model_supervised.predict_proba(X_test)
# # You would then identify indices where max(probability) is close to 0.5 (for binary classification)

## Generalization Performance
# This section deals with error measurement and the concept of generalization.
# 'sklearn.metrics' is crucial for loss functions and evaluating performance.

### Equation 2.6: Squared Error
from sklearn.metrics import mean_squared_error

# Per-sample errors are 1, -1 and 2, so the squared errors are 1, 1 and 4.
y_true_reg, y_pred_reg = np.array([5, 10, 15]), np.array([4, 11, 13])
mse = mean_squared_error(y_true=y_true_reg, y_pred=y_pred_reg)
# print(f"Mean Squared Error: {mse}")

### Equation 2.7: Absolute Error
from sklearn.metrics import mean_absolute_error

# Same targets and predictions as the squared-error example, scored by absolute error.
mae = mean_absolute_error(y_true=y_true_reg, y_pred=y_pred_reg)
# print(f"Mean Absolute Error: {mae}")

### Equation 2.8: 0-1 Loss Function (Misclassification Error)
# The 0-1 loss is the complement of accuracy (1 - accuracy); sklearn.metrics.zero_one_loss computes it directly.
from sklearn.metrics import accuracy_score

# The prediction is wrong only for the second sample: one misclassification out of four.
y_true_cls, y_pred_cls = np.array([0, 1, 0, 1]), np.array([0, 0, 0, 1])
zero_one_loss = 1 - accuracy_score(y_true=y_true_cls, y_pred=y_pred_cls)
# print(f"0-1 Loss: {zero_one_loss}")

### Equation 2.9: Hinge Loss
# This is typically used internally by SVMs (Support Vector Machines).
# You can implement it manually for conceptual understanding.
def hinge_loss(y_true, y_pred_scores):
    """Compute the per-sample hinge loss ``max(0, 1 - y_true * y_pred_scores)``.

    Args:
        y_true: Array-like of target labels; the formula expects +1 or -1.
        y_pred_scores: Array-like of decision-function scores, broadcastable
            against ``y_true``.

    Returns:
        NumPy array (or NumPy scalar for scalar inputs) of losses. A loss is 0
        once the label-times-score product reaches 1 or more.
    """
    # Coerce first so plain Python lists work; list * list raises TypeError.
    labels = np.asarray(y_true)
    scores = np.asarray(y_pred_scores)
    return np.maximum(0, 1 - labels * scores)

# Example:
# y_true_hinge = np.array([1, -1, 1, -1])
# y_pred_scores_hinge = np.array([0.8, -0.6, 0.1, -0.9])
# h_loss = hinge_loss(y_true_hinge, y_pred_scores_hinge)
# print(f"Hinge Loss per sample: {h_loss}")

### Equation 2.10: Training Error (Empirical Risk)
# This is the average of the loss function over the training data.
# Empirical risk: average the per-sample losses, with squared error as the loss.
# (Taking np.mean of an already-averaged MSE scalar would be a no-op.)
training_error = np.mean((y_true_reg - y_pred_reg) ** 2)  # Example using MSE
# print(f"Training Error (Empirical Risk): {training_error}")

### Equation 2.11: Example of Overfitting Function
# This function is not typically implemented directly as a model in ML libraries,
# but it illustrates a concept.

### Equation 2.12 & 2.13: Test Error (Generalization Error)
# This is calculated using unseen data, typically from X_test, y_test.
# Score the fitted classifier on the held-out rows it was not trained on.
test_predictions = model_supervised.predict(X=X_test)
generalization_accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)
# print(f"Generalization Accuracy: {generalization_accuracy}")

### Equation 2.14: Bias-Variance Decomposition (Conceptual)
# This concept is critical but not a direct function in Python libraries.
# It helps in understanding model trade-offs during development.

## Model Complexity
# This section discusses VC dimension and PAC learning.
# No direct Python functions for VC dimension calculation in 'sklearn'.
# It's more of a theoretical concept influencing model choices.

### Equation 2.15: Vapnik's Theorem (Conceptual)
# This is a theoretical bound. Libraries don't directly calculate this probability.
# However, the idea of limiting model complexity is implemented through regularization.

#### Regularization
# 'sklearn' models often have 'C' (for SVMs) or 'alpha' (for Lasso/Ridge regression) parameters
# to control regularization and thus model complexity.
from sklearn.linear_model import Ridge # An example of a regularized model

# ridge_model = Ridge(alpha=1.0) # alpha controls the amount of regularization
# ridge_model.fit(X_train, y_train_numerical) # Assuming y_train is numerical for regression
# print(f"Ridge Model Coefficients: {ridge_model.coef_}")

## Ensembles
# This section covers combining multiple models for better performance.
# 'sklearn.ensemble' is the primary package.

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier # Examples of ensemble methods
from sklearn.tree import DecisionTreeClassifier # A common base classifier

#### Bagging and Random Forests
# RandomForestClassifier in 'sklearn' is an implementation of bagging with decision trees.
# 100 trees; fit() returns the estimator itself, so construction and training chain.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
# print(f"Random Forest Score: {rf_model.score(X_test, y_test)}")

#### Stacking
# 'sklearn.ensemble.StackingClassifier' (or Regressor) is available in newer sklearn versions.
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier # Another base classifier

# Example of Stacking (conceptual)
# estimators = [
#     ('lr', LogisticRegression(random_state=42)),
#     ('rf', RandomForestClassifier(random_state=42))
# ]
# stacking_model = StackingClassifier(estimators=estimators, final_estimator=KNeighborsClassifier())
# stacking_model.fit(X_train, y_train)
# print(f"Stacking Classifier Score: {stacking_model.score(X_test, y_test)}")

#### Boosting
# 'sklearn.ensemble.GradientBoostingClassifier' is an example of boosting.
# 100 boosting stages with learning rate 0.1; fit() returns the estimator itself.
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)
# print(f"Gradient Boosting Score: {gb_model.score(X_test, y_test)}")

## Data Dependencies and Computational Complexity
# This section discusses how data structure and dependencies affect computation time.
# While no direct Python functions represent N:N or N:K dependencies as variables,
# the choice of algorithm in 'sklearn' implicitly handles these complexities.

#### Distance Matrix Calculation
# Example of O(N^2 * d) complexity for a distance matrix.
# If you have N points with d features:
num_points = 100
num_features = 10
# Draw from a locally seeded generator so the sample data is identical on every
# run and does not depend on (or disturb) NumPy's global random state.
# Values are uniform in [0, 1).
random_data = np.random.default_rng(42).random((num_points, num_features))

# from sklearn.metrics.pairwise import euclidean_distances
# dist_matrix = euclidean_distances(random_data) # This would calculate all N*N distances

#### Tree-based Spatial Index (Conceptual)
# Libraries like 'scipy.spatial.KDTree' or 'sklearn.neighbors.NearestNeighbors'
# use tree-like structures to speed up nearest neighbor searches, reducing complexity.
from sklearn.neighbors import NearestNeighbors

# nn_model = NearestNeighbors(n_neighbors=5, algorithm='kd_tree')
# nn_model.fit(random_data)
# distances, indices = nn_model.kneighbors(random_data)
# print(f"Distances to 5 nearest neighbors:\n{distances[:5]}")

#### Neural Networks and N:K Dependency
# 'tensorflow' or 'pytorch' are popular libraries for building neural networks.
# Their architectures inherently manage the N:K dependency through weights.
# The number of weights K is usually much smaller than the number of data points N.

# import tensorflow as tf
# from tensorflow import keras

# model_nn = keras.Sequential([
#     keras.layers.Dense(units=10, activation='relu', input_shape=(num_features,)), # 10 units -> num_features * 10 weights plus 10 biases
#     keras.layers.Dense(units=1, activation='sigmoid')
# ])
# model_nn.compile(optimizer='adam', loss='binary_crossentropy')
# model_nn.fit(random_data, np.random.randint(0, 2, num_points), epochs=10)
# This example is conceptual to show how NN layers have a set number of units/weights (K)
# independent of the number of training samples (N) once the architecture is defined.