In [None]:
# -------- INFO --------
"""
Repository: https://github.com/NLBrien/mod550-2025
Creation date: 2025-10-14
Author: Nathan L.Brien
Course: MOD550 - Machine Learning
Title: Semester project
Description:    Collect all functions into one Python notebook file.
                The functions could not be run as individual files because of an unidentified Python error,
                so all sub-functions were gathered into one script to work around the library error.
    Listed functions
    - Linear Regression (LinReg) using sklearn
    - Mean Squared Error (MSE) code in vanilla Python
    - Neural Network (NN) using keras
    - K-Means (KM) clustering
    - Gaussian (GMM) code
Python 3.10.9: necessary for use of tensorflow

Last modification date: 2025-10-15
"""


"\nRepository: https://github.com/NLBrien/mod550-2025\nCreation date: 2025-10-14\nAuthor: Nathan L.Brien\nCourse: MOD550 - Machine Learning\nTitle: Semester project\nDescription:    Collect all functions into one python notebook file.\n                Functions couldn't be launched as individual file due to unknown python error.\n                Needed to gather all sub function to one script to fix library error.\n    Listed functions\n    - Linear Regression (LinReg) using sklearn\n    - Mean Squared Error (MSE) code in vanilla Python\n    - Neural Network (NN) using keras\n    - K-Means (KM) clustering\n    - Gaussian (GMM) code\nPython 3.10.9: necessary for use of tensorflow\n\nLast modification date: 2025-10-14\n"

In [None]:
# -------- LIBRARIES --------

## BASIC LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## LINEAR REGRESSION (LinReg) IMPORT
from sklearn.linear_model import LinearRegression

## MEAN SQUARED ERROR (MSE) IMPORT
# N/A

## NEURAL NETWORK (NN) IMPORT
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l1, l2, l1_l2

## K-MEANS (KM) IMPORT
from sklearn.cluster import KMeans

## GAUSSIAN MIXTURE MODEL (GMM) IMPORT
from sklearn.mixture import GaussianMixture


In [None]:
# -------- LINEAR REGRESSION (LinReg) --------
"""
Linear Regression in simple Python (NumPy and sklearn)
Source script: PYTHON_DEF_linearregression.py

Before using this function, ensure that:
    - The necessary library is imported (NumPy, sklearn)
    - The data is numeric
    - The data contains no NaN values
Parameters:
    LinReg_x : [array], feature data
    LinReg_y : [array], target data
Returns:
    LinReg_model.coef_: [ndarray], regression coefficients
    LinReg_model.intercept_: [float or ndarray], intercept term(s)
    LinReg_y_pred : [ndarray], predicted values
    LinReg_model : [LinearRegression object], the fitted model
"""

def Linear_Regression(LinReg_x, LinReg_y):
    """Fit a linear regression and predict on the same data it was fitted on.

    Both inputs are copied into float NumPy arrays. A 1D input is turned
    into a single column so the estimator always receives 2D data.

    Parameters:
        LinReg_x : [array], feature data
        LinReg_y : [array], target data
    Returns:
        coef_ : regression coefficients of the fitted model
        intercept_ : intercept term(s) of the fitted model
        predictions : values predicted by the model for the feature data
        regressor : the fitted LinearRegression object
    """

    def _as_2d(values):
        # Copy into a float array; a flat vector becomes one column
        arr = np.array(values, dtype=float)
        return arr.reshape(-1, 1) if arr.ndim == 1 else arr

    features = _as_2d(LinReg_x)
    targets = _as_2d(LinReg_y)

    # Build the estimator, then train it on the prepared data
    regressor = LinearRegression()
    regressor.fit(features, targets)

    # In-sample predictions (same rows the model was trained on)
    predictions = regressor.predict(features)

    return regressor.coef_, regressor.intercept_, predictions, regressor


In [4]:
# -------- MEAN SQUARED ERROR (MSE) --------
"""
Mean squared error (MSE) in vanilla Python
Source script: PYTHON_DEF_mse.py

Before using this function, run linear regression to get predictions.

Before using this function, ensure that:
    - The data is numeric (int or float)
    - The two input lists have the same length
    - The data contains no NaN values
Parameters:
    mse_observed = [list], observed values
    mse_predicted = [list], predicted values
Returns:
    mse = [float], mean squared error between observed and predicted values
"""

def Mean_Squared_Error(mse_observed, mse_predicted):
    """Compute the mean squared error between observed and predicted values.

    Written in plain Python: only len(), zip() and arithmetic on the
    elements are used, so any pair of equally long sequences works.

    Parameters:
        mse_observed : [list], observed values
        mse_predicted : [list], predicted values
    Returns:
        mse : mean of the squared element-wise differences
    Raises:
        ValueError : if the inputs differ in length or are empty
    """

    ## Validate input lengths
    if len(mse_observed) != len(mse_predicted):
        raise ValueError(
            f"The lengths of input lists are not equal: "
            f"{len(mse_observed)} vs {len(mse_predicted)}"
        )

    ## The mean of zero values is undefined; fail with a clear message
    ## instead of an unexplained ZeroDivisionError further down
    if len(mse_observed) == 0:
        raise ValueError("Cannot compute the mean squared error of empty inputs")

    ## Sum of squared errors over all (observed, predicted) pairs
    sum_square_error = sum(
        (obs - pred) ** 2 for obs, pred in zip(mse_observed, mse_predicted)
    )

    ## Calculate mean squared error
    return sum_square_error / len(mse_observed)


In [None]:
# -------- NEURAL NETWORK (NN) --------
"""
Neural Network in simple Python using Keras
Source script: PYTHON_DEF_neuralnetwork.py

Before using this function, ensure that:
    - The necessary libraries are imported (NumPy, TensorFlow/Keras)
    - The input data is preprocessed and scaled (StandardScaler)
    - The data is numeric
    - The data contains no NaN values
Parameters:
    NN_x : [array], Feature data
    NN_y : [array], Target data
    dense_1: [int], (8, 16, 64, 128), Number of neurons for layer 1 (default = 8)
    dense_2: [int], (8, 16, 64, 128), Number of neurons for layer 2 (default = 64)
    activate_fnc: [str], ("relu", "tanh", "sigmoid", "elu"), Activation function of the two hidden layers (default = "relu")
    kernel_reg: [keras regularizer], (l1, l2, l1_l2), Kernel regularizer applied to the two hidden layers (default = l2(0.01))
    optimize_fnc: [str], ("SGD", "Adagrad", "RMSProp", "Adam", "AdamW", "Nadam", "Adadelta")
        Optimization strategy, a.k.a. algorithm, used for training (default = "adam")
    epochs_nb : [int], Number of training epochs (default = 20)
Returns:
    NN_model : The trained neural network model
    NN_y_pred : [ndarray], Predicted values for the input X
    NN_final_loss: [float], final training loss value
    NN_mse: [float], mean squared error
    NN_mean_pred: [float], average of all predictions
"""

def Neural_Network(NN_x,
                   NN_y,
                   dense_1 = 8,
                   dense_2 = 64,
                   activate_fnc = "relu",
                   kernel_reg = l2(0.01),
                   optimize_fnc = "adam",
                   epochs_nb = 20,
                   ):
    """Train a small dense network for regression and predict on the training data.

    The network has two hidden Dense layers followed by a single-unit output
    layer with no activation, and is compiled with the "mse" loss.

    Parameters:
        NN_x : [array], feature data (1D input is treated as one feature column)
        NN_y : [array], target data (1D input is treated as one target column)
        dense_1 : [int], number of units in the first hidden layer (default = 8)
        dense_2 : [int], number of units in the second hidden layer (default = 64)
        activate_fnc : [str], activation of both hidden layers (default = "relu")
        kernel_reg : kernel regularizer of both hidden layers (default = l2(0.01))
        optimize_fnc : [str], optimizer passed to compile() (default = "adam")
        epochs_nb : [int], number of training epochs (default = 20)
    Returns:
        NN_model : the trained model
        NN_y_pred : predictions of the model for the feature data
        NN_final_loss : last recorded training loss, or None if no loss was recorded
        NN_mse : result of Mean_Squared_Error on the targets and the predictions
        NN_mean_pred : [float], average of all predictions
    """
    # Turn input values to numpy arrays
    x = np.array(NN_x, dtype=float)
    y = np.array(NN_y, dtype=float)

    # Reshape if data 1D instead of 2D
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    # Define neural network model for linear regression.
    # The input width is read from the converted array `x`, not from the raw
    # argument: the raw argument may be a list (no .shape) or a 1D array
    # (no second axis), both of which are valid inputs after the reshape above.
    NN_model = Sequential([
        Dense(dense_1, input_dim = x.shape[1], activation = activate_fnc, kernel_regularizer = kernel_reg),
        Dense(dense_2, activation = activate_fnc, kernel_regularizer = kernel_reg),
        Dense(1)
    ])

    # Complete the model
    NN_model.compile(optimizer = optimize_fnc, loss = "mse")

    # Train the model
    history = NN_model.fit(x, y, epochs = epochs_nb, verbose = 0)

    # Use the model for prediction
    NN_y_pred = NN_model.predict(x)

    # Compute result metrics
    # Use of AI (Microsoft Copilot version 1.25091.124.0) to implement metrics solution
    ## final loss (None when the history holds no 'loss' entry)
    NN_final_loss = history.history['loss'][-1] if 'loss' in history.history else None
    ## mse
    # NOTE(review): y is 2D here, so the helper's element-wise arithmetic
    # presumably yields a small array rather than a plain float - confirm
    NN_mse = Mean_Squared_Error(mse_observed = y, mse_predicted = NN_y_pred)
    ## mean prediction
    NN_mean_pred = float(np.mean(NN_y_pred))

    return NN_model, NN_y_pred, NN_final_loss, NN_mse, NN_mean_pred


In [None]:
# -------- K-MEANS (KM) --------
"""
K-Means clustering in simple Python using sklearn

Before using this function, ensure that:
    - The necessary libraries are imported (NumPy, scikit-learn, matplotlib for visualization)
    - The input data is numeric
    - The data contains no NaN values
K_Means_Elbow(KM_x, KM_y):
    Parameters:
        KM_x : [array], Feature data
        KM_y : [array], Target-like data
    Returns (all taken from the last model fitted in the elbow sweep, i.e. the largest cluster count tried):
        KME_labels: [ndarray], shape (n_samples,), cluster assignment for each point
        KME_centers: [ndarray], shape (n_clusters, n_features), cluster centroids
        KME_inertia: [float], sum of squared distances of samples to their closest cluster center
        KME_model: [KMeans object], the trained KMeans model
K_Means_Optimal(KM_x, KM_y, cluster_nb=3, RSEED=42):
    Parameters:
        KM_x : [array], Feature data
        KM_y : [array], Target-like data
        cluster_nb : [int], Number of clusters to form (default = 3)
        RSEED : [int], Random seed for reproducibility (default = 42)
    Returns:
        KMO_labels: [ndarray], shape (n_samples,), cluster assignment for each point
        KMO_centers: [ndarray], shape (n_clusters, n_features), cluster centroids
        KMO_inertia: [float], sum of squared distances of samples to their closest cluster center
        KMO_model: [KMeans object], the trained KMeans model
"""
def K_Means_Elbow(KM_x, KM_y, max_clusters = 10, RSEED = 42):
    """Run K-Means for 1..max_clusters clusters and plot inertia (elbow method).

    x and y are stacked column-wise into one dataset. One KMeans model is
    fitted per cluster count, and the inertia of each fit is plotted against
    the cluster count.

    Parameters:
        KM_x : [array], feature data
        KM_y : [array], target-like data
        max_clusters : [int], largest cluster count to try (default = 10);
            capped at the number of samples, because KMeans cannot form
            more clusters than there are samples
        RSEED : [int], random seed passed to every KMeans fit (default = 42)
    Returns (all taken from the last model fitted, i.e. the largest count tried):
        labels_ : cluster assignment for each point
        cluster_centers_ : cluster centroids
        inertia_ : inertia of that last fit
        kmeans : the last fitted KMeans model
    Raises:
        ValueError : if there is no sample or max_clusters is below 1
    """

    # Turn input values to numpy arrays
    x = np.array(KM_x, dtype=float)
    y = np.array(KM_y, dtype=float)

    # Reshape if data 1D instead of 2D
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    # Combine x and y into a single dataset for clustering
    data = np.column_stack((x, y))

    # Never ask for more clusters than samples: small datasets would
    # otherwise abort the whole sweep with an error from KMeans
    largest_k = min(int(max_clusters), data.shape[0])
    if largest_k < 1:
        raise ValueError(
            "Cannot run the elbow method: need at least one sample and max_clusters >= 1"
        )
    cluster_counts = range(1, largest_k + 1)

    # Run KMeans for each cluster count and record the inertia
    inertias = []
    for k in cluster_counts:
        kmeans = KMeans(n_clusters = k, random_state = RSEED)
        kmeans.fit(data)
        inertias.append(kmeans.inertia_)

    # Plot the elbow method
    plt.plot(cluster_counts, inertias, marker='o')
    plt.title("K-Means elbow method\n(optimal clusters)")
    plt.xlabel("Number of clusters")
    plt.ylabel("Inertia")
    plt.show()

    # `kmeans` is the model of the final iteration (largest cluster count)
    return kmeans.labels_, kmeans.cluster_centers_, kmeans.inertia_, kmeans

def K_Means_Optimal(KM_x, KM_y, cluster_nb = 3, RSEED = 42):
    """Fit a single K-Means model with a chosen number of clusters.

    x and y are copied into float arrays, 1D inputs become single columns,
    and both are stacked column-wise into the dataset that is clustered.

    Parameters:
        KM_x : [array], feature data
        KM_y : [array], target-like data
        cluster_nb : [int], number of clusters to form (default = 3)
        RSEED : [int], random seed for reproducibility (default = 42)
    Returns:
        labels_ : cluster assignment for each point
        cluster_centers_ : cluster centroids
        inertia_ : inertia of the fitted model
        clusterer : the fitted KMeans model
    """
    # Convert each input to a float array; flat vectors become one column
    columns = []
    for raw in (KM_x, KM_y):
        arr = np.array(raw, dtype=float)
        columns.append(arr.reshape(-1, 1) if arr.ndim == 1 else arr)

    # One row per sample, x columns followed by y columns
    samples = np.column_stack(columns)

    # Configure the model, then train it on the stacked data
    clusterer = KMeans(n_clusters = cluster_nb, random_state = RSEED)
    clusterer.fit(samples)

    return (
        clusterer.labels_,
        clusterer.cluster_centers_,
        clusterer.inertia_,
        clusterer,
    )

In [None]:
# -------- GAUSSIAN MIXTURE MODEL (GMM) --------
"""
Gaussian Mixture Model (GMM) in simple Python using sklearn

Before using this function, ensure that:
    - The necessary libraries are imported (NumPy, sklearn)
    - The input data is numeric
    - The data contains no NaN values
Parameters:
    GMM_x : [array], Feature data (1D or 2D)
    component_nb : [int], Number of Gaussian components (clusters) to fit (default = 2)
    RSEED : [int], Random seed for reproducibility (default = 42)
Returns:
    GMM_labels : [ndarray], cluster assignment for each point
    GMM_probs : [ndarray], soft probabilities of belonging to each cluster
    GMM_means : [ndarray], estimated Gaussian means
    GMM_covariances : [ndarray], covariance matrices of each Gaussian component
    GMM_model : [GaussianMixture object], the trained GMM model
"""
def Gaussian_Mixture_Model(GMM_x, component_nb = 2, RSEED = 42):
    """Fit a Gaussian mixture and return assignments plus fitted parameters.

    The input is copied into a float array; a 1D input is treated as a
    single feature column.

    Parameters:
        GMM_x : [array], feature data (1D or 2D)
        component_nb : [int], number of Gaussian components (default = 2)
        RSEED : [int], random seed for reproducibility (default = 42)
    Returns:
        hard_labels : result of predict() on the data
        soft_probs : result of predict_proba() on the data
        means_ : means of the fitted components
        covariances_ : covariances of the fitted components
        mixture : the fitted GaussianMixture model
    """
    samples = np.array(GMM_x, dtype=float)

    # A flat vector is one feature observed n times -> shape (n, 1)
    if samples.ndim == 1:
        samples = samples[:, np.newaxis]

    # Configure the mixture and estimate its parameters from the data
    mixture = GaussianMixture(n_components = component_nb, random_state=RSEED)
    mixture.fit(samples)

    # Hard assignment first, then per-component membership probabilities
    hard_labels = mixture.predict(samples)
    soft_probs = mixture.predict_proba(samples)

    return hard_labels, soft_probs, mixture.means_, mixture.covariances_, mixture
