In [1]:
def mse(actual, predicted):
    """Return the mean squared error between two equal-length sequences.

    Args:
        actual: Sized sequence of observed numeric values.
        predicted: Sequence of predicted values, paired element-wise with
            ``actual``.

    Returns:
        The sum of squared element-wise differences divided by ``len(actual)``.

    Raises:
        ValueError: If ``predicted`` is sized and its length differs from
            ``actual`` (previously the extra elements were silently dropped
            by ``zip`` while the divisor still used ``len(actual)``).
        ZeroDivisionError: If ``actual`` is empty.
    """
    n = len(actual)
    # Only compare lengths when `predicted` is sized, so generators keep working.
    if hasattr(predicted, "__len__") and len(predicted) != n:
        raise ValueError(
            f"actual and predicted must have the same length "
            f"(got {n} and {len(predicted)})"
        )
    return sum((a - p) ** 2 for a, p in zip(actual, predicted)) / n


In [2]:
import numpy as np

def gmm_synthetic_data(n_clusters, n_points, means, covariances, weights):
    """Draw samples from a Gaussian mixture using NumPy's global RNG.

    For every sample a component index is drawn from ``range(n_clusters)``
    with probabilities ``weights``; the sample itself is then drawn from the
    multivariate normal with that component's mean and covariance.

    Args:
        n_clusters: Number of mixture components.
        n_points: Number of samples to draw.
        means: Indexable of per-component mean vectors.
        covariances: Indexable of per-component covariance matrices.
        weights: Component probabilities passed to ``np.random.choice``.

    Returns:
        ``np.ndarray`` built from the list of drawn samples, one per row.
    """
    component_ids = range(n_clusters)

    def draw_sample():
        # Pick the component first, then sample from it (same RNG call order
        # for every point).
        k = np.random.choice(component_ids, p=weights)
        return np.random.multivariate_normal(means[k], covariances[k])

    return np.array([draw_sample() for _ in range(n_points)])


In [3]:
import numpy as np

def batch_norm(x, gamma, beta, epsilon=1e-5):
    """Normalize ``x`` along axis 0, then scale by ``gamma`` and shift by ``beta``.

    Args:
        x: Array whose axis 0 is reduced to compute the mean and variance.
        gamma: Multiplicative scale applied to the normalized values.
        beta: Additive shift applied after scaling.
        epsilon: Constant added to the variance before the square root.

    Returns:
        ``gamma * (x - mean) / sqrt(var + epsilon) + beta``.
    """
    batch_mean = np.mean(x, axis=0)
    batch_std = np.sqrt(np.var(x, axis=0) + epsilon)
    centered = x - batch_mean
    return gamma * (centered / batch_std) + beta


In [4]:
def jaccard_similarity(set1, set2):
    """Return the Jaccard similarity ``|A ∩ B| / |A ∪ B|`` of two sets.

    Args:
        set1: A ``set`` (or ``frozenset``).
        set2: A set or any iterable accepted by ``set.union``/``set.intersection``.

    Returns:
        A float in ``[0, 1]``. Two empty sets are treated as identical and
        yield ``1.0`` instead of raising ``ZeroDivisionError``.
    """
    union_size = len(set1.union(set2))
    if union_size == 0:
        # Both inputs are empty: define J(∅, ∅) = 1 rather than dividing by zero.
        return 1.0
    return len(set1.intersection(set2)) / union_size


In [5]:
import numpy as np

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula overflows in ``np.exp(-z)`` for large negative ``z``.
    Here the exponential is only ever taken of a non-positive number, so it
    stays in ``(0, 1]``.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        A NumPy scalar for scalar input, otherwise an array of the same shape,
        with values in ``[0, 1]``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exp of a non-positive value: never overflows
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)  -- algebraically equal.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # `[()]` unwraps a 0-d array to a scalar and leaves n-d arrays untouched.
    return result[()]

def sgd_logistic_regression(X, y, lr=0.01, epochs=100):
    """Fit logistic-regression weights with per-sample gradient steps.

    Starts from a zero weight vector of length ``X.shape[1]`` and, for each
    epoch, visits the samples in index order, updating the weights after
    every sample. No bias term is learned.

    Args:
        X: 2-D array indexed by sample then feature.
        y: Targets indexed by sample (``len(y)`` samples are visited).
        lr: Step size.
        epochs: Number of full passes over the samples.

    Returns:
        The learned weight vector.
    """
    weights = np.zeros(X.shape[1])
    for _ in range(epochs):
        for idx in range(len(y)):
            features = X[idx]
            # Prediction error for this sample, using the current weights.
            residual = sigmoid(np.dot(features, weights)) - y[idx]
            weights -= lr * (residual * features)
    return weights


In [6]:
import numpy as np

def backpropagation(X, y, weights1, weights2, learning_rate):
    """Run one forward/backward pass of a two-layer sigmoid network.

    Both layers use a sigmoid activation and have no bias terms. The weight
    matrices are updated with ``+=`` (in place for NumPy arrays) and the same
    objects are returned.

    Args:
        X: Input matrix, multiplied on the right by ``weights1``.
        y: Target values compared against the network output.
        weights1: Input-to-hidden weights.
        weights2: Hidden-to-output weights.
        learning_rate: Factor applied to both weight updates.

    Returns:
        The tuple ``(weights1, weights2)`` after the update.
    """
    def logistic(pre_activation):
        return 1 / (1 + np.exp(-pre_activation))

    def logistic_slope(activation):
        # Sigmoid derivative written in terms of the sigmoid's *output*.
        return activation * (1 - activation)

    # Forward pass
    hidden = logistic(np.dot(X, weights1))
    output = logistic(np.dot(hidden, weights2))

    # Backward pass: the hidden delta uses weights2 *before* it is updated.
    delta_output = (y - output) * logistic_slope(output)
    delta_hidden = delta_output.dot(weights2.T) * logistic_slope(hidden)

    # Weight updates (error is y - output, hence the additions)
    weights2 += hidden.T.dot(delta_output) * learning_rate
    weights1 += X.T.dot(delta_hidden) * learning_rate
    return weights1, weights2


In [7]:
class DecisionTree:
    """Skeleton of a decision tree exposing impurity measures.

    Only the impurity helpers are implemented; ``fit`` and ``predict`` are
    placeholders that do nothing and return ``None``.
    """

    def __init__(self, criterion='gini'):
        """Store the name of the impurity criterion."""
        self.criterion = criterion

    def gini(self, y):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of the labels ``y``."""
        total = len(y)
        squared_shares = [(np.sum(y == label) / total) ** 2 for label in np.unique(y)]
        return 1 - sum(squared_shares)

    def entropy(self, y):
        """Return the Shannon entropy (base 2) of the labels ``y``."""
        total = len(y)
        accumulated = 0
        for label in np.unique(y):
            share = np.sum(y == label) / total
            accumulated += share * np.log2(share)
        return -accumulated

    def fit(self, X, y):
        """Placeholder: tree construction is not implemented."""
        return None

    def predict(self, X):
        """Placeholder: prediction is not implemented."""
        return None


In [8]:
def f1_score(y_true, y_pred):
    """Return the binary F1 score (positive class = 1, negative class = 0).

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        ``2 * precision * recall / (precision + recall)`` as a float, or
        ``0.0`` when there are no true positives (precision and/or recall
        would otherwise be 0/0 and the result ``nan``).
    """
    # asarray lets plain lists work; element-wise `==` on a list would not.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    tp = int(np.sum((y_true == 1) & (y_pred == 1)))
    fp = int(np.sum((y_true == 0) & (y_pred == 1)))
    fn = int(np.sum((y_true == 1) & (y_pred == 0)))

    if tp == 0:
        # With no true positives F1 is 0 by convention; this also avoids 0/0.
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)


In [9]:
import numpy as np

def hinge_loss(X, y, weights, bias):
    """Return the hinge loss ``max(0, 1 - y * (X·w + b))``.

    Works element-wise, so ``X`` may be a single sample (1-D) or a batch
    (2-D, one sample per row). The built-in ``max`` used previously raised
    ``ValueError`` for batch input.

    Args:
        X: Feature vector or matrix of feature vectors.
        y: Label(s) matching ``X``; presumably encoded as -1/+1 — confirm
            against callers.
        weights: Weight vector.
        bias: Scalar offset.

    Returns:
        A scalar loss for a single sample, or an array of per-sample losses.
    """
    return np.maximum(0, 1 - y * (np.dot(X, weights) + bias))

def sgd_svm(X, y, lr=0.01, epochs=100, reg=0.01):
    """Train a linear SVM with per-sample sub-gradient descent on the hinge loss.

    Samples are visited in index order each epoch. A sample whose margin
    ``y[i] * (w·x_i + b)`` is below 1 contributes the hinge sub-gradient to
    both weights and bias; every step also applies L2 shrinkage to the
    weights.

    Args:
        X: 2-D array indexed by sample then feature.
        y: Labels indexed by sample; presumably -1/+1 given the margin test —
            confirm against callers.
        lr: Step size.
        epochs: Number of passes over the samples.
        reg: L2 regularization coefficient. Defaults to ``0.01``, the value
            that was previously hard-coded.

    Returns:
        Tuple ``(weights, bias)``.
    """
    weights = np.zeros(X.shape[1])
    bias = 0
    for epoch in range(epochs):
        for i in range(len(y)):
            if y[i] * (np.dot(X[i], weights) + bias) < 1:
                # Margin violated: hinge sub-gradient plus L2 shrinkage.
                weights -= lr * (-y[i] * X[i] + reg * weights)
                bias -= lr * (-y[i])
            else:
                # Margin satisfied: only the regularizer contributes.
                weights -= lr * reg * weights
    return weights, bias


In [10]:
def pearson_correlation(x, y):
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    Computed as the sum of products of the mean-centered values divided by
    the square root of the product of their summed squares.
    """
    dx = x - np.mean(x)
    dy = y - np.mean(y)
    co_deviation = np.sum(dx * dy)
    spread = np.sqrt(np.sum(dx ** 2) * np.sum(dy ** 2))
    return co_deviation / spread


In [11]:
import numpy as np

def adagrad(X, y, lr=0.01, epochs=100):
    """Fit a linear model with per-sample AdaGrad updates on the weights.

    Each weight has its own step size ``lr / (sqrt(G) + 1e-8)``, where ``G``
    is the running sum of that weight's squared gradients. The bias is
    updated with a plain ``lr``-scaled step.

    Args:
        X: 2-D array indexed by sample then feature.
        y: Targets indexed by sample.
        lr: Base learning rate.
        epochs: Number of passes over the samples.

    Returns:
        Tuple ``(weights, bias)``.
    """
    n_features = X.shape[1]
    weights = np.zeros(n_features)
    squared_grad_sum = np.zeros(n_features)
    bias = 0
    epsilon = 1e-8  # keeps the denominator non-zero

    for _ in range(epochs):
        for idx in range(len(y)):
            sample = X[idx]
            residual = (np.dot(sample, weights) + bias) - y[idx]

            grad = residual * sample
            squared_grad_sum += grad ** 2
            step_size = lr / (np.sqrt(squared_grad_sum) + epsilon)

            weights -= step_size * grad
            bias -= lr * residual

    return weights, bias


In [12]:
import numpy as np

def simulate_markov_chain(transition_matrix, initial_state, steps):
    """Propagate a state vector through a Markov chain for ``steps`` steps.

    Args:
        transition_matrix: Matrix the state is right-multiplied by each step.
        initial_state: Starting state vector (converted with ``np.array``).
        steps: Number of multiplications to apply.

    Returns:
        The state after ``steps`` applications of
        ``state = state · transition_matrix``.
    """
    distribution = np.array(initial_state)
    for _step in range(steps):
        distribution = np.dot(distribution, transition_matrix)
    return distribution


In [13]:
import numpy as np

class NaiveBayes:
    """Gaussian naive Bayes classifier.

    ``fit`` estimates a per-class mean and variance for every feature plus
    the class priors. ``predict`` returns, for every row, the class label
    with the highest log-posterior.

    Changes from the previous version:
      * ``predict`` returns the class *labels* (``self.classes[...]``) rather
        than argmax positions, which were only correct when the labels
        happened to be ``0..k-1``.
      * A small constant is added to every variance so a feature that is
        constant within a class does not cause division by zero.
      * Likelihoods are evaluated in log space, so points far from every
        class mean no longer underflow to ``log(0) = -inf``.
    """

    # Added to each per-class feature variance to keep it strictly positive.
    VAR_SMOOTHING = 1e-9

    def fit(self, X, y):
        """Estimate per-class feature means, variances and priors.

        Args:
            X: 2-D array-like, one sample per row.
            y: 1-D array-like of class labels, one per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        n_classes, n_features = len(self.classes), X.shape[1]
        self.mean = np.zeros((n_classes, n_features))
        self.var = np.zeros((n_classes, n_features))
        self.priors = np.zeros(n_classes)

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + self.VAR_SMOOTHING
            self.priors[idx] = X_c.shape[0] / X.shape[0]

    def predict(self, X):
        """Return the most probable class label for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if len(X) == 0:
            # Nothing to classify: empty array with the label dtype.
            return self.classes[:0]
        scores = np.array([self._calculate_posterior(x) for x in X])
        # Map argmax positions back to the actual class labels.
        return self.classes[np.argmax(scores, axis=1)]

    def _calculate_posterior(self, x):
        """Return the unnormalized log-posterior of ``x`` for every class."""
        posteriors = []
        for idx in range(len(self.classes)):
            prior = np.log(self.priors[idx])
            class_conditional = np.sum(self._log_pdf(idx, x))
            posteriors.append(prior + class_conditional)
        return posteriors

    def _log_pdf(self, class_idx, x):
        """Per-feature Gaussian log-density of ``x`` under class ``class_idx``."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)

    def _pdf(self, class_idx, x):
        """Per-feature Gaussian density of ``x`` under class ``class_idx``."""
        return np.exp(self._log_pdf(class_idx, x))


In [14]:
from sklearn.metrics import pairwise_distances

def silhouette_score(X, labels):
    """Return the mean silhouette coefficient over all samples.

    For sample ``i``: ``a`` is the mean distance to the *other* members of
    its own cluster, ``b`` is the smallest mean distance to the members of
    any other cluster, and the coefficient is ``(b - a) / max(a, b)``.
    Samples that are alone in their cluster score ``0``.

    Fixes over the previous version:
      * The per-cluster mask is applied to the full distance row. Previously
        a full-length mask indexed an already-filtered, shorter array, which
        raises ``IndexError``.
      * ``a`` no longer averages in the zero distance from a sample to itself.
      * A zero denominator (all relevant distances are 0) yields ``0``
        instead of ``nan``.

    Args:
        X: Samples, passed to ``pairwise_distances``.
        labels: Array-like of cluster labels, one per sample.

    Returns:
        Mean silhouette coefficient.

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    labels = np.asarray(labels)
    distances = pairwise_distances(X)
    cluster_ids = np.unique(labels)
    if len(cluster_ids) < 2:
        raise ValueError("silhouette_score requires at least 2 distinct cluster labels")

    silhouette_scores = []
    for i in range(distances.shape[0]):
        row = distances[i]
        own_mask = labels == labels[i]
        n_own = np.count_nonzero(own_mask)
        if n_own == 1:
            # Singleton cluster: coefficient is defined as 0.
            silhouette_scores.append(0.0)
            continue

        # The self-distance is 0, so dividing by (n_own - 1) excludes sample i.
        a = row[own_mask].sum() / (n_own - 1)
        b = min(
            row[labels == other].mean()
            for other in cluster_ids
            if other != labels[i]
        )
        denominator = max(a, b)
        silhouette_scores.append((b - a) / denominator if denominator > 0 else 0.0)

    return np.mean(silhouette_scores)


In [15]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

def build_autoencoder(input_dim):
    """Build and compile a dense autoencoder with a 32-unit bottleneck.

    Layer widths are ``input_dim -> 64 -> 32 -> 64 -> input_dim``. Hidden
    layers use ReLU and the output layer uses a sigmoid. The model is
    compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        input_dim: Number of input (and reconstructed output) features.

    Returns:
        The compiled ``Sequential`` model.
    """
    encoder_layers = [
        Dense(64, input_dim=input_dim, activation='relu'),
        Dense(32, activation='relu'),
    ]
    decoder_layers = [
        Dense(64, activation='relu'),
        Dense(input_dim, activation='sigmoid'),
    ]

    autoencoder = Sequential(encoder_layers + decoder_layers)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder


In [16]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

def sentiment_analysis(texts, labels):
    """Train a logistic-regression classifier on binary bag-of-words features.

    Args:
        texts: Iterable of raw text documents.
        labels: Target label for each document.

    Returns:
        Tuple ``(model, vectorizer)``: the fitted ``LogisticRegression`` and
        the fitted ``CountVectorizer`` needed to transform new texts.
    """
    vectorizer = CountVectorizer(binary=True)
    features = vectorizer.fit_transform(texts)
    classifier = LogisticRegression().fit(features, labels)
    return classifier, vectorizer


In [17]:
import numpy as np

def mcc(y_true, y_pred):
    """Return the Matthews correlation coefficient for binary labels (0/1).

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        A float in ``[-1, 1]``; ``0.0`` when any row or column of the
        confusion matrix is empty (the coefficient is then undefined).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Convert to Python ints: the four-factor product below overflows int64
    # once the counts reach roughly 1e5 each.
    tp = int(np.sum((y_true == 1) & (y_pred == 1)))
    tn = int(np.sum((y_true == 0) & (y_pred == 0)))
    fp = int(np.sum((y_true == 0) & (y_pred == 1)))
    fn = int(np.sum((y_true == 1) & (y_pred == 0)))

    numerator = (tp * tn) - (fp * fn)
    denominator_sq = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    if denominator_sq == 0:
        # Explicit zero handling replaces the old `+ 1e-8` fudge, which
        # biased every result (a perfect prediction scored below 1.0).
        return 0.0
    return numerator / np.sqrt(float(denominator_sq))


In [18]:
import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

def hierarchical_clustering(X):
    """Plot the Ward-linkage dendrogram of ``X``.

    Computes the linkage matrix with SciPy's ``linkage`` using the ``'ward'``
    method, draws it with ``dendrogram`` and shows the figure. Returns nothing.
    """
    merge_history = linkage(X, method='ward')
    dendrogram(merge_history)
    plt.show()


In [19]:
import tensorflow as tf
from tensorflow.keras import layers

def build_generator():
    """Build the GAN generator: a dense stack from a 100-d input to 28x28.

    Hidden layers of 256, 512 and 1024 units use ReLU. The final dense layer
    has ``28 * 28`` units with tanh and is reshaped to ``(28, 28)``. The
    model is returned uncompiled.
    """
    return tf.keras.Sequential([
        layers.Dense(256, input_dim=100, activation='relu'),
        layers.Dense(512, activation='relu'),
        layers.Dense(1024, activation='relu'),
        layers.Dense(28 * 28, activation='tanh'),
        layers.Reshape((28, 28)),
    ])

def build_discriminator():
    """Build the GAN discriminator: a dense stack from 28x28 to one sigmoid unit.

    The ``(28, 28)`` input is flattened, passed through ReLU layers of 512
    and 256 units, and ends in a single sigmoid unit. The model is returned
    uncompiled.
    """
    return tf.keras.Sequential([
        layers.Flatten(input_shape=(28, 28)),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])


In [23]:
from statsmodels.tsa.arima.model import ARIMA

def arima_forecasting(data, order=(1,1,1)):
    """Fit an ARIMA model to ``data`` and return its default forecast.

    Args:
        data: Series passed to ``ARIMA``.
        order: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        The result of calling ``forecast()`` with no arguments on the fitted
        model.
    """
    fitted = ARIMA(data, order=order).fit()
    return fitted.forecast()


In [22]:
# Install statsmodels into the running kernel's environment. Run this cell
# BEFORE the cell that imports statsmodels. Pinned to the version this
# notebook was last run with; -q suppresses the download progress output.
%pip install -q statsmodels==0.14.4


Collecting statsmodels
Note: you may need to restart the kernel to use updated packages.

  Downloading statsmodels-0.14.4-cp312-cp312-win_amd64.whl.metadata (9.5 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-0.5.6-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading statsmodels-0.14.4-cp312-cp312-win_amd64.whl (9.8 MB)
   ---------------------------------------- 0.0/9.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.8 MB ? eta -:--:--
   - -------------------------------------- 0.3/9.8 MB ? eta -:--:--
   -- ------------------------------------- 0.5/9.8 MB 1.0 MB/s eta 0:00:09
   --- ------------------------------------ 0.8/9.8 MB 1.1 MB/s eta 0:00:09
   ---- ----------------------------------- 1.0/9.8 MB 1.3 MB/s eta 0:00:07
   ----- ---------------------------------- 1.3/9.8 MB 1.2 MB/s eta 0:00:08
   ------ --------------------------------- 1.6/9.8 MB 1.3 MB/s eta 0:00:07
   -------- ------------------------------- 2.1/9.8 MB 1.3 MB/s eta 0:00: