<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/_Advanced_Research_and_Observations_Step_3_Add_Advanced_Statistical_Methods_and_Visualization_Tools.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error
import multiprocessing as mp
from sklearn.linear_model import LinearRegression

# 1. Data Collection Class
class DataCollection:
    """Collect and standardize a one-dimensional numeric sample.

    ``collect_data`` currently simulates a source by drawing random values;
    ``data_source`` is stored on the instance but not read by this class.
    """

    def __init__(self, data_source: str):
        self.data_source = data_source
        # Filled in by collect_data(); stays None until then.
        self.data = None

    def collect_data(self) -> np.ndarray:
        """Draw 1000 samples from a standard normal distribution.

        The samples are stored on ``self.data`` and also returned.
        """
        self.data = np.random.normal(0, 1, 1000)
        print("Data collected from source.")
        return self.data

    def preprocess_data(self) -> np.ndarray:
        """Standardize the stored data to zero mean and unit variance.

        The result replaces ``self.data`` and is also returned.

        Raises:
            ValueError: if no data has been collected yet.
        """
        if self.data is None:
            raise ValueError(
                "No data to preprocess; call collect_data() first."
            )

        values = np.asarray(self.data, dtype=float)
        centered = values - np.mean(values)
        spread = np.std(values)
        # A constant sample has zero spread; dividing by it would fill the
        # array with NaN, so in that case the data is only centred.
        if spread > 0:
            centered = centered / spread

        self.data = centered
        print("Data preprocessed.")
        return self.data


# 2. Error Analysis Class
class ErrorAnalysis:
    """Standard-error and confidence-interval helpers for a 1-D sample."""

    @staticmethod
    def calculate_standard_error(data: np.ndarray) -> float:
        """Return the standard error of the mean of ``data``.

        The sample standard deviation (``ddof=1``) is used so the value
        agrees with ``scipy.stats.sem``, which ``confidence_interval`` relies
        on. With fewer than two observations the result is NaN.
        """
        n = len(data)
        standard_error = float(np.std(data, ddof=1) / np.sqrt(n))
        print(f"Standard Error: {standard_error}")
        return standard_error

    @staticmethod
    def confidence_interval(data: np.ndarray, confidence: float = 0.95) -> tuple:
        """Return a two-sided Student-t confidence interval for the mean.

        Args:
            data: one-dimensional sample.
            confidence: coverage level, strictly between 0 and 1.

        Returns:
            ``(lower, upper)`` as plain Python floats.

        Raises:
            ValueError: if ``confidence`` is not strictly between 0 and 1.
        """
        if not 0 < confidence < 1:
            raise ValueError("confidence must be strictly between 0 and 1")

        mean = np.mean(data)
        sem = stats.sem(data)
        # Two-sided interval: put (1 - confidence) / 2 in each tail.
        t_critical = stats.t.ppf((1 + confidence) / 2., len(data) - 1)
        margin = sem * t_critical
        # Plain floats keep the printed tuple readable regardless of how the
        # installed NumPy version represents its scalar types.
        interval = (float(mean - margin), float(mean + margin))
        print(f"Confidence Interval ({confidence*100}%): {interval}")
        return interval


# 3. Model Validation Class
class ModelValidation:
    """Hold-out and k-fold evaluation of an estimator by mean squared error.

    The estimator only needs ``fit(X, y)`` and ``predict(X)`` methods.
    """

    def __init__(self, model, X: np.ndarray, y: np.ndarray,
                 test_size: float = 0.2, random_state=None):
        """Split ``X`` and ``y`` into train and test partitions.

        Args:
            model: estimator to evaluate.
            X: feature array.
            y: target array.
            test_size: forwarded to ``train_test_split``.
            random_state: forwarded to ``train_test_split``; pass a value to
                make the split reproducible.
        """
        self.model = model
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    def validate_model(self) -> float:
        """Fit on the training partition and return the test-partition MSE."""
        self.model.fit(self.X_train, self.y_train)
        predictions = self.model.predict(self.X_test)
        mse = mean_squared_error(self.y_test, predictions)
        print(f"Model Validation - MSE: {mse}")
        return mse

    def k_fold_validation(self, k: int = 5) -> float:
        """Run k-fold cross-validation on the training partition.

        Each fold fits a deep copy of ``self.model``, so the estimator held
        by this object is left exactly as it was before the call.

        Args:
            k: number of folds, passed to ``KFold`` as ``n_splits``.

        Returns:
            The mean of the per-fold MSE values.
        """
        import copy

        kf = KFold(n_splits=k)
        mse_scores = []
        for train_index, test_index in kf.split(self.X_train):
            X_train_kf, X_test_kf = self.X_train[train_index], self.X_train[test_index]
            y_train_kf, y_test_kf = self.y_train[train_index], self.y_train[test_index]
            # Fit a copy so self.model is not left trained on a partial fold.
            fold_model = copy.deepcopy(self.model)
            fold_model.fit(X_train_kf, y_train_kf)
            predictions = fold_model.predict(X_test_kf)
            mse_scores.append(mean_squared_error(y_test_kf, predictions))
        mean_mse = float(np.mean(mse_scores))
        print(f"{k}-Fold Cross-Validation Mean MSE: {mean_mse}")
        return mean_mse


# 4. Scalable Computing Class for Parallel Processing
class ScalableComputing:
    """Helpers for spreading work over a pool of worker processes."""

    @staticmethod
    def parallel_computation(func, data: list, num_processes: int = 4) -> list:
        """Apply ``func`` to every item of ``data`` with a process pool.

        Args:
            func: callable applied to each item through ``Pool.map``.
            data: items to process.
            num_processes: number of worker processes in the pool.

        Returns:
            The list produced by ``Pool.map``.
        """
        with mp.Pool(processes=num_processes) as workers:
            mapped = workers.map(func, data)
        print("Parallel computation completed.")
        return mapped


# 5. Statistical Analysis Class
class StatisticalAnalysis:
    """Hypothesis testing, simple Bayesian updating and Monte Carlo helpers."""

    @staticmethod
    def hypothesis_testing(data: np.ndarray, mu: float = 0) -> dict:
        """Run a one-sample t-test of ``data`` against the mean ``mu``.

        Returns:
            A dict with keys ``'t_stat'`` and ``'p_val'``.
        """
        t_stat, p_val = stats.ttest_1samp(data, mu)
        print(f"Hypothesis Testing - T-statistic: {t_stat}, P-value: {p_val}")
        return {'t_stat': t_stat, 'p_val': p_val}

    @staticmethod
    def bayesian_inference(data: np.ndarray, prior_mean: float = 0, prior_std: float = 1) -> dict:
        """Update a normal prior on the mean with normally distributed data.

        This is the conjugate normal-normal update. The observation variance
        is treated as known and set to the sample variance of ``data``
        (``ddof=0``). A flat prior (``prior_std=float('inf')``) therefore
        yields the sample mean and ``std / sqrt(n)``.

        Args:
            data: observed sample.
            prior_mean: mean of the normal prior.
            prior_std: standard deviation of the normal prior; must be >= 0.

        Returns:
            A dict with keys ``'posterior_mean'`` and ``'posterior_std'``.

        Raises:
            ValueError: if ``prior_std`` is negative.
        """
        if prior_std < 0:
            raise ValueError("prior_std must be non-negative")

        values = np.asarray(data, dtype=float)
        n = values.size

        if n == 0 or prior_std == 0:
            # No observations, or a prior with no uncertainty: the posterior
            # is the prior itself.
            posterior_mean = float(prior_mean)
            posterior_std = float(prior_std)
        else:
            sample_mean = float(np.mean(values))
            sample_var = float(np.var(values))
            if sample_var == 0:
                # Identical observations give a zero plug-in variance, i.e.
                # infinite data precision; the data then fix the mean.
                posterior_mean = sample_mean
                posterior_std = 0.0
            else:
                prior_precision = 1.0 / prior_std ** 2
                data_precision = n / sample_var
                posterior_precision = prior_precision + data_precision
                # Precision-weighted average of prior mean and sample mean.
                posterior_mean = (
                    prior_precision * prior_mean + data_precision * sample_mean
                ) / posterior_precision
                posterior_std = float(1.0 / np.sqrt(posterior_precision))

        print(f"Bayesian Inference - Posterior Mean: {posterior_mean}, Posterior Std: {posterior_std}")
        return {'posterior_mean': posterior_mean, 'posterior_std': posterior_std}

    @staticmethod
    def monte_carlo_simulation(func, num_simulations: int = 1000) -> np.ndarray:
        """Call ``func()`` ``num_simulations`` times and collect the results.

        Returns:
            An array holding the value returned by each call, in call order.
        """
        results = np.array([func() for _ in range(num_simulations)])
        print("Monte Carlo Simulation completed.")
        return results


# 6. Visualization Class
class Visualization:
    """Histogram-based plots for a numeric sample."""

    @staticmethod
    def plot_distribution(data: np.ndarray) -> None:
        """Show a histogram of ``data`` with a KDE curve overlaid."""
        sns.histplot(data, kde=True)
        plt.title('Data Distribution')
        plt.show()

    @staticmethod
    def plot_confidence_interval(data: np.ndarray, confidence_interval: tuple) -> None:
        """Show a histogram of ``data`` with the interval bounds marked.

        Args:
            data: sample to plot.
            confidence_interval: sequence whose first two items are the lower
                and upper bounds; each is drawn as a dashed red vertical line.
        """
        lower, upper = confidence_interval[0], confidence_interval[1]
        sns.histplot(data, kde=True)
        for bound in (lower, upper):
            plt.axvline(x=bound, color='r', linestyle='--')
        # Format the bounds explicitly: interpolating the raw tuple puts the
        # element reprs (full precision, possibly with NumPy type wrappers)
        # into the title.
        plt.title(f'Confidence Interval: ({lower:.4f}, {upper:.4f})')
        plt.show()


# Worker for the parallel-processing demo below. It lives at module level,
# outside the __main__ guard, so that worker processes which re-import this
# module (the "spawn" start method) can still look the function up by name.
def _square(x):
    """Return ``x`` squared."""
    return x ** 2


# Example usage of each class
if __name__ == "__main__":
    # 1. Data Collection
    data_collector = DataCollection(data_source="sensor")
    data = data_collector.collect_data()
    preprocessed_data = data_collector.preprocess_data()

    # 2. Error Analysis
    error_analysis = ErrorAnalysis()
    error_analysis.calculate_standard_error(preprocessed_data)
    ci = error_analysis.confidence_interval(preprocessed_data)

    # 3. Model Validation (Example with a simple linear regression model)
    # Synthetic data: y = 3.5 * x plus Gaussian noise with std 0.1.
    X = np.random.rand(1000, 1)
    y = 3.5 * X.flatten() + np.random.normal(0, 0.1, 1000)

    model = LinearRegression()
    validator = ModelValidation(model, X, y)
    validator.validate_model()
    validator.k_fold_validation()

    # 4. Scalable Computing - Parallel processing example
    scalable_comp = ScalableComputing()
    results = scalable_comp.parallel_computation(_square, list(range(10)), num_processes=4)
    print("Parallel Computation Results:", results)

    # 5. Statistical Analysis
    stat_analysis = StatisticalAnalysis()
    stat_analysis.hypothesis_testing(preprocessed_data)
    stat_analysis.bayesian_inference(preprocessed_data)

    # 6. Visualization
    viz = Visualization()
    viz.plot_distribution(preprocessed_data)
    viz.plot_confidence_interval(preprocessed_data, ci)