# Data Science Task with Parallel Processes

# In [89]:
import multiprocessing
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Function to perform data science task (e.g., training a model)
def perform_data_science_task(data):
    """Train a random-forest regressor on one chunk of data.

    Args:
        data: Indexable pair whose element 0 holds the training features
            and element 1 holds the training targets.

    Returns:
        The fitted ``RandomForestRegressor`` (100 trees).
    """
    features = data[0]
    targets = data[1]
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned straight from the call.
    return RandomForestRegressor(n_estimators=100).fit(features, targets)

# Function to split data into chunks for parallel processing
def chunk_data(data, num_chunks):
    """Split paired features/targets into contiguous, balanced chunks.

    The samples are divided as evenly as possible: chunk sizes differ by at
    most one, with the larger chunks coming first. Chunks that would contain
    no samples are not emitted, so fewer than ``num_chunks`` chunks are
    returned when there are fewer samples than requested chunks.

    Args:
        data: Indexable pair ``(features, targets)``; both elements must
            support ``len()`` and slicing, and have the same length.
        num_chunks: Maximum number of chunks to produce; must be >= 1.

    Returns:
        A list of ``(features_slice, targets_slice)`` tuples in original
        sample order.

    Raises:
        ValueError: If ``num_chunks`` is less than 1, or if the features and
            targets have different lengths.
    """
    if num_chunks < 1:
        raise ValueError(f"num_chunks must be at least 1, got {num_chunks}")

    features, targets = data[0], data[1]
    num_samples = len(features)
    if len(targets) != num_samples:
        raise ValueError(
            "features and targets must have the same length, "
            f"got {num_samples} and {len(targets)}"
        )

    # The first `extra` chunks take one additional sample so the remainder is
    # spread out instead of being piled onto the last chunk.
    base_size, extra = divmod(num_samples, num_chunks)

    chunks = []
    start = 0
    for i in range(num_chunks):
        end = start + base_size + (1 if i < extra else 0)
        if end == start:
            # Every remaining chunk would be empty as well; an empty chunk
            # cannot be used to train a model, so stop here.
            break
        chunks.append((features[start:end], targets[start:end]))
        start = end
    return chunks

# Function to evaluate model
def evaluate_model(model, X_test, y_test):
    """Return the mean squared error of ``model`` on a test set.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features passed to ``model.predict``.
        y_test: True target values compared against the predictions.

    Returns:
        The value computed by ``mean_squared_error(y_test, predictions)``.
    """
    predictions = model.predict(X_test)
    return mean_squared_error(y_test, predictions)

if __name__ == '__main__':
    # Synthetic regression problem: uniformly random features and targets,
    # used purely for demonstration.
    num_samples = 1000
    num_features = 10
    X = np.random.rand(num_samples, num_features)
    y = np.random.rand(num_samples)

    # Request one chunk, and one worker process, per CPU core.
    num_processes = multiprocessing.cpu_count()
    data_chunks = chunk_data((X, y), num_processes)

    # Train one model per chunk in parallel; leaving the ``with`` block
    # shuts the worker pool down.
    with multiprocessing.Pool(processes=num_processes) as pool:
        models = pool.map(perform_data_science_task, data_chunks)

    # Separate random test set, drawn after training.
    num_samples_test = 200
    X_test = np.random.rand(num_samples_test, num_features)
    y_test = np.random.rand(num_samples_test)

    # Score every trained model against the same test set.
    evaluation_results = [
        evaluate_model(trained, X_test, y_test) for trained in models
    ]

    # Report the results, numbering models from 1.
    for i, mse in enumerate(evaluation_results):
        print(f"Model {i+1} MSE: {mse}")


# Output:
# Model 1 MSE: 0.08429601602093935
# Model 2 MSE: 0.08797028175485362
