<a href="https://colab.research.google.com/github/Mr-RedBeard/Projects/blob/main/MachineLearningAutomation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Regression Model Accuracy Automation**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, VotingRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
from skopt import BayesSearchCV
from mlxtend.regressor import StackingRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_regression
from sklearn.feature_selection import f_regression
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Regression benchmark: build every (scaler, estimator, feature selector,
# imputer) pipeline, tune the regularised linear models with Bayesian search,
# and report held-out MSE / R^2 for each configuration.
#
# Local imports: these two names are not part of the cell's import header.
from sklearn.base import clone
from sklearn.model_selection import train_test_split

# Generate sample regression data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Hold out a test split so the reported metrics are not measured on the same
# rows the pipelines were fitted (and tuned) on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define regression algorithms
regression_algorithms = {
    "Linear Regression": LinearRegression(),
    "Lasso Regression": Lasso(),
    "Ridge Regression": Ridge(),
    "ElasticNet Regression": ElasticNet(),
    "Decision Tree Regression": DecisionTreeRegressor(),
    "Random Forest Regression": RandomForestRegressor(),
    "K-Nearest Neighbors Regression": KNeighborsRegressor(),
}

# Define ensemble methods
ensemble_models = {
    "Voting Regressor": VotingRegressor(estimators=list(regression_algorithms.items())),
    "AdaBoost Regressor": AdaBoostRegressor(),
    "Gradient Boosting Regressor": GradientBoostingRegressor(),
}

# A Pipeline may only end in ONE predictor, so base algorithms and ensembles
# are pooled and each one is evaluated as the final step of its own pipeline.
candidate_estimators = {**regression_algorithms, **ensemble_models}

# Define scalers
scalers = {
    "Standard Scaler": StandardScaler(),
    "Min-Max Scaler": MinMaxScaler(),
    "Robust Scaler": RobustScaler(),
}

# Define polynomial features.
# include_bias=False: the linear models fit their own intercept, and a constant
# column has undefined correlation with y for the f_regression-based selector.
poly = PolynomialFeatures(include_bias=False)

# Define regularization search spaces, keyed by estimator class.
# Parameter names carry the 'model__' prefix because the estimator is the
# pipeline step named 'model'. Float bounds make skopt treat l1_ratio as a
# continuous dimension (integer bounds would only ever try 0 and 1).
regularization_params = {
    Lasso: {'model__alpha': (1e-6, 1e+6, 'log-uniform')},
    Ridge: {'model__alpha': (1e-6, 1e+6, 'log-uniform')},
    ElasticNet: {
        'model__alpha': (1e-6, 1e+6, 'log-uniform'),
        'model__l1_ratio': (0.0, 1.0),
    },
}

# Define feature selection techniques
feature_selection = {
    "Select From Model": SelectFromModel(estimator=RandomForestRegressor()),
    "RFE": RFE(estimator=LinearRegression()),
    "SelectKBest": SelectKBest(score_func=f_regression),
}

# Define imputation techniques
imputation_techniques = {
    "Mean": SimpleImputer(strategy='mean'),
    "Median": SimpleImputer(strategy='median'),
    "Most Frequent": SimpleImputer(strategy='most_frequent'),
}

# Combine all components. Search spaces are NOT part of the product: a search
# space only applies to its own estimator and is looked up per estimator below.
regression_combinations = [
    (scaler, estimator, fs, imp)
    for scaler in scalers.values()
    for estimator in candidate_estimators.values()
    for fs in feature_selection.items()
    for imp in imputation_techniques.items()
]

# Train and evaluate each combination; estimators with a registered search
# space are tuned with cross-validated Bayesian optimization.
results = []
for scaler, estimator, (fs_name, selector), (imp_name, imputer) in regression_combinations:
    # Clone every component so no fitted state leaks between configurations.
    # The imputer runs first because PolynomialFeatures rejects NaN input.
    pipeline = Pipeline(steps=[
        ('imputer', clone(imputer)),
        ('scaler', clone(scaler)),
        ('poly', clone(poly)),
        ('fs', clone(selector)),
        ('model', clone(estimator)),
    ])

    search_space = regularization_params.get(type(estimator))
    if search_space is not None:
        fitted = BayesSearchCV(
            pipeline,
            search_space,
            cv=5,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
            n_iter=50,
        )
    else:
        # Nothing to tune for this estimator: fit the plain pipeline.
        fitted = pipeline

    fitted.fit(X_train, y_train)
    y_pred = fitted.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    estimator_name = estimator.__class__.__name__
    is_ensemble = any(estimator is member for member in ensemble_models.values())
    results.append({
        "Scaler": scaler.__class__.__name__,
        "Algorithm": estimator_name,
        "Ensemble": estimator_name if is_ensemble else "None",
        "Regularization": estimator_name if search_space is not None else "None",
        "Feature_Selection": fs_name,
        "Imputation": imp_name,
        "MSE": mse,
        "R^2": r2,
    })

# Convert results to DataFrame for visualization
import pandas as pd
results_df = pd.DataFrame(results)

# Visualize results
plt.figure(figsize=(12, 6))
plt.scatter(results_df.index, results_df["MSE"])
plt.xlabel("Configuration Index")
plt.ylabel("Mean Squared Error")
plt.title("Mean Squared Error Across Different Configurations")
plt.show()


**Classification Model Accuracy Automation**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from skopt import BayesSearchCV
from mlxtend.classifier import StackingClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import f_classif
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Classification benchmark: build every (scaler, estimator, feature selector,
# imputer) pipeline, tune logistic regression with Bayesian search, and report
# held-out accuracy for each configuration.
#
# Local imports: these two names are not part of the cell's import header.
from sklearn.base import clone
from sklearn.model_selection import train_test_split

# Generate sample classification data
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

# Hold out a stratified test split so accuracy is not measured on the same
# rows the pipelines were fitted (and tuned) on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Define classification algorithms
classification_algorithms = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "Random Forest Classifier": RandomForestClassifier(),
    "K-Nearest Neighbors Classifier": KNeighborsClassifier(),
}

# Define ensemble methods
ensemble_models = {
    "Voting Classifier": VotingClassifier(estimators=list(classification_algorithms.items())),
    "AdaBoost Classifier": AdaBoostClassifier(),
    "Gradient Boosting Classifier": GradientBoostingClassifier(),
}

# A Pipeline may only end in ONE predictor, so base algorithms and ensembles
# are pooled and each one is evaluated as the final step of its own pipeline.
candidate_estimators = {**classification_algorithms, **ensemble_models}

# Define scalers
scalers = {
    "Standard Scaler": StandardScaler(),
    "Min-Max Scaler": MinMaxScaler(),
    "Robust Scaler": RobustScaler(),
}

# Define polynomial features.
# include_bias=False: logistic regression fits its own intercept, and a
# constant column carries no information for the f_classif-based selector.
poly = PolynomialFeatures(include_bias=False)

# Define regularization search spaces, keyed by estimator class.
# Parameter names carry the 'model__' prefix because the estimator is the
# pipeline step named 'model'.
regularization_params = {
    LogisticRegression: {'model__C': (1e-6, 1e+6, 'log-uniform')},
}

# Define feature selection techniques
feature_selection = {
    "Select From Model": SelectFromModel(estimator=RandomForestClassifier()),
    "RFE": RFE(estimator=LogisticRegression()),
    "SelectKBest": SelectKBest(score_func=f_classif),
}

# Define imputation techniques
imputation_techniques = {
    "Mean": SimpleImputer(strategy='mean'),
    "Median": SimpleImputer(strategy='median'),
    "Most Frequent": SimpleImputer(strategy='most_frequent'),
}

# Combine all components. Search spaces are NOT part of the product: a search
# space only applies to its own estimator and is looked up per estimator below.
classification_combinations = [
    (scaler, estimator, fs, imp)
    for scaler in scalers.values()
    for estimator in candidate_estimators.values()
    for fs in feature_selection.items()
    for imp in imputation_techniques.items()
]

# Train and evaluate each combination; estimators with a registered search
# space are tuned with cross-validated Bayesian optimization.
results = []
for scaler, estimator, (fs_name, selector), (imp_name, imputer) in classification_combinations:
    # Clone every component so no fitted state leaks between configurations.
    # The imputer runs first because PolynomialFeatures rejects NaN input.
    pipeline = Pipeline(steps=[
        ('imputer', clone(imputer)),
        ('scaler', clone(scaler)),
        ('poly', clone(poly)),
        ('fs', clone(selector)),
        ('model', clone(estimator)),
    ])

    search_space = regularization_params.get(type(estimator))
    if search_space is not None:
        fitted = BayesSearchCV(
            pipeline,
            search_space,
            cv=5,
            scoring='accuracy',
            n_jobs=-1,
            n_iter=50,
        )
    else:
        # Nothing to tune for this estimator: fit the plain pipeline.
        fitted = pipeline

    fitted.fit(X_train, y_train)
    y_pred = fitted.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    estimator_name = estimator.__class__.__name__
    is_ensemble = any(estimator is member for member in ensemble_models.values())
    results.append({
        "Scaler": scaler.__class__.__name__,
        "Algorithm": estimator_name,
        "Ensemble": estimator_name if is_ensemble else "None",
        "Regularization": estimator_name if search_space is not None else "None",
        "Feature_Selection": fs_name,
        "Imputation": imp_name,
        "Accuracy": accuracy,
    })

# Convert results to DataFrame for visualization
import pandas as pd
results_df = pd.DataFrame(results)

# Visualize results
plt.figure(figsize=(12, 6))
plt.scatter(results_df.index, results_df["Accuracy"])
plt.xlabel("Configuration Index")
plt.ylabel("Accuracy")
plt.title("Accuracy Across Different Configurations")
plt.show()
