In [None]:
import json

def parse_target_config(json_data):
    """Print a four-line summary of the "target" section of a parsed config.

    Args:
        json_data: dict produced by decoding the configuration JSON. When it
            has no "target" key, every field falls back to its default.

    Returns:
        None. The summary is written to stdout only.
    """
    section = json_data.get("target", {})

    # (printed label, key inside the "target" section, fallback when absent)
    report_fields = (
        ("Prediction Type", "prediction_type", "Not specified"),
        ("Target Variable", "target", "Not specified"),
        ("Regression Type", "type", "Not specified"),
        ("Partitioning Enabled", "partitioning", False),
    )

    # Look everything up before printing so a malformed section fails
    # without emitting a partial report.
    resolved = [(label, section.get(key, fallback)) for label, key, fallback in report_fields]

    for label, value in resolved:
        print(f"{label}: {value}")

# Example JSON input: a minimal configuration containing only the "target"
# section, i.e. the four keys that parse_target_config looks up.
json_input = """
{
  "target": {
    "prediction_type": "Regression",
    "target": "petal_width",
    "type": "regression",
    "partitioning": true
  }
}
"""

# Decode the JSON text into a dict and print the summary of its "target" section.
# parse_target_config returns None, so the printed lines are the only output.
json_data = json.loads(json_input)
parse_target_config(json_data)


Prediction Type: Regression
Target Variable: petal_width
Regression Type: regression
Partitioning Enabled: True


In [None]:
import pandas as pd
import json

# Feature-handling configuration as a JSON string, keyed by column name.
# NOTE: this rebinds `json_input`, which the target-config cell above also assigns.
# Of the "feature_details" keys below, apply_imputation reads only
# "missing_values" and "impute_with".
json_input = """
{
  "feature_handling": {
    "sepal_length": {
      "feature_name": "sepal_length",
      "is_selected": true,
      "feature_variable_type": "numerical",
      "feature_details": {
        "numerical_handling": "Keep as regular numerical feature",
        "rescaling": "No rescaling",
        "make_derived_feats": false,
        "missing_values": "Impute",
        "impute_with": "Average of values",
        "impute_value": 0
      }
    }
  }
}
"""

# Decode the JSON text into the dict that apply_imputation expects.
feature_config = json.loads(json_input)

# Function to apply imputation based on the feature configuration
def apply_imputation(df, feature_config):
    """Fill missing values in ``df`` in place as directed by ``feature_config``.

    Each entry under ``feature_config["feature_handling"]`` is keyed by a
    column name. A column is imputed only when its ``"feature_details"`` has
    ``"missing_values" == "Impute"`` and ``"impute_with" == "Average of values"``;
    its NaNs are then replaced by the column mean. Entries with other settings,
    or without those keys, are left untouched.

    Args:
        df: pandas DataFrame to modify. It is mutated in place.
        feature_config: parsed configuration dict holding a
            ``"feature_handling"`` mapping of column name -> settings dict.

    Returns:
        None.

    Raises:
        KeyError: if ``"feature_handling"`` is missing, or a feature flagged
            for mean imputation is not a column of ``df``.
    """
    for feature, config in feature_config["feature_handling"].items():
        # Not every feature type carries imputation settings, so read the
        # keys defensively instead of failing on the first entry without them.
        details = config.get("feature_details") or {}
        if details.get("missing_values") != "Impute":
            continue

        if details.get("impute_with") == "Average of values":
            # Series.mean() skips NaN values by default.
            avg_value = df[feature].mean()
            # Assign the filled column back to the frame. Calling
            # fillna(inplace=True) on df[feature] is chained assignment: it
            # warns on recent pandas and leaves df unchanged under Copy-on-Write.
            df[feature] = df[feature].fillna(avg_value)
        # Extend this block to handle other imputation methods as needed

# Load the dataset; 'iris.csv' is resolved relative to the notebook's working directory.
df = pd.read_csv('iris.csv')

# Apply the imputation to the DataFrame based on the configuration.
# apply_imputation works on `df` itself and returns nothing, so the same
# variable is inspected afterwards.
apply_imputation(df, feature_config)

# Display the DataFrame to verify the imputation
print(df)


     sepal_length  sepal_width  petal_length  petal_width         species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]


In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from scipy.stats import pearsonr

# Assuming df is your DataFrame and target_variable is your target column's name
def apply_feature_reduction(df, target_variable, config):
    """Return a reduced version of ``df`` using the method flagged in ``config``.

    The method sections are checked in a fixed order and the first one whose
    ``"is_selected"`` flag is truthy is applied:

    1. ``"No Reduction"``: keep the first ``num_of_features_to_keep`` columns by
       position (the target is kept only if it falls inside that slice).
    2. ``"Correlation with target"``: keep the numeric features with the largest
       absolute Pearson correlation to the target, plus the target.
    3. ``"Tree-based"``: keep the numeric features with the highest
       ExtraTrees importances, plus the target.
    4. ``"Principal Component Analysis"``: replace the numeric features with
       ``PC1..PCn`` and append the target.

    A section missing from ``config`` counts as not selected. The
    ``"feature_reduction_method"`` label is not consulted; only the
    ``"is_selected"`` flags decide which method runs.

    Args:
        df: input pandas DataFrame. It is never modified.
        target_variable: name of the target column in ``df``.
        config: dict with one sub-dict per method, each holding
            ``"is_selected"`` and ``"num_of_features_to_keep"`` (plus
            ``"num_of_trees"`` for the tree-based method).

    Returns:
        A new DataFrame with the reduced feature set, or a copy of ``df`` when
        no method is selected.

    Raises:
        KeyError: if the selected section lacks a required key, or
            ``target_variable`` is not a column when the chosen method needs it.
    """
    def section(name):
        # A missing (or null) section is treated the same as "not selected".
        return config.get(name) or {}

    def numeric_feature_names():
        # Candidate features: every numeric column except the target.
        # Resolved lazily so the "No Reduction" path never inspects dtypes.
        return [
            col for col in df.columns
            if col != target_variable and pd.api.types.is_numeric_dtype(df[col])
        ]

    # No Reduction
    if section("No Reduction").get("is_selected"):
        # Assuming 'No Reduction' simply means limiting the number of features without any specific method
        num_features = config["No Reduction"]["num_of_features_to_keep"]
        return df.iloc[:, :num_features].copy()

    # Correlation with Target
    if section("Correlation with target").get("is_selected"):
        num_features = config["Correlation with target"]["num_of_features_to_keep"]
        target = df[target_variable]
        strength = {}
        for col in numeric_feature_names():
            r = pearsonr(df[col], target)[0]
            # Rank by magnitude: a strongly negative correlation is as
            # informative as a strongly positive one. An undefined (NaN)
            # coefficient is scored 0 so the ordering stays well defined.
            strength[col] = 0.0 if np.isnan(r) else abs(r)
        ranked = sorted(strength, key=strength.get, reverse=True)[:num_features]
        return df[ranked + [target_variable]]

    # Tree-based
    if section("Tree-based").get("is_selected"):
        tree_cfg = config["Tree-based"]
        num_features = tree_cfg["num_of_features_to_keep"]
        features = df[numeric_feature_names()]
        # NOTE(review): a classifier is fitted regardless of the target's type;
        # a continuous target presumably needs a regressor instead (confirm).
        # "depth_of_trees" from the config is not used here.
        clf = ExtraTreesClassifier(n_estimators=tree_cfg["num_of_trees"])
        clf = clf.fit(features, df[target_variable])
        order = np.argsort(clf.feature_importances_)[::-1][:num_features]
        # The importances are positional with respect to the fitted feature
        # frame, so index that frame's columns rather than df.columns, which
        # also contains the target.
        chosen = features.columns[order].tolist()
        return df[chosen + [target_variable]]

    # PCA
    if section("Principal Component Analysis").get("is_selected"):
        num_features = config["Principal Component Analysis"]["num_of_features_to_keep"]
        pca = PCA(n_components=num_features)
        components = pca.fit_transform(df[numeric_feature_names()])
        # Reuse df's index so the target column lines up row-for-row even when
        # df does not carry a default 0..n-1 index.
        reduced_df = pd.DataFrame(
            data=components,
            columns=[f'PC{i}' for i in range(1, num_features + 1)],
            index=df.index,
        )
        reduced_df[target_variable] = df[target_variable]
        return reduced_df

    # Nothing selected: hand back an independent copy of the input.
    return df.copy()

# Example usage:
# Exactly one method should have "is_selected": True. apply_feature_reduction
# follows these flags, not the "feature_reduction_method" label, so this
# config runs "No Reduction" even though the label names another method.
json_config = {
  "feature_reduction_method": "Correlation with target",
  "No Reduction": {"is_selected": True, "num_of_features_to_keep": 5},
  "Correlation with target": {"is_selected": False, "num_of_features_to_keep": 8},
  "Tree-based": {"is_selected": False, "num_of_features_to_keep": 0, "depth_of_trees": 0, "num_of_trees": 0},
  "Principal Component Analysis": {"is_selected": False, "num_of_features_to_keep": 0},
}

# Load your dataset
df = pd.read_csv('iris.csv')
# Use a real column of iris.csv as the target (the same one named in the
# target-config example) instead of a template placeholder, so that switching
# to any other reduction method does not fail on a missing column.
target_variable = 'petal_width'

# Apply feature reduction
reduced_df = apply_feature_reduction(df, target_variable, json_config)

# Check the result
print(reduced_df.head())


   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [None]:
import json
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

# Sample JSON configuration with a single classification model entry.
# instantiate_model_from_json reads "prediction_type", the model's key name,
# "is_selected", "min_iter"/"max_iter" and "min_regparam"/"max_regparam";
# the other keys ("parallelism", "min_elasticnet", "max_elasticnet",
# "model_name") are not used by it.
# NOTE: `json_config` is a JSON string here, whereas the feature-reduction
# cell binds the same name to a dict.
json_config = """
{
  "prediction_type": "Classification",
  "models": {
    "LogisticRegression": {
      "model_name": "LogisticRegression",
      "is_selected": true,
      "parallelism": 2,
      "min_iter": 30,
      "max_iter": 50,
      "min_regparam": 0.5,
      "max_regparam": 0.8,
      "min_elasticnet": 0.5,
      "max_elasticnet": 0.8
    }
  }
}
"""

# Function to parse JSON and instantiate models
def instantiate_model_from_json(json_str):
    """Build estimator objects for the models flagged as selected in a JSON config.

    Args:
        json_str: JSON text with a "prediction_type" key and, for
            classification, a "models" mapping of model name -> settings.

    Returns:
        A list of unfitted estimators. Only selected "LogisticRegression"
        entries under a "Classification" prediction type produce a model;
        every other combination yields an empty list.
    """
    settings = json.loads(json_str)
    built = []
    task = settings["prediction_type"]

    if task != "Classification":
        # "Regression" is a recognised type, but no regression models are wired
        # up yet; any other type is ignored as well.
        return built

    for name, params in settings["models"].items():
        if not params["is_selected"]:
            continue
        if name != "LogisticRegression":
            # Other classification models can be added here as needed.
            continue

        # Collapse each (min, max) search range to its midpoint.
        iterations = int((params["min_iter"] + params["max_iter"]) / 2)
        # C is the inverse of the regularization strength.
        inverse_strength = 1.0 / ((params["min_regparam"] + params["max_regparam"]) / 2)
        # The elastic-net range is not mapped onto the estimator.
        built.append(LogisticRegression(max_iter=iterations, C=inverse_strength))

    return built

# Example usage: build the selected models from the JSON string above and
# print each estimator's repr, one per line.
models = instantiate_model_from_json(json_config)
for model in models:
    print(model)


LogisticRegression(C=1.5384615384615383, max_iter=40)
