In [1]:
#Read the target and type of regression to be run
import json

with open('algoparams_from_ui.json') as json_file:
    data = json.load(json_file)

target = data['design_state_data']['target']
prediction_type = target['prediction_type']
target_variable = target['target']
regression_type = target['type']
partitioning = target['partitioning']

# Print the extracted information
print(f"Prediction Type: {prediction_type}")
print(f"Target Variable: {target_variable}")
print(f"Regression Type: {regression_type}")
print(f"Partitioning: {partitioning}")


Prediction Type: Regression
Target Variable: petal_width
Regression Type: regression
Partitioning: True


In [2]:
#Reading the features
import pandas as pd
feature_handling = data['design_state_data']['feature_handling'] #Extract feature handling information

df = pd.read_csv('iris_modified.csv')

for feature_name, feature_info in feature_handling.items():
    if feature_info['is_selected']:
        feature_variable_type = feature_info['feature_variable_type']
        feature_details = feature_info.get('feature_details', {})
        missing_values = feature_details.get('missing_values', 'N/A')
        impute_with = feature_details.get('impute_with', 'N/A')
        impute_value = feature_details.get('impute_value', 'N/A')

        if missing_values == 'Impute':
            if impute_with == 'Average of values':
                if feature_variable_type == 'numerical':
                    df[feature_name].fillna(df[feature_name].mean(), inplace=True)
                else:
                    pass
            elif impute_with == 'Custom value':
                df[feature_name].fillna(impute_value, inplace=True)

print(df.head())


   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [3]:
#feature reduction based on input
feature_reduction = data['design_state_data']['feature_reduction']

if isinstance(feature_reduction, dict):
    for method, method_info in feature_reduction.items():
        if isinstance(method_info, dict) and method_info.get('is_selected', False):
            num_of_features_to_keep = method_info['num_of_features_to_keep']

            if method == 'No Reduction':
                pass
            elif method == 'Correlation with target':
                pass
            elif method == 'Tree-based':
                pass
            elif method == 'Principal Component Analysis': # Implementing PCA-based feature reduction
                pass

    print(df.head())
else:
    print("Invalid")


   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [4]:
from sklearn.linear_model import LogisticRegression

# Parsing JSON to extract the model parameters given in the file
logistic_regression_params = {
    "model_name": "Logistic Regression",
    "is_selected": True, 
    "parallelism": 2,
    "min_iter": 30,
    "max_iter": 50,
    "min_regparam": 0.5,
    "max_regparam": 0.8,
    "min_elasticnet": 0.5,
    "max_elasticnet": 0.8
}

if logistic_regression_params["is_selected"]:
    # Creating Logistic Regression as model object
    logistic_regression_model = LogisticRegression(
        random_state=0,
        max_iter=logistic_regression_params["max_iter"],
        solver="lbfgs",
        multi_class="auto",
        n_jobs=logistic_regression_params["parallelism"],
        C=[i/10 for i in range(int(logistic_regression_params["min_regparam"]*10),
                                int((logistic_regression_params["max_regparam"] + 0.1)*10))],
        l1_ratio=[i/10 for i in range(int(logistic_regression_params["min_elasticnet"]*10),
                                      int((logistic_regression_params["max_elasticnet"] + 0.1)*10))]
    )

    print(logistic_regression_model)
else:
    print("Logistic Regression is not selected.")


LogisticRegression(C=[0.5, 0.6, 0.7, 0.8], l1_ratio=[0.5, 0.6, 0.7, 0.8],
                   max_iter=50, n_jobs=2, random_state=0)


In [5]:
#sample for logistic regression as well
from sklearn.linear_model import LinearRegression
linear_regression_params = {
    "model_name": "Linear Regression",
    "is_selected": True,  # Change this to True to select the model
    "parallelism": 2,
    "min_iter": 30,
    "max_iter": 50,
}

if linear_regression_params["is_selected"]:
    linear_regression_model = LinearRegression(
        n_jobs=linear_regression_params["parallelism"]
    )
    print(linear_regression_model)
else:
    print("Linear Regression is not selected.")


LinearRegression(n_jobs=2)


In [16]:
# Displaying the selected features
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
selected_feature_indices = feature_selector.get_support(indices=True)
selected_features = X.columns[selected_feature_indices]
print("Selected Features:", selected_features)

Selected Features: Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')


In [17]:
selected_features

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')

In [8]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def create_feature_handling_pipeline(features, json_data):
    # Initializing an empty list to store the processing steps
    steps = []

    if "feature_handling" in json_data:
        feature_handling_params = json_data["feature_handling"]
        print("Selected Features:", features)
        print("Feature Handling Parameters:", feature_handling_params)

        for feature, details in feature_handling_params.items():
            if details.get("is_selected", False):
                print(f"Processing feature: {feature}")

                # Handling missing values
                feature_details = details.get("feature_details", {})
                if feature_details.get("missing_values") == "Impute":
                    impute_strategy = feature_details.get("impute_with", "Average of values")
                    if impute_strategy == "Average of values":
                        imputer = SimpleImputer(strategy="mean")
                    elif impute_strategy == "custom":
                        impute_value = feature_details.get("impute_value", 0)
                        imputer = SimpleImputer(strategy="constant", fill_value=impute_value)
                    steps.append((f"{feature}_imputer", imputer))

                # Rescaling
                if feature_details.get("rescaling") == "StandardScaler":
                    scaler = StandardScaler()

                    steps.append((f"{feature}_scaler", scaler))

        # Creating the pipeline
        feature_handling_pipeline = Pipeline(steps)

        return feature_handling_pipeline
    else:
        print("'feature_handling' key not found in json_data.")
        return None
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]

json_data = {
    "feature_handling": {
        "sepal_length": {
            "feature_name": "sepal_length",
            "is_selected": True,
            "feature_variable_type": "numerical",
            "feature_details": {
                "numerical_handling": "Keep as regular numerical feature",
                "rescaling": "StandardScaler",
                "make_derived_feats": False,
                "missing_values": "Impute",
                "impute_with": "Average of values",
                "impute_value": 0
            }
        },
        "sepal_width": {
            "feature_name": "sepal_width",
            "is_selected": True,
            "feature_variable_type": "numerical",
            "feature_details": {
                "numerical_handling": "Keep as regular numerical feature",
                "rescaling": "No rescaling",
                "make_derived_feats": False,
                "missing_values": "Impute",
                "impute_with": "custom",
                "impute_value": -1
            }
        },
        "petal_length": {
            "feature_name": "petal_length",
            "is_selected": True,
            "feature_variable_type": "numerical",
            "feature_details": {
                "numerical_handling": "Keep as regular numerical feature",
                "rescaling": "No rescaling",
                "make_derived_feats": False,
                "missing_values": "Impute",
                "impute_with": "Average of values",
                "impute_value": 0
            }
        },
        "petal_width": {
            "feature_name": "petal_width",
            "is_selected": True,
            "feature_variable_type": "numerical",
            "feature_details": {
                "numerical_handling": "Keep as regular numerical feature",
                "rescaling": "No rescaling",
                "make_derived_feats": False,
                "missing_values": "Impute",
                "impute_with": "custom",
                "impute_value": -2
            }
        },
        "species": {
            "feature_name": "species",
            "is_selected": True,
            "feature_variable_type": "text",
            "feature_details": {
                "text_handling": "Tokenize and hash",
                "hash_columns": 0
            }
        },
    }
}

# Creating the feature handling pipeline
feature_pipeline = create_feature_handling_pipeline(selected_features, json_data)

if feature_pipeline is not None:
    # Fit and transform the data
    X_processed = feature_pipeline.fit_transform(X)
    print("Shape of the processed data:", X_processed.shape)


Selected Features: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
Feature Handling Parameters: {'sepal_length': {'feature_name': 'sepal_length', 'is_selected': True, 'feature_variable_type': 'numerical', 'feature_details': {'numerical_handling': 'Keep as regular numerical feature', 'rescaling': 'StandardScaler', 'make_derived_feats': False, 'missing_values': 'Impute', 'impute_with': 'Average of values', 'impute_value': 0}}, 'sepal_width': {'feature_name': 'sepal_width', 'is_selected': True, 'feature_variable_type': 'numerical', 'feature_details': {'numerical_handling': 'Keep as regular numerical feature', 'rescaling': 'No rescaling', 'make_derived_feats': False, 'missing_values': 'Impute', 'impute_with': 'custom', 'impute_value': -1}}, 'petal_length': {'feature_name': 'petal_length', 'is_selected': True, 'feature_variable_type': 'numerical', 'feature_details': {'numerical_handling': 'Keep as regular numerical feature', 'rescaling': 'No rescaling', 'make_derived_feats': 

In [9]:
print(json_data)

{'feature_handling': {'sepal_length': {'feature_name': 'sepal_length', 'is_selected': True, 'feature_variable_type': 'numerical', 'feature_details': {'numerical_handling': 'Keep as regular numerical feature', 'rescaling': 'StandardScaler', 'make_derived_feats': False, 'missing_values': 'Impute', 'impute_with': 'Average of values', 'impute_value': 0}}, 'sepal_width': {'feature_name': 'sepal_width', 'is_selected': True, 'feature_variable_type': 'numerical', 'feature_details': {'numerical_handling': 'Keep as regular numerical feature', 'rescaling': 'No rescaling', 'make_derived_feats': False, 'missing_values': 'Impute', 'impute_with': 'custom', 'impute_value': -1}}, 'petal_length': {'feature_name': 'petal_length', 'is_selected': True, 'feature_variable_type': 'numerical', 'feature_details': {'numerical_handling': 'Keep as regular numerical feature', 'rescaling': 'No rescaling', 'make_derived_feats': False, 'missing_values': 'Impute', 'impute_with': 'Average of values', 'impute_value': 0}}

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

y = df['petal_width']#Target variable

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 0.00019073333333334577


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

y = df['petal_width']
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)
mse_linear = mean_squared_error(y_test, y_pred_linear)
print(f"Linear Regression - Mean Squared Error: {mse_linear}")

Linear Regression - Mean Squared Error: 6.804952470168233e-32


In [12]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

y = df['petal_width']
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
ridge_model = Ridge(alpha=1.0) 
ridge_model.fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)

mse_ridge = mean_squared_error(y_test, y_pred_ridge)
print(f"Ridge Regression - Mean Squared Error: {mse_ridge}")

Ridge Regression - Mean Squared Error: 0.0005079011076312915


In [13]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
y = df['petal_width']

X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
lasso_model = Lasso(alpha=1.0, random_state=42)
lasso_model.fit(X_train, y_train)
y_pred_lasso = lasso_model.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
print(f"Lasso Regression - Mean Squared Error: {mse_lasso}")

Lasso Regression - Mean Squared Error: 0.6421895833333334


In [14]:
#Finding mse bulkly 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
y = df['petal_width']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# Defining the models
models = {
    'RandomForestRegressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'DecisionTreeRegressor': DecisionTreeRegressor(),
    'SVR': SVR(),
    'KNeighborsRegressor': KNeighborsRegressor(),
    'GradientBoostingRegressor': GradientBoostingRegressor(),
    'ExtraTreesRegressor': ExtraTreesRegressor(),
    'MLPRegressor': MLPRegressor(),
    'XGBRegressor': XGBRegressor(n_estimators=100, random_state=42)
}

# Training and evaluating each model
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"{model_name} - Mean Squared Error: {mse}")


RandomForestRegressor - Mean Squared Error: 0.00019073333333334577
LinearRegression - Mean Squared Error: 6.804952470168233e-32
Ridge - Mean Squared Error: 0.0005079011076312915
Lasso - Mean Squared Error: 0.6421895833333334
ElasticNet - Mean Squared Error: 0.36844374044121003
DecisionTreeRegressor - Mean Squared Error: 0.0006666666666666663
SVR - Mean Squared Error: 0.009189235166194902
KNeighborsRegressor - Mean Squared Error: 0.013586666666666644
GradientBoostingRegressor - Mean Squared Error: 1.503453013120862e-05
ExtraTreesRegressor - Mean Squared Error: 3.2666666666672484e-05




MLPRegressor - Mean Squared Error: 0.027049582397114413
XGBRegressor - Mean Squared Error: 3.04367516704188e-06


In [None]:
##Hyper parameter tuning

In [18]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Assuming X_train and y_train are your training data
# Define the parameter grid to search
param_grid = {
    'n_estimators': [10, 20, 30],
    'max_depth': [20, 25],
    'min_samples_leaf': [5, 10],
    'max_features': [None, 'sqrt', 'log2']
}

# Instantiate the Random Forest Regressor
rf_regressor = RandomForestRegressor(random_state=0)

# Instantiate the GridSearchCV object
grid_search = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

# Fit the GridSearchCV object to the data
grid_search.fit(X_train, y_train)

# Get the best parameters and the best estimator
best_params = grid_search.best_params_
best_regressor = grid_search.best_estimator_

# Print the best parameters
print("Best Parameters:", best_params)

# Now you can use the best model for predictions
y_pred = best_regressor.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Best Parameters: {'max_depth': 20, 'max_features': None, 'min_samples_leaf': 5, 'n_estimators': 30}
Mean Squared Error: 0.0010957555710707839


Running the fit and prediction of each model, hyperparameters

In [19]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


y_classes = np.digitize(y_train, bins=[-np.inf, 10, 20, np.inf], right=True)

X_train, X_test, y_train, y_test = train_test_split(X_train, y_classes, test_size=0.2, random_state=42)

#parameter grid to search
param_grid = {
    'n_estimators': [10, 20, 30],
    'max_depth': [20, 30],
    'min_samples_leaf': [5, 10, 20],
    'max_features': [None, 'sqrt', 'log2']
}

# Instantiating the RandomForestClassifier
rf_classifier = RandomForestClassifier(random_state=0)

# Instantiating the GridSearchCV object
grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5, scoring='accuracy')

grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
best_classifier = grid_search.best_estimator_

# Printing the best parameters
print("Best Parameters:", best_params)

y_pred = best_classifier.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Best Parameters: {'max_depth': 20, 'max_features': None, 'min_samples_leaf': 5, 'n_estimators': 10}
Accuracy: 1.0
Mean Squared Error: 0.0


In [23]:
#performing the same procedure for the remaining algorith9ms
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

param_grid = {
    'n_estimators': [67, 89],
    'learning_rate': [0.1, 0.01],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_depth': [5, 7],
    'subsample': [1.0, 0.9]
}

gb_regressor = GradientBoostingRegressor(random_state=0)

grid_search = GridSearchCV(estimator=gb_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_regressor = grid_search.best_estimator_
print("Best Parameters:", best_params)

y_pred = best_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 67, 'subsample': 1.0}
Mean Squared Error: 0.0


In [28]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

param_grid = {
    'fit_intercept': [True, False],
    'positive': [False],  # 'positive' is the equivalent of 'normalize' in this context
    'copy_X': [True, False],
    'n_jobs': [2],
}

linear_model = LinearRegression()

grid_search = GridSearchCV(estimator=linear_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_linear_model = grid_search.best_estimator_
print("Best Parameters:", best_params)

y_pred = best_linear_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Best Parameters: {'copy_X': True, 'fit_intercept': True, 'n_jobs': 2, 'positive': False}
Mean Squared Error: 0.0


In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

le = LabelEncoder()
df['species'] = le.fit_transform(df['species'])

X = df.drop('species', axis=1)
y = df['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

logistic_model = LogisticRegression()
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'max_iter': [50, 100, 200],
    'solver': ['liblinear', 'lbfgs', 'newton-cg', 'sag', 'saga']
}

grid_search = GridSearchCV(estimator=logistic_model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

accuracy = best_model.score(X_test, y_test)
print(f"Best parameters: {best_params}")
print(f"Accuracy on the test set: {accuracy}")


Best parameters: {'C': 1, 'max_iter': 50, 'solver': 'sag'}
Accuracy on the test set: 1.0


In [34]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

lasso_model = Lasso()
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 1.5, 2.0],  # Add more values as needed
    'max_iter': [30, 50, 100],  # Add more values as needed
}

grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_lasso_model = grid_search.best_estimator_

y_pred = best_lasso_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Mean Squared Error on Test Set: {mse}")


Best Parameters: {'alpha': 0.1, 'max_iter': 30}
Mean Squared Error on Test Set: 0.06670937073995876


In [35]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

elastic_net_model = ElasticNet()

param_grid = {
    'alpha': [0.1, 0.5, 1.0, 1.5, 2.0],  # Add more values as needed
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9],  # Add more values as needed
    'max_iter': [30, 50, 100],  # Add more values as needed
}

grid_search = GridSearchCV(estimator=elastic_net_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_elastic_net_model = grid_search.best_estimator_
y_pred = best_elastic_net_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Mean Squared Error on Test Set: {mse}")


Best Parameters: {'alpha': 0.1, 'l1_ratio': 0.1, 'max_iter': 30}
Mean Squared Error on Test Set: 0.04962916753855276


In [36]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

xgboost_model = xgb.XGBRegressor()
param_grid = {
    'max_depth': [56, 89],
    'learning_rate': [0.89, 0.76],
    'reg_alpha': [0.77],
    'reg_lambda': [0.78],
    'gamma': [0.68],
    'min_child_weight': [0.67],
    'subsample': [0.67],
    'colsample_bytree': [0.67],
    'n_estimators': [100, 200, 300],  # Add more values as needed
}

grid_search = GridSearchCV(estimator=xgboost_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_xgboost_model = grid_search.best_estimator_
y_pred = best_xgboost_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Mean Squared Error on Test Set: {mse}")


Best Parameters: {'colsample_bytree': 0.67, 'gamma': 0.68, 'learning_rate': 0.76, 'max_depth': 56, 'min_child_weight': 0.67, 'n_estimators': 200, 'reg_alpha': 0.77, 'reg_lambda': 0.78, 'subsample': 0.67}
Mean Squared Error on Test Set: 0.020483259185628915


In [38]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Assuming you have your data in X_train, X_test, y_train, y_test

# Define the Decision Tree Regressor model
decision_tree_model = DecisionTreeRegressor()

# Define the hyperparameter grid to search
param_grid = {
    'min_samples_split': [12, 6],
    'min_samples_leaf': [12, 6],
    'max_depth': [4, 7],
    'criterion': ['friedman_mse', 'squared_error', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=decision_tree_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

# Fit the GridSearchCV object to the data
grid_search.fit(X_train, y_train)

# Get the best parameters and the best estimator
best_params = grid_search.best_params_
best_decision_tree_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_decision_tree_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Mean Squared Error on Test Set: {mse}")

Best Parameters: {'criterion': 'absolute_error', 'max_depth': 4, 'min_samples_leaf': 6, 'min_samples_split': 6, 'splitter': 'random'}
Mean Squared Error on Test Set: 0.06666666666666667


In [39]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import pandas as pd

decision_tree_model = DecisionTreeClassifier()

param_grid = {
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy']
}

grid_search = GridSearchCV(estimator=decision_tree_model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print("Best Parameters:", best_params)
accuracy = best_estimator.score(X_test, y_test)
print("Accuracy on Test Set:", accuracy)


Best Parameters: {'criterion': 'entropy', 'min_samples_leaf': 4, 'min_samples_split': 2}
Accuracy on Test Set: 1.0


In [40]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import pandas as pd

svm_model = SVC()

param_grid = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'C': [566, 79],
    'gamma': ['scale', 'auto', 'custom'],
    'tol': [1e-7, 1e-6, 1e-5],
    'max_iter': [7, 10, 15]
}

grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print("Best Parameters:", best_params)
accuracy = best_estimator.score(X_test, y_test)
print("Accuracy on Test Set:", accuracy)

Best Parameters: {'C': 566, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': 15, 'tol': 1e-07}
Accuracy on Test Set: 0.9


In [41]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import pandas as pd

svm_model = SVC()

param_grid = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'C': [566, 79],
    'gamma': ['scale', 'auto', 'custom'],
    'tol': [1e-7, 1e-6, 1e-5],
    'max_iter': [7, 10, 15]
}

grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print("Best Parameters:", best_params)
accuracy = best_estimator.score(X_test, y_test)
print("Accuracy on Test Set:", accuracy)


Best Parameters: {'C': 566, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': 15, 'tol': 1e-07}
Accuracy on Test Set: 0.9


In [42]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

knn_model = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [78],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2]
}
grid_search = GridSearchCV(estimator=knn_model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print("Best Parameters:", best_params)
accuracy = best_estimator.score(X_test, y_test)
print("Accuracy on Test Set:", accuracy)

Best Parameters: {'algorithm': 'auto', 'n_neighbors': 78, 'p': 2, 'weights': 'distance'}
Accuracy on Test Set: 1.0
