In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the full synthetic dataset from disk.
df = pd.read_csv('synthetic_dataset.csv')  # Update with the actual path

# Layout assumed by this notebook: the first 1000 rows hold a binary target
# (classification task) and every remaining row holds the regression target.
df_class = df.iloc[:1000]
df_regress = df.iloc[1000:]

# --- Classification task: feature matrix, integer labels, 80/20 split ---
X_class = df_class.drop(columns='target').to_numpy()
y_class = df_class['target'].to_numpy().astype(int)  # labels must be integers
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)

# --- Regression task: feature matrix, continuous target, 80/20 split ---
X_regress = df_regress.drop(columns='target').to_numpy()
y_regress = df_regress['target'].to_numpy()
X_train_regress, X_test_regress, y_train_regress, y_test_regress = train_test_split(
    X_regress, y_regress, test_size=0.2, random_state=42
)

# Standardize the features of each task separately. The scaling statistics
# are learned from the training split only and then re-applied to the test
# split, so no test-set information leaks into preprocessing.
scaler_class = StandardScaler()
X_train_class_scaled = scaler_class.fit_transform(X_train_class)
X_test_class_scaled = scaler_class.transform(X_test_class)

scaler_regress = StandardScaler()
X_train_regress_scaled = scaler_regress.fit_transform(X_train_regress)
X_test_regress_scaled = scaler_regress.transform(X_test_regress)

1. Implement MLP to classify the given data set and analyse the performance of the classifier. 

In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Baseline MLP classifier: a single hidden layer of 100 units, trained on the
# standardized classification features.
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(100,),
    alpha=0.01,
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=42,
).fit(X_train_class_scaled, y_train_class)

# Score the baseline on the held-out split; the accuracy is kept so the tuned
# model can be compared against it later.
y_pred_class = mlp_classifier.predict(X_test_class_scaled)
initial_accuracy_mlp = accuracy_score(y_test_class, y_pred_class)
initial_classification_report_mlp = classification_report(y_test_class, y_pred_class)

print("Accuracy:", initial_accuracy_mlp)
print("Classification Report:\n", initial_classification_report_mlp)

Accuracy: 0.955
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.94      0.96       106
           1       0.94      0.97      0.95        94

    accuracy                           0.95       200
   macro avg       0.95      0.96      0.95       200
weighted avg       0.96      0.95      0.96       200



2. Implement MLP for regression task.

In [3]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Baseline MLP regressor: same architecture and optimizer settings as the
# baseline classifier, trained on the standardized regression features.
mlp_regressor = MLPRegressor(
    hidden_layer_sizes=(100,),
    alpha=0.01,
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=42,
).fit(X_train_regress_scaled, y_train_regress)

# Report the error and explained variance on the held-out regression split.
y_pred_regress = mlp_regressor.predict(X_test_regress_scaled)
mse_mlp_regress = mean_squared_error(y_test_regress, y_pred_regress)
r2_mlp_regress = r2_score(y_test_regress, y_pred_regress)

print("MSE:", mse_mlp_regress)
print("R^2 Score:", r2_mlp_regress)

MSE: 78.35172959851523
R^2 Score: 0.9984617903860581




3. Improve the performance of the model by adjusting the hyperparameters such as number of hidden nodes, learning rate parameter, momentum etc. 

In [4]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, accuracy_score

# Hyperparameter search space for the MLP classifier.
#
# scikit-learn only uses `momentum` when solver='sgd'. With the default
# solver ('adam') a momentum axis is silently ignored, so every configuration
# would be fitted three times with identical results. The space is therefore
# split per solver: Adam is searched without momentum, and SGD is searched
# with it so that momentum is actually tuned.
_mlp_shared_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'learning_rate_init': [0.001, 0.01, 0.1],
    'alpha': [0.0001, 0.001, 0.01],  # L2 penalty (regularization term) parameter.
}
param_grid_mlp = [
    {**_mlp_shared_grid, 'solver': ['adam']},
    {**_mlp_shared_grid, 'solver': ['sgd'], 'momentum': [0.9, 0.95, 0.99]},
]

# Base estimator; the grid search clones it for every candidate.
mlp = MLPClassifier(max_iter=1000, random_state=42)

# Candidates are ranked by mean cross-validated accuracy.
accuracy_scorer = make_scorer(accuracy_score)

# 5-fold cross-validated grid search. n_jobs=-1 fits candidates in parallel
# on all available cores; it does not change the results.
grid_search_mlp = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid_mlp,
    cv=5,
    verbose=0,
    scoring=accuracy_scorer,
    n_jobs=-1,
)

# Fit the grid search to the data for classification
grid_search_mlp.fit(X_train_class_scaled, y_train_class)
print("Best MLP hyperparameters:", grid_search_mlp.best_params_)

# GridSearchCV (refit=True by default) has already re-trained the best
# configuration on the whole training split, so reuse that model instead of
# fitting an identical one a second time.
best_mlp = grid_search_mlp.best_estimator_

# Evaluate the performance on the test set
y_pred_class_best = best_mlp.predict(X_test_class_scaled)
new_accuracy_mlp = accuracy_score(y_test_class, y_pred_class_best)
new_classification_report_mlp = classification_report(y_test_class, y_pred_class_best)

print("New Accuracy for MLP:", new_accuracy_mlp)
print("New Classification Report for MLP:\n", new_classification_report_mlp)
# Difference against the baseline accuracy computed in the baseline MLP cell.
print(f"Improvement in MLP Accuracy: {new_accuracy_mlp - initial_accuracy_mlp}")

New Accuracy for MLP: 0.97
New Classification Report for MLP:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97       106
           1       0.97      0.97      0.97        94

    accuracy                           0.97       200
   macro avg       0.97      0.97      0.97       200
weighted avg       0.97      0.97      0.97       200

Improvement in MLP Accuracy: 0.015000000000000013


4. Implement SVM to classify the given data set and analyse the performance of the classifier. 

In [5]:
from sklearn.svm import SVC

# Baseline SVM classifier with a linear kernel, trained on the standardized
# classification features.
svc_classifier = SVC(
    kernel='linear',
    C=1.0,
    random_state=42,
).fit(X_train_class_scaled, y_train_class)

# Score the baseline on the held-out split; the accuracy is kept so the tuned
# SVM can be compared against it later.
y_pred_svc_class = svc_classifier.predict(X_test_class_scaled)
initial_accuracy_svc = accuracy_score(y_test_class, y_pred_svc_class)
initial_classification_report_svc = classification_report(y_test_class, y_pred_svc_class)

print("Accuracy:", initial_accuracy_svc)
print("Classification Report:\n", initial_classification_report_svc)

Accuracy: 0.805
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.81      0.82       106
           1       0.79      0.80      0.79        94

    accuracy                           0.81       200
   macro avg       0.80      0.80      0.80       200
weighted avg       0.81      0.81      0.81       200



5. Implement SVM for regression task.

In [6]:
from sklearn.svm import SVR

# Baseline support vector regressor with a linear kernel, trained on the
# standardized regression features.
svr_regressor = SVR(kernel='linear', C=1.0).fit(X_train_regress_scaled, y_train_regress)

# Report the error and explained variance on the held-out regression split.
y_pred_svr_regress = svr_regressor.predict(X_test_regress_scaled)
mse_svr_regress = mean_squared_error(y_test_regress, y_pred_svr_regress)
r2_svr_regress = r2_score(y_test_regress, y_pred_svr_regress)

print("MSE:", mse_svr_regress)
print("R^2 Score:", r2_svr_regress)

MSE: 0.04872490986734624
R^2 Score: 0.9999990434273094


6. Improve the performance of the SVM model by adjusting its hyperparameters, such as the regularization parameter C, the kernel type, and the kernel coefficient gamma.

In [7]:
from sklearn.svm import SVC

# Hyperparameter search space for the SVM classifier.
#
# `gamma` is the kernel coefficient for the 'rbf' and 'poly' kernels and is
# ignored by the linear kernel. Crossing it with kernel='linear' would fit
# the same linear model once per gamma value, so the space is split:
# the linear kernel is searched over C only, the other kernels over C x gamma.
_svc_c_values = [0.1, 1, 10, 100]
param_grid_svc = [
    {'kernel': ['rbf', 'poly'], 'C': _svc_c_values, 'gamma': ['scale', 'auto', 0.01, 0.001]},
    {'kernel': ['linear'], 'C': _svc_c_values},
]

# Base estimator; the grid search clones it for every candidate.
svc = SVC(random_state=42)

# 5-fold cross-validated grid search ranked by accuracy. n_jobs=-1 fits
# candidates in parallel on all available cores; it does not change the results.
grid_search_svc = GridSearchCV(
    estimator=svc,
    param_grid=param_grid_svc,
    cv=5,
    verbose=0,
    scoring='accuracy',
    n_jobs=-1,
)

# Fit the grid search to the data for classification
grid_search_svc.fit(X_train_class_scaled, y_train_class)
print("Best SVC hyperparameters:", grid_search_svc.best_params_)

# GridSearchCV (refit=True by default) has already re-trained the best
# configuration on the whole training split, so reuse that model instead of
# fitting an identical one a second time.
best_svc = grid_search_svc.best_estimator_

# Evaluate the performance on the test set
y_pred_svc_best = best_svc.predict(X_test_class_scaled)
new_accuracy_svc = accuracy_score(y_test_class, y_pred_svc_best)
new_classification_report_svc = classification_report(y_test_class, y_pred_svc_best)

print("New Accuracy for SVC:", new_accuracy_svc)
print("New Classification Report for SVC:\n", new_classification_report_svc)
# Difference against the baseline accuracy computed in the baseline SVM cell.
print(f"Improvement in SVC Accuracy: {new_accuracy_svc - initial_accuracy_svc}")

New Accuracy for SVC: 0.97
New Classification Report for SVC:
               precision    recall  f1-score   support

           0       0.99      0.95      0.97       106
           1       0.95      0.99      0.97        94

    accuracy                           0.97       200
   macro avg       0.97      0.97      0.97       200
weighted avg       0.97      0.97      0.97       200

Improvement in SVC Accuracy: 0.16499999999999992


In [22]:
# Exploratory documentation lookup; none of the cells above use mlxtend.
# NOTE(review): `preprocessing.onehot` is the module (the encoder function is
# `one_hot`), so help() prints the module page. This cell also has an
# out-of-sequence execution count - consider removing it from the final notebook.
from mlxtend import preprocessing
help(preprocessing.onehot)

Help on module mlxtend.preprocessing.onehot in mlxtend.preprocessing:

NAME
    mlxtend.preprocessing.onehot

DESCRIPTION
    # Sebastian Raschka 2014-2024
    # mlxtend Machine Learning Library Extensions
    # Author: Sebastian Raschka <sebastianraschka.com>
    #
    # License: BSD 3 clause

FUNCTIONS
    one_hot(y, num_labels='auto', dtype='float')
        One-hot encoding of class labels
        
        Parameters
        ----------
        y : array-like, shape = [n_classlabels]
            Python list or numpy array consisting of class labels.
        num_labels : int or 'auto'
            Number of unique labels in the class label array. Infers the number
            of unique labels from the input array if set to 'auto'.
        dtype : str
            NumPy array type (float, float32, float64) of the output array.
        
        Returns
        ----------
        ary : numpy.ndarray, shape = [n_classlabels]
            One-hot encoded array, where each sample is represente