In [None]:
from Notebook.DSNotebook.NotebookExecutor import NotebookExecutor
# Fetch the dataset through the platform's NotebookExecutor.
# Positional arguments of get_data, in order:
nb = NotebookExecutor()
df = nb.get_data(
    '11561712724177662',  # service ID of the dataset
    '@SYS.USERID',        # stands for the user ID of the current user
    'True',               # sandbox key: 'true' -> sandbox file, 'false' -> dataset
    {},                   # filters applied to the dataset
    [],                   # data preparations applied to the dataset
    # An optional further argument, added after [] and separated by a comma,
    # limits the number of rows returned.
)
df

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Encode the 'sex' column with a LabelEncoder so it can be used as the
# target variable of the classifier trained in the next cell.
encoder = LabelEncoder()

# Fit the encoder on the 'sex' column and overwrite that column with the
# encoded values (the original values are replaced in place).
df['sex'] = encoder.fit_transform(df['sex'])

# NOTE: do not drop 'sex' after this step — the encoded values already live in
# that column, and the following cell reads it as the model target.
# NOTE(review): re-running this cell refits the encoder on the already-encoded
# column, so `encoder` presumably no longer maps back to the original labels.

# Show the updated DataFrame
print(df)

In [None]:
import pandas as pd
from time import perf_counter as get_time
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Define the necessary variables here
_data   = df    # pd.DataFrame: Full data to process
_target = 'sex'    # string: Column name of the target variable
_seed   = 42    # int: Seed for the train/test split, so reruns reproduce the same split

# Fail fast with a specific error when the configuration above is unusable.
# ValueError, TypeError and KeyError are all subclasses of Exception, so anything
# that handled the previous generic Exception keeps working.
if _data is None or _target is None:
    raise ValueError('Both _data and _target must be specified')
elif not (isinstance(_data, pd.DataFrame) and isinstance(_target, str)):
    raise TypeError('Datatype of _data must be pd.DataFrame; that of _target must be str')
elif _target not in _data.columns:
    raise KeyError(f'Target column {_target!r} not found in _data')


def _evaluate_split(model, features, labels, split_name):
    """Predict on one data split and print its accuracy and classification report.

    Args:
        model: Fitted classifier exposing ``predict``.
        features: Feature matrix of the split.
        labels: True target values of the split.
        split_name: Word used in the printed messages ('training' or 'testing').

    Returns:
        Tuple ``(predictions, accuracy, report)`` for the split.
    """
    predictions = model.predict(features)
    print(f'Predictions on {split_name} data made')

    accuracy = accuracy_score(labels, predictions)
    print(f'Accuracy score of {split_name} predictions: {round(accuracy, 3)}')

    report = classification_report(labels, predictions, digits=3)
    print(f'Classification report of {split_name} predictions:')
    print(report)
    return predictions, accuracy, report


# Separating the independent and dependent variables into X and y respectively
y = _data[_target]
X = _data.drop(columns=_target)
print(f'Shape of complete data: {_data.shape}')

# Splitting the dataset into training (70%) and testing (30%) datasets.
# random_state pins the shuffle so the metrics below are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=_seed
)
print(f'Shape of training data: {X_train.shape}')
print(f'Shape of testing data : {X_test.shape}')

# Creating the classifier and fitting it to the training data, timing the fit.
# NOTE(review): default LogisticRegression settings are used; if a convergence
# warning appears, consider scaling the features or raising max_iter.
log_reg = LogisticRegression()
time_now = get_time()
log_reg.fit(X_train, y_train)
print(f'Model {log_reg} trained')
print(f'Seconds elapsed: {round(get_time() - time_now, 3)}')

# Predictions, accuracy score and classification report on the training data
predict_train, accuracy_train, report_train = _evaluate_split(
    log_reg, X_train, y_train, 'training'
)

# Predictions, accuracy score and classification report on the testing data
predict_test, accuracy_test, report_test = _evaluate_split(
    log_reg, X_test, y_test, 'testing'
)

In [None]:
from Notebook.DSNotebook.NotebookExecutor import NotebookExecutor
# Save the trained classifier through the platform's NotebookExecutor.
#
# Notes on the save_model arguments:
# - X and y are the training datasets; they are used to build the Explainer Dashboard.
# - estimator_type specifies the algorithm type, i.e. 'classification' or 'regression'.
# - Only 'ml' models with tabular input data are supported in the Explainer Dashboard.
# - modelType: 'ml' for machine learning models, 'cv' for computer vision models,
#   'dp' for data transformation pickle files.
# - Pass 'column_headers' as an additional parameter if the headers have to be
#   saved with the model.
# - If the model uses a custom Keras layer, use Keras' native save functionality instead.
nb = NotebookExecutor()
saved_model = nb.save_model(
    model=log_reg,
    modelName='Logistic_Regression_V3',
    modelType='ml',
    X=X_train,
    y=y_train,
    estimator_type='classification',
)