# Importing modules

In [1]:
!pip install autogluon









In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from autogluon.features.generators import AutoMLPipelineFeatureGenerator
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib


# Data Pre-processing

In [3]:
def preprocess(data):
    data = data.dropna()
    data = data.drop(columns=['Unnamed: 0','id'])
    X = data.drop(columns=['satisfaction'])
    y = data['satisfaction']
    y = [1 if labels=='satisfied' else 0 for labels in data['satisfaction']]
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42, train_size = .80)
    return X_train, X_val, y_train, y_val


# Feature Transformation

In [4]:
def transformation(X_train, X_val, y_train, y_val):
    auto_ml_pipeline_feature_generator = AutoMLPipelineFeatureGenerator()
    auto_ml_pipeline_feature_generator.fit(X=X_train)
    X_train_transformed = auto_ml_pipeline_feature_generator.transform(X=X_train)
    #Scaling the data
    scaler = RobustScaler()
    X_train_normalized = scaler.fit_transform(X_train_transformed)
    return X_train_normalized, y_train, auto_ml_pipeline_feature_generator, scaler


# Logistic Regression

In [5]:
def train_model(X_train_normalized,y_train, y_val, feature_gen, scaler):
    lr_model = LogisticRegression()
    lr_model.fit(X_train_normalized, y_train) 
    # Printing the classification report
    print(f"The training classification report is {classification_report(lr_model.predict(X_train_normalized), y_train)}")
    X_val_transformed = feature_gen.transform(X=X_val)
    X_val_norm = scaler.transform(X_val_transformed)
    print(f"The test classification report is {print(classification_report(lr_model.predict(X_val_norm), y_val))}")
    return lr_model

# SAVE MODEL


In [6]:
def save_model(model):
    model_name = "scikit_class_model_v2.joblib"
    joblib.dump(model, r"../models/"+model_name)
    print(f"Successfully saved {model_name} to ../models/{model_name}")

# Chaining it all together

In [7]:
data = pd.read_csv(r'../data/train.csv')
X_train, X_val, y_train, y_val = preprocess(data)
X_train_normalized, y_train, feature_gen, scaler = transformation(X_train, X_val, y_train, y_val)
model = train_model(X_train_normalized,y_train, y_val, feature_gen, scaler)
save_model(model)




The training classification report is               precision    recall  f1-score   support

           0       0.91      0.88      0.89     48488
           1       0.84      0.87      0.85     34387

    accuracy                           0.88     82875
   macro avg       0.87      0.88      0.87     82875
weighted avg       0.88      0.88      0.88     82875

              precision    recall  f1-score   support

           0       0.91      0.87      0.89     12107
           1       0.83      0.87      0.85      8612

    accuracy                           0.87     20719
   macro avg       0.87      0.87      0.87     20719
weighted avg       0.88      0.87      0.87     20719

The test classification report is None
Successfully saved scikit_class_model_v2.joblib to ../models/scikit_class_model_v2.joblib
