## 참고: https://hleecaster.com/ml-linear-regression-example/

## 라이브러리 설치, 호출

!pip3 install -U scikit-learn<br>
!pip3 install pandas<br>
!pip3 install numpy<br>
!pip3 install matplotlib<br>
!pip3 install statsmodels<br>

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau

In [2]:
from tensorflow.keras.preprocessing import sequence

In [3]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder, StandardScaler

# Importing library to split the data into training part and testing part.
from sklearn.model_selection import train_test_split

# Correlation finding
from sklearn.feature_selection import chi2
import scipy.stats as stats

# Constant feature checking
from sklearn.feature_selection import VarianceThreshold

# RandomOverSampler to handle imbalanced data
from imblearn.over_sampling import RandomOverSampler

# Cross Validation
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import classification_report
from sklearn import metrics

## 데이터 다운로드 (Attrition of Company)

In [4]:
import pandas as pd
df = pd.read_csv('C:/Users/Home/Desktop/이어드림/딥러닝미니프로젝트/SimpleProject2/general_data.csv')

df

Unnamed: 0,EmployeeID,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,Gender,...,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager,EnvironmentSatisfaction,JobSatisfaction,WorkLifeBalance,JobInvolvement,PerformanceRating
0,1,51,No,Travel_Rarely,Sales,6,2,Life Sciences,1,Female,...,1.0,6,1,0,0,3.0,4.0,2.0,3,3
1,2,31,Yes,Travel_Frequently,Research & Development,10,1,Life Sciences,1,Female,...,6.0,3,5,1,4,3.0,2.0,4.0,2,4
2,3,32,No,Travel_Frequently,Research & Development,17,4,Other,1,Male,...,5.0,2,5,0,3,2.0,2.0,1.0,3,3
3,4,38,No,Non-Travel,Research & Development,2,5,Life Sciences,1,Male,...,13.0,5,8,7,5,4.0,4.0,3.0,2,3
4,5,32,No,Travel_Rarely,Research & Development,10,1,Medical,1,Male,...,9.0,2,6,0,4,4.0,1.0,3.0,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4405,4406,42,No,Travel_Rarely,Research & Development,5,4,Medical,1,Female,...,10.0,5,3,0,2,4.0,1.0,3.0,3,3
4406,4407,29,No,Travel_Rarely,Research & Development,2,4,Medical,1,Male,...,10.0,2,3,0,2,4.0,4.0,3.0,2,3
4407,4408,25,No,Travel_Rarely,Research & Development,25,2,Life Sciences,1,Male,...,5.0,4,4,1,2,1.0,3.0,3.0,3,4
4408,4409,42,No,Travel_Rarely,Sales,18,2,Medical,1,Male,...,10.0,2,9,7,8,4.0,1.0,3.0,2,3


In [5]:
#df 라벨인코더

col = ['Attrition', 'BusinessTravel', 'Department', 'EducationField', 'Gender', 'JobRole', 'MaritalStatus']

encoder_Att = LabelEncoder()
encoder_Busi = LabelEncoder()
encoder_Depar = LabelEncoder()
encoder_Edu = LabelEncoder()
encoder_Gender = LabelEncoder()
encoder_Job = LabelEncoder()
encoder_Mari = LabelEncoder()
df['Attrition'] = encoder_Att.fit_transform(df['Attrition'])
df['BusinessTravel'] = encoder_Busi.fit_transform(df['BusinessTravel'])
df['Department'] = encoder_Depar.fit_transform(df['Department'])
df['EducationField'] = encoder_Edu.fit_transform(df['EducationField'])
df['Gender'] = encoder_Gender.fit_transform(df['Gender'])
df['JobRole'] = encoder_Job.fit_transform(df['JobRole'])
df['MaritalStatus'] = encoder_Mari.fit_transform(df['MaritalStatus'])

#attrition yes=1, no-0   
#BusinessTravel Travel_Rarely=2 Travel_Frequently=1 Non-Travel =0
#Department Sales=2 Research & Development=1	 'Human Resources'=0
#EducationField Life Sciences = 1 Marketing =2 Medical =  3 Other = 4 'Technical Degree'=5, 'Human Resources'=0
#Gender Female =0 Male=1
#JobRole Sales Representative=8 Sales Executive=7 Research Scientist=6 Research Director=5 Manufacturing Director=4 Manager=3 Laboratory Technician=2 Human Resources=1 Healthcare Representative=0
#MaritalStatus Divorced=0 Married=1 Single=2


In [6]:
df

Unnamed: 0,EmployeeID,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,Gender,...,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager,EnvironmentSatisfaction,JobSatisfaction,WorkLifeBalance,JobInvolvement,PerformanceRating
0,1,51,0,2,2,6,2,1,1,0,...,1.0,6,1,0,0,3.0,4.0,2.0,3,3
1,2,31,1,1,1,10,1,1,1,0,...,6.0,3,5,1,4,3.0,2.0,4.0,2,4
2,3,32,0,1,1,17,4,4,1,1,...,5.0,2,5,0,3,2.0,2.0,1.0,3,3
3,4,38,0,0,1,2,5,1,1,1,...,13.0,5,8,7,5,4.0,4.0,3.0,2,3
4,5,32,0,2,1,10,1,3,1,1,...,9.0,2,6,0,4,4.0,1.0,3.0,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4405,4406,42,0,2,1,5,4,3,1,0,...,10.0,5,3,0,2,4.0,1.0,3.0,3,3
4406,4407,29,0,2,1,2,4,3,1,1,...,10.0,2,3,0,2,4.0,4.0,3.0,2,3
4407,4408,25,0,2,1,25,2,1,1,1,...,5.0,4,4,1,2,1.0,3.0,3.0,3,4
4408,4409,42,0,2,2,18,2,3,1,1,...,10.0,2,9,7,8,4.0,1.0,3.0,2,3


## Keras Logit 모델 fitting

In [7]:
import tensorflow as tf

# tf.keras 에 필요한 함수들이 모여있습니다.
from tensorflow.keras import datasets, utils
from tensorflow.keras import models, layers, activations, initializers, losses, optimizers, metrics

In [8]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information

In [9]:
model = models.Sequential() # Build up the "Sequence" of layers (Linear stack of layers)

# Dense-layer (with he-initialization)
model.add(layers.Dense(input_dim=16, units=256, activation=None, kernel_initializer=initializers.he_uniform())) # he-uniform initialization
model.add(layers.BatchNormalization()) # Use this line as if needed
model.add(layers.Activation('elu')) # elu or relu (or layers.ELU / layers.LeakyReLU)

model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.BatchNormalization()) # Use this line as if needed
model.add(layers.Activation('elu')) 

model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.BatchNormalization()) # Use this line as if needed
model.add(layers.Activation('elu'))

model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.BatchNormalization()) # Use this line as if needed
model.add(layers.Activation('elu')) 
model.add(layers.Dropout(rate=0.5)) # Dropout-layer

model.add(layers.Dense(units=2, activation='softmax')) # Apply softmax function on model's output

In [10]:
# "Compile" the model description (Configures the model for training)

model.compile(optimizer=optimizers.Adam(), # Please try the Adam-optimizer
              loss=losses.categorical_crossentropy, 
              metrics=[metrics.categorical_accuracy]) # Precision / Recall / F1-Score 적용하기 @ https://j.mp/3cf3lbi

# Model Load 하기 전, hdf5 파일 이름 꼭 확인하기

In [11]:
model.load_weights("-028-0.1992-0.9238.hdf5")

## FLASK 셋팅하기

In [12]:
from flask import Flask
from flask import render_template
from flask import request

In [13]:
app = Flask(__name__)

# FLASK API 구현부분

In [14]:
@app.route('/')
@app.route('/Attrition2')
def Attrition2():
    Age = request.args.get('Age'),
    DistanceFromHome= request.args.get('DistanceFromHome')
    Education= request.args.get('Education')
    EducationField= request.args.get('EducationField')
    EducationField = "{}".format(EducationField)
    EducationField=EducationField.replace("+"," ")
    JobRole= request.args.get('JobRole')
    JobRole = "{}".format(JobRole)
    JobRole=JobRole.replace("+"," ")
    MaritalStatus= request.args.get('MaritalStatus')
    NumCompaniesWorked= request.args.get('NumCompaniesWorked')
    PercentSalaryHike= request.args.get('PercentSalaryHike')
    TotalWorkingYears= request.args.get('TotalWorkingYears')
    TrainingTimesLastYear= request.args.get('TrainingTimesLastYear')
    YearsAtCompany= request.args.get('YearsAtCompany')
    YearsSinceLastPromotion=request.args.get('YearsSinceLastPromotion')
    YearsWithCurrManager= request.args.get('YearsWithCurrManager')
    EnvironmentSatisfaction=request.args.get('EnvironmentSatisfaction')
    JobSatisfaction =request.args.get('JobSatisfaction')
    WorkLifeBalance= request.args.get('WorkLifeBalance')

        
    if Age==None or DistanceFromHome==None or Education==None or EducationField==None or JobRole==None or MaritalStatus==None or NumCompaniesWorked==None or PercentSalaryHike==None or TotalWorkingYears==None or TrainingTimesLastYear==None or YearsAtCompany==None or YearsSinceLastPromotion==None or YearsWithCurrManager==None or EnvironmentSatisfaction==None or JobSatisfaction==None or WorkLifeBalance==None:
        return render_template('Attrition2.html', Output = 'X')
    
    Input = pd.DataFrame({
        'Age':[Age], 
        'DistanceFromHome':[int(DistanceFromHome)],
        'Education':[int(Education)], 
        'EducationField':encoder_Edu.transform(np.asarray([EducationField])).astype(np.int), 
        'JobRole':encoder_Job.transform(np.asarray([JobRole])).astype(np.int), 
        'MaritalStatus':encoder_Mari.transform(np.asarray([MaritalStatus])).astype(np.int), 
        'NumCompaniesWorked':[int(NumCompaniesWorked)], 
        'PercentSalaryHike':[int(PercentSalaryHike)], 
        'TotalWorkingYears':[int(TotalWorkingYears)], 
        'TrainingTimesLastYear':[int(TrainingTimesLastYear)], 
        'YearsAtCompany':[int(YearsAtCompany)], 
        'YearsSinceLastPromotion':[int(YearsSinceLastPromotion)], 
        'YearsWithCurrManager':[int(YearsWithCurrManager)], 
        'EnvironmentSatisfaction':[int(EnvironmentSatisfaction)], 
        'JobSatisfaction':[int(JobSatisfaction)], 
        'WorkLifeBalance':[int(WorkLifeBalance)]
                       
    })
#     input['EducationField'] = tf.keras.preprocessing.sequence.pad_sequences(input['EducationField'],
#                                                        value=word_to_index['<PAD>'],
#                                                        padding='post')
#     input['JobRole'] = tf.keras.preprocessing.sequence.pad_sequences(input['JobRole'],
#                                                        value=word_to_index['<PAD>'],
#                                                        padding='post')
#     input['MaritalStatus'] = tf.keras.preprocessing.sequence.pad_sequences(input['MaritalStatus'],
#                                                        value=word_to_index['<PAD>'],
#                                                        padding='post')
    

    ModelOutput = model.predict(Input)
    print(input)

    return render_template('Attrition2.html', Output = ModelOutput)

# Flask, port 5000으로 실행

In [15]:
app.run(host='0.0.0.0', port=5000)

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.0.8:5000 (Press CTRL+C to quit)
127.0.0.1 - - [11/Jul/2022 21:13:30] "GET / HTTP/1.1" 200 -
[2022-07-11 21:13:42,437] ERROR in app: Exception on /Attrition2 [GET]
Traceback (most recent call last):
  File "C:\Users\Home\miniforge3\envs\dl\lib\site-packages\flask\app.py", line 2077, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\Home\miniforge3\envs\dl\lib\site-packages\flask\app.py", line 1525, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\Home\miniforge3\envs\dl\lib\site-packages\flask\app.py", line 1523, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\Home\miniforge3\envs\dl\lib\site-packages\flask\app.py", line 1509, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\Home\AppData\Local\Temp\ipykernel_23408\1933785767.py", line 59

In [16]:
# http://127.0.0.1:5000/SpecialSale/1/38