In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV,GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn. pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder


from jcopml.tuning import grid_search_params as gsp
from jcopml.tuning import random_search_params as rsp
from jcopml.pipeline import cat_pipe,num_pipe
from jcopml.plot import plot_missing_value
from jcopml.feature_importance import mean_score_decrease
from jcopml.plot import plot_residual


In [13]:
# Load the weight/height dataset and preview its first rows.
df = pd.read_csv("data/weight-height.csv")
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [14]:
# Check column dtypes and non-null counts before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 3 columns):
Gender    10000 non-null object
Height    10000 non-null float64
Weight    10000 non-null float64
dtypes: float64(2), object(1)
memory usage: 234.5+ KB


In [15]:
# Separate the regression target (Weight) from the predictor columns.
y = df["Weight"]
X = df.drop(columns=["Weight"])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Display the resulting shapes as a sanity check.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8000, 2), (2000, 2), (8000,), (2000,))

In [16]:
# List the feature column names that the ColumnTransformer steps refer to.
X_train.columns

Index(['Gender', 'Height'], dtype='object')

In [17]:
# Inspect jcopml's predefined grid-search space for linear regression,
# which is passed to the hyperparameter search below.
gsp.linreg_params

{'algo__fit_intercept': [True, False]}

In [19]:
# Preprocessing built from jcopml's ready-made pipelines: the numeric column is
# requested with standard scaling, the categorical column with ordinal encoding.
preprocessor = ColumnTransformer([
    ('numeric', num_pipe(scaling='standard'), ['Height']),
    ('categoric', cat_pipe(encoder='ordinal'), ['Gender']),
])

pipiline = Pipeline([
    ('prep', preprocessor),
    ('algo', LinearRegression()),
])

# gsp.linreg_params is a discrete grid with only two candidates, so it is
# searched exhaustively with GridSearchCV. RandomizedSearchCV with n_iter=50
# would request more samples than the grid contains (scikit-learn warns and
# falls back to the full grid) and, without a random_state, adds nothing here.
model = GridSearchCV(pipiline, gsp.linreg_params, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)

print(model.best_params_)
# Scores, in order: training set, mean cross-validation score, held-out test set.
print(model.score(X_train, y_train), model.best_score_, model.score(X_test, y_test))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.1s finished


Fitting 3 folds for each of 2 candidates, totalling 6 fits
{'algo__fit_intercept': True}
0.9025904849221458 0.9023858419346084 0.9033321846361925


In [25]:
# Preprocessing assembled by hand from scikit-learn primitives:
# scale the numeric feature, ordinal-encode the categorical one.
numerical_pipeline = Pipeline(steps=[("scaler", StandardScaler())])
categorical_pipeline = Pipeline(steps=[("encode", OrdinalEncoder())])

preprocessor = ColumnTransformer(
    transformers=[
        ("numeric", numerical_pipeline, ["Height"]),
        ("categorical", categorical_pipeline, ["Gender"]),
    ]
)

# Full estimator: preprocessing followed by ordinary least squares.
pipiline = Pipeline(steps=[("prep", preprocessor), ("algo", LinearRegression())])

# Search space: fit with and without an intercept term.
parameter = {"algo__fit_intercept": [True, False]}

model = GridSearchCV(
    estimator=pipiline,
    param_grid=parameter,
    cv=3,
    n_jobs=-1,
    verbose=1,
)
model.fit(X_train, y_train)

print(model.best_params_)
# Scores, in order: training set, mean cross-validation score, held-out test set.
print(
    model.score(X_train, y_train),
    model.best_score_,
    model.score(X_test, y_test),
)

Fitting 3 folds for each of 2 candidates, totalling 6 fits
{'algo__fit_intercept': True}
0.9025904849221458 0.9023858419346084 0.9033321846361925


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.0s finished


In [26]:
import pickle
import os

In [27]:
# Create the output directory for the serialized model; exist_ok=True makes
# re-running this cell safe when the directory is already there.
os.makedirs("model", exist_ok=True)

In [28]:
# Persist the fitted search object to disk. The context manager closes the
# file handle deterministically, so the pickle is fully flushed before any
# other process (e.g. the Flask app) tries to load it.
with open("model/linear-weight.pkl", "wb") as model_file:  # wb = write binary
    pickle.dump(model, model_file)

In [None]:
import pandas as pd
import pickle
from flask import Flask, render_template, request

app = Flask(__name__)

# NOTE: unpickling can execute arbitrary code, so this file must only ever
# come from a trusted source. The context manager closes the file handle as
# soon as the model has been loaded instead of leaking it for the process
# lifetime.
with open("model/linear-weight.pkl", "rb") as model_file:
    model = pickle.load(model_file)

@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the upload form, or score an uploaded CSV and return it as HTML.

    Non-POST requests (GET, and the HEAD method Flask registers automatically
    alongside GET) render the ``portofolio2.html`` template.

    POST requests must carry a CSV file in the ``file`` form field. The file is
    read into a DataFrame, the model's predictions are appended as a ``pred``
    column, and the resulting table is returned as an HTML string.

    Returns:
        The rendered template or HTML table on success, or a
        ``(message, 400)`` tuple when the upload is missing, cannot be parsed
        as CSV, or cannot be scored by the model.
    """
    # Treat everything that is not a POST as a page view; otherwise a HEAD
    # request would fall through every branch and the view would return None.
    if request.method != "POST":
        return render_template("portofolio2.html")

    csvfile = request.files.get("file")
    # A form submitted without choosing a file yields an empty filename.
    if csvfile is None or not csvfile.filename:
        return "No CSV file was uploaded in the 'file' field.", 400

    try:
        X_test = pd.read_csv(csvfile)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        return "The uploaded file could not be read as CSV.", 400

    try:
        predictions = model.predict(X_test)
    except (KeyError, ValueError):
        # Raised when the upload lacks the columns or contains values the
        # fitted pipeline cannot handle; report it as a client error.
        return (
            "The uploaded CSV could not be scored; check that it has the "
            "columns and values the model was trained on.",
            400,
        )

    X_test["pred"] = predictions
    return X_test.to_html()

if __name__ == "__main__":
    # NOTE(review): host "0.0.0.0" binds every network interface, which makes
    # the development server reachable from other machines on the network.
    # The port is passed as an int, the type Flask's run() documents.
    app.run(host="0.0.0.0", port=5000)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses.
 * Running on http://192.168.43.60:5000/ (Press CTRL+C to quit)
192.168.43.60 - - [17/Apr/2022 20:38:48] "GET / HTTP/1.1" 200 -
192.168.43.60 - - [17/Apr/2022 20:38:48] "GET /favicon.ico HTTP/1.1" 404 -
