In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import joblib
import yaml

In [5]:
# folder to load config file
CONFIG_PATH = "D:/mlops-folder/"
# Function to load yaml configuration file
def load_config(config_name):
    with open(os.path.join(CONFIG_PATH, config_name)) as file:
        config = yaml.safe_load(file)
    return config
config = load_config("params.yaml")

In [16]:
config

{'base': {'project': 'mlops-insurance',
  'random_state': 32,
  'target_col': 'charges'},
 'data_source': {'s3_source': 'source_code/insurance_course5i.csv'},
 'load_data': {'raw_dataset_csv': 'data/raw/insurance_course5i.csv'},
 'split_data': {'train_path': 'data/processed/train_insurance_course5i.csv',
  'test_path': 'data/processed/test_insurance_course5i.csv',
  'test_size': 0.2},
 'estimators': {'ElasticNet': {'params': {'alpha': 0.7, 'l1_ratio': 0.4}}},
 'model_dir': 'models',
 'reports': {'params': 'reports/params.json', 'scores': 'reports/scores.json'},
 'webapp_model_dir': 'prediction_service/model/model.joblib'}

In [21]:
config['data_source']['data'] = 'source_code/expenses.csv'

In [22]:
config

{'base': {'project': 'mlops-insurance',
  'random_state': 32,
  'target_col': 'charges'},
 'data_source': {'s3_source': 'source_code/insurance_course5i.csv',
  'data': 'source_code/expenses.csv'},
 'load_data': {'raw_dataset_csv': 'data/raw/insurance_course5i.csv'},
 'split_data': {'train_path': 'data/processed/train_insurance_course5i.csv',
  'test_path': 'data/processed/test_insurance_course5i.csv',
  'test_size': 0.2},
 'estimators': {'ElasticNet': {'params': {'alpha': 0.7, 'l1_ratio': 0.4}}},
 'model_dir': 'models',
 'reports': {'params': 'reports/params.json', 'scores': 'reports/scores.json'},
 'webapp_model_dir': 'prediction_service/model/model.joblib'}

In [24]:
data = pd.read_csv(CONFIG_PATH+config['data_source']['data'])

In [60]:
X = data.iloc[:,:]
Y = data.iloc[:,-1]

In [61]:
from sklearn.preprocessing import LabelEncoder
label = LabelEncoder()
X['region'] = label.fit_transform(X['region'])

In [62]:
label1 = LabelEncoder()
X['smoker'] = label1.fit_transform(X['smoker'])

label2 = LabelEncoder()
X['sex'] = label2.fit_transform(X['sex'])

In [63]:
X.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [64]:
X.to_csv(r'D:\mlops-folder\source_code\insurance_course5i.csv')

In [45]:
from sklearn.linear_model import ElasticNet

model =ElasticNet(alpha=config['estimators']['ElasticNet']['params']['alpha'],
                  l1_ratio=config['estimators']['ElasticNet']['params']['l1_ratio'])

In [47]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size = config['split_data']['test_size'])

In [48]:
x_train.shape,y_train.shape

((1070, 6), (1070,))

In [49]:
model.fit(x_train,y_train)

In [50]:
y_pred = model.predict(x_test)

In [51]:
out=pd.DataFrame({'y':y_test, 'y_pred':y_pred})
out

Unnamed: 0,y,y_pred
135,2155.68150,6399.448922
251,47305.30500,24336.988438
1087,11353.22760,16443.363229
942,2217.46915,10648.898615
517,8413.46305,14004.679786
...,...,...
507,3077.09550,6043.851143
426,6555.07035,10885.196726
1199,4934.70500,7964.723054
1153,5630.45785,12931.079066


In [53]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
mean_absolute_error(y_test,y_pred)

6859.247642224163

In [55]:
X

Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,0,27.900,0,1,3
1,18,1,33.770,1,0,2
2,28,1,33.000,3,0,2
3,33,1,22.705,0,0,1
4,32,1,28.880,0,0,1
...,...,...,...,...,...,...
1333,50,1,30.970,3,0,1
1334,18,0,31.920,0,0,0
1335,18,0,36.850,0,0,2
1336,21,0,25.800,0,0,3


In [56]:
import argparse

In [57]:
args = argparse.ArgumentParser()
args.add_argument("--config",default="params.yaml")

_StoreAction(option_strings=['--config'], dest='config', nargs=None, const=None, default='params.yaml', type=None, choices=None, help=None, metavar=None)

In [59]:
parsed_args = args.parse_args()

usage: ipykernel_launcher.py [-h] [--config CONFIG]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\annamalai\AppData\Roaming\jupyter\runtime\kernel-8151efb2-dca8-44a4-95cd-f637206e1374.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
parsed_args