# In [50]: (Jupyter notebook cell marker left over from export)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pandas as pd

import os
import glob
import argparse

def main(args):
    """Run the training pipeline: load the CSV data, split it, fit the model.

    Args:
        args: Parsed command-line arguments. ``args.training_data`` is passed
            to ``get_csvs_df`` and ``args.reg_rate`` to ``train_model``.
    """
    # Load every CSV under the training-data path into one frame.
    frame = get_csvs_df(args.training_data)

    # split_data yields (X_train, X_test, y_train, y_test), in that order.
    features_train, features_test, labels_train, labels_test = split_data(frame)

    # Fit the classifier on the training portion.
    train_model(
        args.reg_rate,
        features_train,
        features_test,
        labels_train,
        labels_test,
    )
    

def get_csvs_df(path):
    """Load every ``*.csv`` file directly under *path* into one DataFrame.

    Files are read in sorted filename order so the concatenated row order is
    the same on every machine; ``glob`` itself returns matches in arbitrary
    order, which would make any later seeded shuffle non-reproducible.

    Args:
        path: Directory that contains the CSV files.

    Returns:
        A ``pandas.DataFrame`` holding the rows of all CSV files, concatenated
        with ``sort=False`` (each file's own row index is kept as-is).

    Raises:
        RuntimeError: If *path* does not exist, or no ``*.csv`` file is found
            in it.
    """
    if not os.path.exists(path):
        raise RuntimeError(f"Cannot use non-existent path provided: {path}")

    # Escape the directory part so glob metacharacters in it ("[", "*", "?")
    # are matched literally; only the "*.csv" suffix acts as a pattern.
    pattern = os.path.join(glob.escape(os.fspath(path)), "*.csv")
    csv_files = sorted(glob.glob(pattern))
    if not csv_files:
        raise RuntimeError(f"No CSV file found in provide data path: {path}")

    return pd.concat((pd.read_csv(f) for f in csv_files), sort=False)
    
    

def split_data(df):
    """Split the frame into train and test feature/label arrays.

    The eight feature columns listed below and the ``Diabetic`` label column
    are taken from *df* via ``.values``; 30% of the rows are held out for
    testing, with ``random_state=0`` as the seed.

    Args:
        df: DataFrame containing the feature columns and a ``Diabetic`` column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    feature_columns = [
        'Pregnancies',
        'PlasmaGlucose',
        'DiastolicBloodPressure',
        'TricepsThickness',
        'SerumInsulin',
        'BMI',
        'DiabetesPedigree',
        'Age',
    ]
    features = df[feature_columns].values
    labels = df['Diabetic'].values

    train_features, test_features, train_labels, test_labels = train_test_split(
        features,
        labels,
        test_size=0.30,
        random_state=0,
    )
    return train_features, test_features, train_labels, test_labels

def train_model(reg_rate, X_train, X_test, y_train, y_test):
    """Fit a logistic-regression classifier and return the fitted estimator.

    Args:
        reg_rate: Regularization rate; the estimator is built with
            ``C=1/reg_rate``, so a larger rate means stronger regularization.
        X_train: Training features.
        X_test: Test features. Accepted to keep the call signature stable;
            not used by this function.
        y_train: Training labels.
        y_test: Test labels. Accepted to keep the call signature stable;
            not used by this function.

    Returns:
        The fitted ``LogisticRegression`` instance, so callers can evaluate
        or persist it (previously the model was discarded).

    Raises:
        ZeroDivisionError: If ``reg_rate`` is zero.
    """
    model = LogisticRegression(C=1 / reg_rate, solver="liblinear")
    # fit() trains in place and returns the estimator itself.
    model.fit(X_train, y_train)
    return model

def parse_args(argv=None):
    """Parse the command-line arguments of the training script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, as before.

    Returns:
        An ``argparse.Namespace`` with:
            training_data (str): value of ``--training_data`` (required).
            reg_rate (float): value of ``--reg_rate``; defaults to ``0.01``.

    Raises:
        SystemExit: Raised by argparse when ``--training_data`` is missing or
            an argument cannot be parsed.
    """
    parser = argparse.ArgumentParser()

    # Required: without it the value would be None and the failure would only
    # surface later, as an unrelated-looking error while loading the data.
    parser.add_argument("--training_data", dest='training_data',
                        type=str, required=True)
    parser.add_argument("--reg_rate", dest='reg_rate',
                        type=float, default=0.01)

    return parser.parse_args(argv)

# run script
if __name__ == "__main__":
    # Opening banner: blank lines, then a 60-character rule, to set the run
    # apart in the logs.
    print("\n\n", "*" * 60, sep="\n")

    # Parse the command line and run the pipeline with the result.
    args = parse_args()
    main(args)

    # Closing banner: the rule first, then blank lines.
    print("*" * 60, "\n\n", sep="\n")
