In [150]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn import svm
from sklearn.svm import SVC
from sklearn import metrics  
from sklearn.metrics import accuracy_score,precision_score, recall_score
import time

In [151]:
# Read the Iris dataset from a CSV file located in the notebook's working directory.
df=pd.read_csv("iris.csv")
# Bare trailing expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [152]:
# Feature matrix: every column except the row identifier and the label.
X = df.drop(columns=['Id', 'Species'])
# Target vector: the species label for each row.
Y = df['Species']

In [153]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2
)

# Fit the scaler on the training rows only, then apply the same transform to both splits.
scaled = StandardScaler()
X_train = scaled.fit_transform(X_train)
X_test = scaled.transform(X_test)

In [154]:
def randomforest(X_train, Y_train, X_test):
    """Fit a random-forest classifier and predict labels for ``X_test``.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed straight to ``model.fit``.
    X_test
        Features to predict on, passed straight to ``model.predict``.

    Returns
    -------
    tuple
        ``(y_pred, model, training_time)`` where ``training_time`` is the
        number of seconds spent inside ``model.fit``.
    """
    model = RandomForestClassifier(
        criterion='gini', max_depth=5, min_samples_leaf=5,
        n_estimators=100, n_jobs=-1, oob_score=True,
        random_state=42,
    )
    # perf_counter() is the clock intended for measuring short durations:
    # unlike time.time() it cannot go backwards when the system clock is
    # adjusted and it uses the highest resolution available.
    start_time = time.perf_counter()
    model.fit(X_train, Y_train)
    training_time = time.perf_counter() - start_time
    y_pred = model.predict(X_test)
    return y_pred, model, training_time
def eval_metric(model, y_pred, training_time, y_true=None):
    """Score predictions and include the model's out-of-bag score.

    NOTE: later cells in this notebook define other functions that are also
    named ``eval_metric`` and return a 4-tuple without the OOB score. Once
    those cells have run, this definition is replaced in the kernel.

    Parameters
    ----------
    model
        Fitted estimator; its ``oob_score_`` attribute is read, so it must
        expose one (``randomforest`` builds its model with ``oob_score=True``).
    y_pred
        Predicted labels to score.
    training_time
        Returned unchanged as the last element of the result.
    y_true : optional
        Ground-truth labels. When omitted, the notebook-level ``Y_test`` is
        used, which is what this function always did before.

    Returns
    -------
    tuple
        ``(oob, acc, precision, recall, training_time)``; precision and
        recall are macro-averaged.
    """
    if y_true is None:
        # Backward-compatible fallback to the global test labels.
        y_true = Y_test
    oob = model.oob_score_
    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    return oob, acc, precision, recall, training_time


In [160]:
def DecisionTree(X_train, Y_train, X_test):
    """Fit a decision-tree classifier with default settings and predict ``X_test``.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed straight to ``model.fit``.
    X_test
        Features to predict on, passed straight to ``model.predict``.

    Returns
    -------
    tuple
        ``(test_pred_model, model, training_time)`` where ``training_time``
        is the number of seconds spent constructing and fitting the model.
    """
    # perf_counter() is the clock intended for measuring short durations:
    # unlike time.time() it cannot go backwards when the system clock is
    # adjusted and it uses the highest resolution available.
    start_time = time.perf_counter()
    model = DecisionTreeClassifier()
    model.fit(X_train, Y_train)
    training_time = time.perf_counter() - start_time
    test_pred_model = model.predict(X_test)
    return test_pred_model, model, training_time
def eval_metric(model, test_pred_model, training_time, y_true=None):
    """Compute accuracy and macro-averaged precision/recall for predictions.

    NOTE: this redefines ``eval_metric`` from the random-forest cell (which
    also returned the OOB score). The notebook defines a function with this
    name in several cells, and whichever cell ran last is the one in effect.

    Parameters
    ----------
    model
        Accepted for signature compatibility; not used here.
    test_pred_model
        Predicted labels to score.
    training_time
        Returned unchanged as the last element of the result.
    y_true : optional
        Ground-truth labels. When omitted, the notebook-level ``Y_test`` is
        used, which is what this function always did before.

    Returns
    -------
    tuple
        ``(acc, precision, recall, training_time)``.
    """
    if y_true is None:
        # Backward-compatible fallback to the global test labels.
        y_true = Y_test
    acc = accuracy_score(y_true, test_pred_model)
    precision = precision_score(y_true, test_pred_model, average='macro')
    recall = recall_score(y_true, test_pred_model, average='macro')
    return acc, precision, recall, training_time

In [166]:
def LogRegression(X_train, Y_train, X_test):
    """Fit a logistic-regression classifier with default settings and predict ``X_test``.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed straight to ``model.fit``.
    X_test
        Features to predict on, passed straight to ``model.predict``.

    Returns
    -------
    tuple
        ``(y_pred, model, training_time)`` where ``training_time`` is the
        number of seconds spent constructing and fitting the model.
    """
    # perf_counter() is the clock intended for measuring short durations:
    # unlike time.time() it cannot go backwards when the system clock is
    # adjusted and it uses the highest resolution available.
    start_time = time.perf_counter()
    model = LogisticRegression()
    model.fit(X_train, Y_train)
    training_time = time.perf_counter() - start_time
    y_pred = model.predict(X_test)
    return y_pred, model, training_time

def eval_metric(model, y_pred, training_time, y_true=None):
    """Compute accuracy and macro-averaged precision/recall for predictions.

    NOTE: the notebook defines a function named ``eval_metric`` in several
    cells; each definition replaces the previous one, so whichever cell ran
    last is the one in effect.

    Parameters
    ----------
    model
        Accepted for signature compatibility; not used here.
    y_pred
        Predicted labels to score.
    training_time
        Returned unchanged as the last element of the result.
    y_true : optional
        Ground-truth labels. When omitted, the notebook-level ``Y_test`` is
        used, which is what this function always did before.

    Returns
    -------
    tuple
        ``(acc, precision, recall, training_time)``.
    """
    if y_true is None:
        # Backward-compatible fallback to the global test labels.
        y_true = Y_test
    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    return acc, precision, recall, training_time

In [172]:
def SVM(X_train, Y_train, X_test, kernel='linear'):
    """Fit a support-vector classifier and predict labels for ``X_test``.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed straight to ``model.fit``.
    X_test
        Features to predict on, passed straight to ``model.predict``.
    kernel : optional
        Forwarded to ``svm.SVC(kernel=...)``. Defaults to ``'linear'``, the
        value this function previously hard-coded.

    Returns
    -------
    tuple
        ``(y_pred, model, training_time)`` where ``training_time`` is the
        number of seconds spent constructing and fitting the model.
    """
    # perf_counter() is the clock intended for measuring short durations:
    # unlike time.time() it cannot go backwards when the system clock is
    # adjusted and it uses the highest resolution available.
    start_time = time.perf_counter()
    model = svm.SVC(kernel=kernel)
    model.fit(X_train, Y_train)
    training_time = time.perf_counter() - start_time
    y_pred = model.predict(X_test)
    return y_pred, model, training_time

def eval_metric(model, y_pred_class, training_time, y_true=None):
    """Compute accuracy and macro-averaged precision/recall for predictions.

    NOTE: the notebook defines a function named ``eval_metric`` in several
    cells; each definition replaces the previous one, so whichever cell ran
    last is the one in effect.

    Parameters
    ----------
    model
        Accepted for signature compatibility; not used here.
    y_pred_class
        Predicted labels to score.
    training_time
        Returned unchanged as the last element of the result.
    y_true : optional
        Ground-truth labels. When omitted, the notebook-level ``Y_test`` is
        used, which is what this function always did before.

    Returns
    -------
    tuple
        ``(acc, precision, recall, training_time)``.
    """
    if y_true is None:
        # Backward-compatible fallback to the global test labels.
        y_true = Y_test
    acc = accuracy_score(y_true, y_pred_class)
    precision = precision_score(y_true, y_pred_class, average='macro')
    recall = recall_score(y_true, y_pred_class, average='macro')
    return acc, precision, recall, training_time

In [177]:
def SVM_kernel(X_train, Y_train, X_test, kernel='rbf'):
    """Fit a kernelised support-vector classifier and predict labels for ``X_test``.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels, passed straight to ``model.fit``.
    X_test
        Features to predict on, passed straight to ``model.predict``.
    kernel : optional
        Forwarded to ``svm.SVC(kernel=...)``. Defaults to ``'rbf'``, the
        value this function previously hard-coded.

    Returns
    -------
    tuple
        ``(y_pred, model, training_time)`` where ``training_time`` is the
        number of seconds spent constructing and fitting the model.
    """
    # perf_counter() is the clock intended for measuring short durations:
    # unlike time.time() it cannot go backwards when the system clock is
    # adjusted and it uses the highest resolution available.
    start_time = time.perf_counter()
    model = svm.SVC(kernel=kernel)
    model.fit(X_train, Y_train)
    training_time = time.perf_counter() - start_time
    y_pred = model.predict(X_test)
    return y_pred, model, training_time

def eval_metric(model, y_pred_class, training_time, y_true=None):
    """Compute accuracy and macro-averaged precision/recall for predictions.

    NOTE: the notebook defines a function named ``eval_metric`` in several
    cells; each definition replaces the previous one. This is the last
    definition in the notebook, so it is the one in effect after a
    top-to-bottom run.

    Parameters
    ----------
    model
        Accepted for signature compatibility; not used here.
    y_pred_class
        Predicted labels to score.
    training_time
        Returned unchanged as the last element of the result.
    y_true : optional
        Ground-truth labels. When omitted, the notebook-level ``Y_test`` is
        used, which is what this function always did before.

    Returns
    -------
    tuple
        ``(acc, precision, recall, training_time)``.
    """
    if y_true is None:
        # Backward-compatible fallback to the global test labels.
        y_true = Y_test
    acc = accuracy_score(y_true, y_pred_class)
    precision = precision_score(y_true, y_pred_class, average='macro')
    recall = recall_score(y_true, y_pred_class, average='macro')
    return acc, precision, recall, training_time

In [178]:
# Train the RBF-kernel SVM, score it on the test split, and print the results.
# NOTE: eval_metric resolves to whichever definition ran last in the kernel; the
# 4-value unpacking below matches the definitions that return
# (acc, precision, recall, training_time).
if __name__=="__main__":  
    y_pred,model,training_time=SVM_kernel(X_train,Y_train,X_test)
    # eval_metric hands training_time back unchanged; it is simply rebound here.
    acc,precision,recall,training_time=eval_metric(model,y_pred,training_time)
    print("Accuracy=",acc)
    print("Precision=",precision)
    print("Recall=",recall)
    print("Time(in sec)=",training_time)

Accuracy= 0.9666666666666667
Precision= 0.9629629629629629
Recall= 0.9583333333333334
Time(in sec)= 0.005040884017944336


In [179]:
# Names under which this run is recorded when create_experiment is called.
experiment_name = "MLOps_CA_2"
run_name="SVM_kernel"
# Re-score the predictions from the previous cell; the result is the tuple
# returned by eval_metric, which is passed on to create_experiment as run_metrics.
run_metrics = eval_metric(model, y_pred,training_time)
print(run_metrics)

(0.9666666666666667, np.float64(0.9629629629629629), np.float64(0.9583333333333334), 0.005040884017944336)


In [180]:
def create_experiment(experiment_name, run_name, run_metrics, model,
                      run_params=None):
    """Log one training run (parameters, metrics, model and tags) to MLflow.

    Parameters
    ----------
    experiment_name
        Passed to ``mlflow.set_experiment``.
    run_name
        Passed to ``mlflow.start_run`` and also stored as the ``tag1`` tag.
    run_metrics : list, tuple or dict
        A list/tuple is logged positionally as ``metric_0``, ``metric_1``, ...
        A dict is logged under its own keys. Every value is converted with
        ``float()``. Any other type results in no metrics being logged.
    model
        Passed to ``mlflow.sklearn.log_model`` under the artifact name
        ``"model"``.
    run_params : dict, optional
        Each key/value pair is logged with ``mlflow.log_param``. ``None``
        (the default) logs no parameters.
    """
    # Imported here so the rest of the notebook does not require mlflow.
    import mlflow

    # Uncomment to send runs to a tracking server (e.g. one backed by sqlite)
    # instead of the default store:
    #mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment(experiment_name)

    with mlflow.start_run(run_name=run_name):

        # Identity check: `== None` can be hijacked by objects that overload ==.
        if run_params is not None:
            for param in run_params:
                mlflow.log_param(param, run_params[param])

        if isinstance(run_metrics, dict):
            # Named metrics were previously dropped silently; log them by key.
            for metric_name, metric_value in run_metrics.items():
                mlflow.log_metric(metric_name, float(metric_value))
        elif isinstance(run_metrics, (list, tuple)):
            for i, metric_value in enumerate(run_metrics):
                mlflow.log_metric(f'metric_{i}', float(metric_value))

        mlflow.sklearn.log_model(model, "model")

        # Tag the run with its own name; this used to be the literal
        # "SVM_kernel", which mislabelled every run of a different model.
        mlflow.set_tag("tag1", run_name)
        # NOTE(review): nothing in the visible notebook performs a randomized
        # search, so "tag2" looks like a leftover - confirm the intended value.
        mlflow.set_tags({"tag2":"Randomized Search CV", "tag3":"Production"})

    print('Run - %s is logged to Experiment - %s' %(run_name, experiment_name))

In [181]:
# Log the run to MLflow; run_params is left at its default of None, so no
# parameters are recorded for this run.
create_experiment(experiment_name,run_name,run_metrics,model)



Run - SVM_kernel is logged to Experiment - MLOps_CA_2
