In [None]:
import os
import pickle
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
import dagshub
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from evidently import Dataset, DataDefinition
from evidently import Report
from evidently import BinaryClassification
from evidently.presets import DataDriftPreset, ClassificationPreset
from evidently.ui.workspace import Workspace

In [None]:
import evidently

# Record which evidently release this kernel is running.
evidently_version = evidently.__version__
print(evidently_version)

In [None]:
#dagshub.init(repo_owner='King-David02', repo_name='Rock-vs-Mine', mlflow=True)

In [None]:
# Read the sonar table from disk; ending the cell on the bare name renders the frame.
SONAR_CSV = 'data/Sonar.csv'
data = pd.read_csv(SONAR_CSV)
data

In [None]:
# Rename the label column to 'Target', the name every later cell refers to.
label_rename = {'Label': 'Target'}
data = data.rename(columns=label_rename)
data

In [None]:
# Labels first, so a missing 'Target' column fails here before X is (re)bound.
y = data['Target']

# Features are every column except the label. 'prediction' is excluded as well because
# a later cell adds that column to `data` in place; without this, re-running this cell
# after that one would feed the model's own output back in as a feature.
# errors='ignore' keeps the first run (no 'prediction' column yet) working.
X = data.drop(columns=['Target', 'prediction'], errors='ignore')

In [None]:
# Display the feature frame built from `data` (everything except 'Target').
X

In [None]:
# Display the label series taken from data['Target'].
y

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Display the held-out feature rows produced by train_test_split.
X_test

In [None]:
# Scaling and the random forest live in one pipeline, so the scaler fitted on the
# training rows is the one applied whenever the pipeline predicts.
pipeline = make_pipeline(
    StandardScaler(),
    RandomForestClassifier(n_estimators=100, random_state=42),
)
pipeline.fit(X_train, y_train)

# Predictions for the held-out rows, cast to str for the comparison against y_test.
y_pred = pipeline.predict(X_test).astype(str)

# Predictions for every row of the sonar data. Use X rather than re-deriving the
# features from `data`: a later cell adds a 'prediction' column to `data` in place,
# so `data.drop('Target', axis=1)` no longer matches the training features when this
# cell is re-run.
sonar_data_prediction = pipeline.predict(X)

# joblib.dump does not create missing directories, so make sure 'model/' exists.
os.makedirs('model', exist_ok=True)
joblib.dump(pipeline, 'model/pipeline.pkl')

In [None]:
# Display the pipeline's predictions for X_test (cast to str when they were computed).
y_pred

In [None]:
# Score the second dataset with the fitted pipeline and persist it as the "current"
# batch for monitoring. The frame is re-read from disk each time, so re-running this
# cell always starts from a frame without the 'prediction' column added below.
other_data = pd.read_csv('data/rock_vs_mine_dataset.csv')
other_data['prediction'] = pipeline.predict(other_data.drop(columns='Target'))

# to_csv raises if the target directory is missing, so create it first.
os.makedirs('data/predicted_data', exist_ok=True)
other_data.to_csv('data/predicted_data/current_data.csv', index=False)

In [None]:
# Attach the full-dataset predictions to `data` and persist it as the "reference"
# batch. The column is written in place because the Evidently cells further down read
# `data` and expect it to carry 'prediction'.
# NOTE(review): these predictions cover the rows the pipeline was trained on, so
# classification metrics computed on this reference will look better than held-out
# performance.
data['prediction'] = sonar_data_prediction

# to_csv raises if the target directory is missing, so create it first.
os.makedirs('data/predicted_data', exist_ok=True)
data.to_csv('data/predicted_data/reference_data.csv', index=False)

In [None]:
# confusion_matrix is already imported in the top import cell; no re-import needed.
# Passing the fitted classes as `labels` pins the row/column order and keeps the
# matrix square even if a class happens to be absent from the test split.
class_labels = pipeline.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Show the counts with the class names attached (rows = actual, columns = predicted).
pd.DataFrame(cm, index=class_labels, columns=class_labels).rename_axis(
    index='actual', columns='predicted'
)

In [None]:
# Describe the binary task for Evidently: which column holds the truth, which holds
# the predicted label, how the raw codes map to display names, and which code is
# treated as the positive class.
rock_vs_mine_task = BinaryClassification(
    target='Target',
    prediction_labels='prediction',
    labels={'R': 'Rock', 'M': 'Mine'},
    pos_label='R',
)
definition = DataDefinition(classification=[rock_vs_mine_task])

In [None]:
# Wrap both frames with the same task definition so Evidently can compare them:
# `other_data` is the current batch, `data` is the reference batch.
curr_data = Dataset.from_pandas(
    data=other_data,
    data_definition=definition,
)
reff_data = Dataset.from_pandas(
    data=data,
    data_definition=definition,
)

In [None]:
# One report covering both feature drift and classification quality.
monitoring_presets = [DataDriftPreset(), ClassificationPreset()]
report = Report(metrics=monitoring_presets)

In [None]:
# Evaluate the report on the current batch against the reference batch.
# NOTE(review): this rebinds `report` to whatever run(...) returns, so re-running this
# cell on its own calls .run on that result instead of on the Report built in the
# previous cell. Re-run the previous cell first.
report = report.run(
    reference_data=reff_data,
    current_data=curr_data,
)

In [None]:
# Workspace is imported with the other dependencies in the top import cell.
# Create the Evidently workspace at the relative path 'workspace'.
ws = Workspace.create('workspace')

In [None]:
# Reuse the dashboard project when it already exists. Calling create_project
# unconditionally adds another 'RvM_Dashboard' project on every run of this cell.
# search_project is expected to return the list of projects with this name.
PROJECT_NAME = 'RvM_Dashboard'
matching_projects = ws.search_project(PROJECT_NAME)
project = matching_projects[0] if matching_projects else ws.create_project(PROJECT_NAME)
project

In [None]:
# Attach the evaluated report to the dashboard project in the workspace.
dashboard_project_id = project.id
ws.add_run(dashboard_project_id, report)

In [None]:
# Disabled experiment: the code below sits inside a bare triple-quoted string, so none
# of it executes; the cell only evaluates to the string itself.
# It holds the earlier manual approach of fitting a standalone StandardScaler on
# X_train and applying it to X_test.
'''st = StandardScaler()
X_train = st.fit_transform(X_train)
X_test = st.transform(X_test)'''

In [None]:
# Disabled experiment (bare string literal, never executed): trains a standalone
# RandomForestClassifier and dumps the scaler and model to separate .pkl files.
# It refers to `st`, which is only defined inside another disabled cell.
'''rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
joblib.dump(st, "scaler.pkl")
joblib.dump(rf, "rf_model.pkl")'''

In [None]:
# Disabled experiment (bare string literal, never executed): an MLflow run named
# 'Artifacts Method' that scales and trains by hand, then logs the classification
# report, the scaler and the model as individual artifacts.
# NOTE(review): if re-enabled, it overwrites X_train / X_test with scaled arrays, which
# would change the inputs of every cell that uses those names afterwards.
'''with mlflow.start_run(run_name='Artifacts Method'):
    mlflow.autolog()
    st = StandardScaler()
    X_train = st.fit_transform(X_train)
    X_test = st.transform(X_test)
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    report_str = classification_report(y_test, y_pred)
    with open("classification_report.txt", "w") as f:
        f.write(report_str)
    mlflow.log_artifact("classification_report.txt")
    joblib.dump(st, "scaler.pkl")
    mlflow.log_artifact("scaler.pkl")
    joblib.dump(rf, "rf_model.pkl")
    mlflow.log_artifact("rf_model.pkl")
    #mlflow.sklearn.log_model(rf, name="model")'''

In [None]:
# Disabled experiment (bare string literal, never executed): an MLflow run named
# 'Random Forest' that fits a scaler + random-forest pipeline, infers a model signature
# from the first 10 rows of X, and logs the classification report and the model.
# NOTE(review): if re-enabled, it rebinds `pipeline` and `y_pred`, and the forest here
# is built without the n_estimators / random_state used in the active training cell.
'''with mlflow.start_run(run_name='Random Forest'):
    mlflow.autolog()
    pipeline = make_pipeline(StandardScaler(),
                            RandomForestClassifier()
                            )
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    sample_data = X[:10]
    sample_prediction = pipeline.predict(sample_data)
    signature = infer_signature(sample_data, sample_prediction)
    report_str = classification_report(y_test, y_pred)
    with open("classification_report.txt", "w") as f:
        f.write(report_str)
    mlflow.log_artifact("classification_report.txt")
    mlflow.sklearn.log_model(pipeline, "model", signature=signature)
    #joblib.dump(pipeline, "pipeline.pkl")
    #mlflow.log_artifact("pipeline.pkl")'''

In [None]:
# Disabled (bare string literal, never executed): would pickle `pipeline` to
# 'model.pkl'. The active training cell saves the pipeline with joblib instead.
'''with open('model.pkl', 'wb') as file:
    pickle.dump(pipeline, file)'''

In [None]:
#print(mlflow.get_tracking_uri())

In [None]:
# Disabled experiment (bare string literal, never executed): an MLflow run named
# 'Logistic' that fits a scaler + LogisticRegression pipeline, infers a signature from
# the first 10 rows of X, and logs its classification report as an artifact.
# The model-logging calls at the end are commented out inside the string as well.
'''with mlflow.start_run(run_name='Logistic'):
    mlflow.autolog()
    pipeline2 = make_pipeline(
        StandardScaler(),
        LogisticRegression()
    )
    pipeline2.fit(X_train, y_train)
    prediction = pipeline2.predict(X_test)
    sample_data = X[:10]
    sample_prediction = pipeline2.predict(sample_data)
    signature2 = infer_signature(sample_data, sample_prediction)
    report_str_lr = classification_report(y_test, prediction)
    with open("lr_classification_report.txt", "w") as f:
        f.write(report_str_lr)
    mlflow.log_artifact("lr_classification_report.txt")
    #joblib.dump(pipeline2, "lr_model.pkl")
    #mlflow.log_artifact("lr_model.pkl")
    #mlflow.sklearn.log_model(pipeline2, "lrmodel", signature=signature2)'''

In [None]:
import sys

# Show which Python interpreter backs this kernel (useful when several envs exist).
interpreter_path = sys.executable
print(interpreter_path)