## Evaluation Pipeline

### Import Libraries

In [None]:
import os
import pickle
import shutil

import numpy as np
import pandas as pd

# Plotting libraries
import plotly.express as px
import plotly.graph_objects as go

# Custom libraries
import src.config as config
import src.helpers_mlflow as mlf
import src.helpers_preprocess as pp

import importlib

# Reload the project-local helper modules imported in this notebook so that
# edits made to them on disk take effect without restarting the kernel.
# (The previous call reloaded `scrap`, a name that is never imported in this
# notebook, and therefore raised NameError.)
importlib.reload(pp)
importlib.reload(mlf)
importlib.reload(config)

In [None]:
# Ensure each configured location exists, creating it when it does not.
# Explanations of the individual paths can be found in config.py.
for _path_attr in (
    "main_directory",
    "output_path",
    "raw_data_path",
    "inference_output",
):
    # Look up the path on `config` right before use, one location at a time.
    config.create_path(getattr(config, _path_attr))

##### Import latest dataset and predictions

In [None]:
# Feature dataset: ask config.get_latest_csv for the 'full_features' file
# under the raw data path (presumably the most recent one, going by the
# helper's name -- confirm in config.py).
curr_feats = config.get_latest_csv(
    config.raw_data_path,
    'full_features',
)

In [None]:
# Prediction output: fetch the 'prediction' file from the inference output
# path and discard the "Unnamed: 0" column (presumably the index column
# written when the CSV was saved -- confirm against the inference step).
curr_pred = (
    config.get_latest_csv(config.inference_output, 'prediction')
    .drop(columns=["Unnamed: 0"])
)

##### Group by DataSource

In [None]:
# Data transformation for building the graphs below.
# Step 1: number of posts per (data_source, incompliant) pair, as a flat
# frame with the columns data_source / incompliant / count.
source_incompliant_cnt = (
    curr_feats
    .groupby(['data_source', 'incompliant'])
    .size()
    .reset_index(name='count')
)

# Step 2: swap the 1/0 flag for readable labels (used as the bar colour key).
_flag_labels = {1: 'incompliant', 0: 'compliant'}
source_incompliant_cnt = source_incompliant_cnt.replace({"incompliant": _flag_labels})

# Step 3: keep only the incompliant rows, as (data_source, count), with a
# fresh 0..n-1 index; this feeds the line overlay.
_is_incompliant = source_incompliant_cnt['incompliant'] == 'incompliant'
ttl_source_incompliant_cnt = (
    source_incompliant_cnt
    .loc[_is_incompliant, ['data_source', 'count']]
    .reset_index(drop=True)
)


In [None]:
# Bar chart - compliant / incompliant post counts, grouped side by side per
# data source and coloured by the compliant/incompliant label.
fig1 = px.bar(source_incompliant_cnt, x='data_source', y='count', color='incompliant', barmode='group')

# Line chart - total incompliant posts per data source, overlaid on the bars.
# `go` is plotly.graph_objects, imported with the other plotting libraries at
# the top of the notebook.
line_chart = go.Scatter(
    x=ttl_source_incompliant_cnt['data_source'],
    y=ttl_source_incompliant_cnt['count'],
    mode='lines',
    name='Total Incompliant Posts',
)
fig1.add_trace(line_chart)

# All layout settings applied in a single call.
fig1.update_layout(
    title_text="Post Count by Data Sources",
    height=600,
    # Horizontal legend without a title, anchored by its bottom-right corner
    # just above the plotting area (y=1.02) at the right edge (x=1).
    legend=dict(
        title="",
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
    # Alpha of 0 makes both the paper and the plot background transparent.
    paper_bgcolor="rgba(0, 0, 0, 0)",
    plot_bgcolor="rgba(0, 0, 0, 0)",
)
fig1.update_xaxes(title_text="Data Source")
fig1.update_yaxes(title_text="Count of Posts", showgrid=True, gridwidth=2, gridcolor='DarkGrey')

fig1.show()