In [8]:
#python packages
import dataframe_image as dfi
import pandas as pd 
import plotly.express as px
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import datetime
import numpy as np
import matplotlib.pyplot as plt

#Data import
# Reader options shared by both pandas spellings of "skip malformed rows".
_read_options = dict(sep=',', encoding='latin-1', engine='python', quotechar='"', index_col=0)
try:
    # pandas >= 1.3: 'warn' reports each malformed row and skips it, which is
    # what error_bad_lines=False did together with the default warn_bad_lines.
    Data_RDK1 = pd.read_csv('Data_RDK1.csv', on_bad_lines='warn', **_read_options)
except TypeError:
    # Older pandas rejects the unknown keyword; fall back to the legacy flag
    # (deprecated in 1.3 and removed in 2.0).
    Data_RDK1 = pd.read_csv('Data_RDK1.csv', error_bad_lines=False, **_read_options)

#making the attribute with the given timelimit
# The allowed turnaround is the gap between two fixed timestamps (one hour).
limit_start = pd.to_datetime('01-09-2020 12:00')
limit_end = pd.to_datetime('01-09-2020 13:00:00')
# Helper frames keep their original names but follow the data's own index
# instead of a hard-coded row count, so they always match the loaded file.
giv1 = pd.DataFrame({'date1': limit_start}, index=Data_RDK1.index)
giv2 = pd.DataFrame({'date': limit_end}, index=Data_RDK1.index)
giv3 = pd.DataFrame({'Time_given': limit_end - limit_start}, index=Data_RDK1.index)


#Changing the date and times coherent to one datetime variable
def _combine_date_time(date_column, time_column, name):
    """Join a date column and a time column of Data_RDK1 into one datetime Series.

    The two text columns are concatenated with a space and parsed with the
    '%d/%m/%y %H:%M' format; the resulting Series is named *name*.
    """
    stamped = Data_RDK1[date_column] + ' ' + Data_RDK1[time_column]
    return pd.to_datetime(stamped, format='%d/%m/%y %H:%M').rename(name)


Data_datetime_Activation = _combine_date_time('Activation_date', 'Activation_time', "DateTime_Activation")
Data_datetime_sampling = _combine_date_time('Sampling_date', 'Sampling_time', "DateTime_sampling")
Data_datetime_receipt = _combine_date_time('date_of_receipt', 'time_of_receipt', 'DateTime_receipt')
Data_datetime_analysis = _combine_date_time('Analysis_date', 'Analysis_time', 'DateTime_analysis')

#Concatenating the cleaned dataset
Data_datetime = pd.concat(
    [
        Data_RDK1['Sample_material_container_number'],
        Data_datetime_sampling,
        Data_datetime_receipt,
        Data_datetime_analysis,
        Data_datetime_Activation,
        Data_RDK1['Equipment'],
        Data_RDK1['quantity_name'],
        Data_RDK1['Sampling_circumstance'],
        Data_RDK1['Result'],
    ],
    axis=1,
    join='inner',
)
# Use the receipt time, falling back to the sampling time where receipt is missing.
Data_datetime['DateTime_receipt_sampling'] = Data_datetime["DateTime_receipt"].fillna(Data_datetime['DateTime_sampling'])
Data_datetime['Receipt_to_Analysis'] = (Data_datetime['DateTime_analysis'] - Data_datetime['DateTime_receipt_sampling'])
# Assign the constant limit directly: aligning a separately indexed frame would
# silently produce NaT wherever the two indexes do not match.
Data_datetime['Time_given'] = limit_end - limit_start

#Removing all negative samples from Receipt to analysis
#First seperate positive and negativ samples
# Compare against a real zero Timedelta so no duration can fall between the
# two masks. Rows whose duration is NaT match neither mask and are left out.
no_delay = pd.Timedelta(0)
Data_datetime1 = Data_datetime[Data_datetime['Receipt_to_Analysis'] >= no_delay] #all positive values 130.755
# .copy() makes the negative subset an independent frame, so the assignment
# below does not raise SettingWithCopyWarning.
Data_datetime2 = Data_datetime[Data_datetime['Receipt_to_Analysis'] < no_delay].copy() #all negative values 221
#Then recalculation so all negativ samples become positive
# For these rows the duration is measured from the sampling time instead.
Data_datetime2['Receipt_to_Analysis'] = (Data_datetime2['DateTime_analysis'] - Data_datetime2['DateTime_sampling'])
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
Data_datetime3 = pd.concat([Data_datetime1, Data_datetime2])

#Removing all samples that where not analyzed on the track in september
# Rows without a receipt timestamp (NaT) fail the comparison and are dropped too.
dfa = Data_datetime3[Data_datetime3['DateTime_receipt'] < pd.Timestamp("2020-10-01 00:00:00")] # samples received before 2020-10-01

#Receipt_to_Analysis - Time_to_Analysis_min
#calculate mean for each equipment and adding it to each row
def add_mkt_return(grp):
  """Return *grp* with its mean 'Receipt_to_Analysis' broadcast to every row.

  Args:
    grp: DataFrame holding a 'Receipt_to_Analysis' column.

  Returns:
    A new DataFrame with the same rows plus a 'receipt_Analysis_mean' column
    containing the mean of 'Receipt_to_Analysis' over the whole frame.
    The input frame is left untouched, because mutating the object handed
    in by ``groupby.apply`` is not supported by pandas.
  """
  return grp.assign(receipt_Analysis_mean=grp['Receipt_to_Analysis'].mean())

# Work on an explicit copy. Rows without an Equipment value are excluded,
# matching groupby's default of ignoring missing group keys.
df = dfa.dropna(subset=['Equipment']).copy()
# transform keeps the original row index. A like-indexed groupby.apply would
# prepend the group key to the index on pandas >= 2.0, which makes 'Equipment'
# ambiguous (index level and column) for the later groupbys.
df['receipt_Analysis_mean'] = (
    df.groupby('Equipment')['Receipt_to_Analysis'].transform(lambda durations: durations.mean())
)

#Adding the attributes to the dataframe:
# 'Mean' flags rows above their equipment's mean duration; 'Given' flags rows
# above the allowed time limit.
df['Mean'] = np.where(df['Receipt_to_Analysis'] > df['receipt_Analysis_mean'], 'Over_mean', 'Under_mean')
df['Given'] = np.where(df['Receipt_to_Analysis'] > df['Time_given'], 'Over', 'Under')
# Calendar attributes derived from the sampling timestamp.
df['Day'] = df['DateTime_sampling'].dt.day
df['time'] = df['DateTime_sampling'].dt.time
df['Weekday'] = df['DateTime_sampling'].dt.day_name()

#creating sub dataframes
# Daily sample counts, split by the 'Mean' flag and by the 'Given' flag.
Data_timeline1 = df.groupby([pd.Grouper(key='DateTime_sampling', freq='D'),'Mean']).size().reset_index(name='count')
Data_timeline2 = df.groupby([pd.Grouper(key='DateTime_sampling', freq='D'),'Given']).size().reset_index(name='count')
# One single-column frame per category, indexed by day.
Over_mean = (
    Data_timeline1.loc[Data_timeline1['Mean'] == 'Over_mean']
    .rename(columns={'count': 'Over_mean'})
    .drop(columns=['Mean'])
    .set_index('DateTime_sampling')
)
Under_mean = (
    Data_timeline1.loc[Data_timeline1['Mean'] == 'Under_mean']
    .rename(columns={'count': 'Under_mean'})
    .drop(columns=['Mean'])
    .set_index('DateTime_sampling')
)
Over_given = (
    Data_timeline2.loc[Data_timeline2['Given'] == 'Over']
    .rename(columns={'count': 'Over'})
    .drop(columns=['Given'])
    .set_index('DateTime_sampling')
)
Under_given = (
    Data_timeline2.loc[Data_timeline2['Given'] == 'Under']
    .rename(columns={'count': 'Under'})
    .drop(columns=['Given'])
    .set_index('DateTime_sampling')
)
Data_timeline3 = pd.concat([Over_mean,Under_mean,Over_given,Under_given], axis=1, join='outer')
# A day missing from one category has zero samples there, not an unknown count.
count_columns = ['Over_mean', 'Under_mean', 'Over', 'Under']
Data_timeline3[count_columns] = Data_timeline3[count_columns].fillna(0)
# Take the calendar labels from the combined index so days that only have
# 'Over' samples are labelled too. isocalendar().week replaces the removed
# .dt.week accessor.
Data_timeline3['Day'] = Data_timeline3.index.day
Data_timeline3['Weekday'] = Data_timeline3.index.day_name()
Data_timeline3['Week'] = Data_timeline3.index.isocalendar().week.to_numpy(dtype='int64')
# Percentage share of each category within its day.
mean_total = Data_timeline3["Over_mean"] + Data_timeline3["Under_mean"]
given_total = Data_timeline3["Over"] + Data_timeline3["Under"]
Data_timeline3["Over_mean%"] = (Data_timeline3["Over_mean"]/mean_total)*100
Data_timeline3["Under_mean%"] = (Data_timeline3["Under_mean"]/mean_total)*100
Data_timeline3["Over%"] = (Data_timeline3["Over"]/given_total)*100
Data_timeline3["Under%"] = (Data_timeline3["Under"]/given_total)*100

#New sub dataframe
# Hourly sample counts split by the 'Given' flag.
Datetime = df.groupby([pd.Grouper(key='DateTime_sampling', freq='H'),'Given']).size().reset_index(name='count')
Over_given = (
    Datetime.loc[Datetime['Given'] == 'Over']
    .rename(columns={'count': 'Over'})
    .drop(columns=['Given'])
    .set_index('DateTime_sampling')
)
Under_given = (
    Datetime.loc[Datetime['Given'] == 'Under']
    .rename(columns={'count': 'Under'})
    .drop(columns=['Given'])
    .set_index('DateTime_sampling')
)
Datetime0 = pd.concat([Over_given,Under_given], axis=1, join='outer')
# An hour missing from one category has zero samples there.
Datetime0[['Over', 'Under']] = Datetime0[['Over', 'Under']].fillna(0)
# Take the calendar labels from the combined index so hours that only have
# 'Over' samples are labelled too. isocalendar().week replaces the removed
# .dt.week accessor.
Datetime0['Day'] = Datetime0.index.day
Datetime0['time'] = Datetime0.index.time
Datetime0['Weekday'] = Datetime0.index.day_name()
Datetime0['Week'] = Datetime0.index.isocalendar().week.to_numpy(dtype='int64')
# sort_values returns a new frame, so the result has to be kept. A stable
# sort leaves rows with the same time of day in chronological order.
Datetime0 = Datetime0.sort_values('time', kind='mergesort')

#New sub dataframe
# Sample counts per sampling circumstance, split by the 'Given' flag.
Sampling_circumstance2 = df.groupby(['Sampling_circumstance', 'Given']).size().reset_index(name='count')
Over1 = (
    Sampling_circumstance2.loc[Sampling_circumstance2['Given'] == 'Over']
    .rename(columns={'count': 'Over'})
    .drop(columns=['Given'])
)
Under1 = (
    Sampling_circumstance2.loc[Sampling_circumstance2['Given'] == 'Under']
    .rename(columns={'count': 'Under'})
    .drop(columns=['Given'])
)
# An outer merge keeps circumstances that only have 'Over' samples; a left
# merge from the 'Under' side would drop them.
Sampling_circumstance3 = pd.merge(Under1, Over1, how='outer', on=['Sampling_circumstance'])
# A category missing on one side means zero samples, not an unknown count.
Sampling_circumstance3[['Under', 'Over']] = Sampling_circumstance3[['Under', 'Over']].fillna(0)
circumstance_total = Sampling_circumstance3["Over"] + Sampling_circumstance3["Under"]
Sampling_circumstance3["Over%"] = (Sampling_circumstance3["Over"]/circumstance_total)*100
Sampling_circumstance3["Under%"] = (Sampling_circumstance3["Under"]/circumstance_total)*100

#New sub dataframe
# Sample counts per equipment, split by the 'Given' flag.
Equipment = df.groupby(['Equipment', 'Given']).size().reset_index(name='count')
Over2 = (
    Equipment.loc[Equipment['Given'] == 'Over']
    .rename(columns={'count': 'Over'})
    .drop(columns=['Given'])
)
Under2 = (
    Equipment.loc[Equipment['Given'] == 'Under']
    .rename(columns={'count': 'Under'})
    .drop(columns=['Given'])
)
# An outer merge keeps equipment that only has 'Over' samples; a left merge
# from the 'Under' side would drop it.
Equipment1 = pd.merge(Under2, Over2, how='outer', on=['Equipment'])
# A category missing on one side means zero samples, not an unknown count.
Equipment1[['Under', 'Over']] = Equipment1[['Under', 'Over']].fillna(0)
equipment_total = Equipment1["Over"] + Equipment1["Under"]
Equipment1["Over%"] = (Equipment1["Over"]/equipment_total)*100
Equipment1["Under%"] = (Equipment1["Under"]/equipment_total)*100


#_________________________________________The application______________________________________________________

# Simple version of the dashboard: one page with several static plots.

# Create the Dash application, styled with an externally hosted CSS sheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Preparation for offering an HTML file for download.
# NOTE(review): nothing is written to `buffer` before it is read, so
# `html_bytes` is empty and `encoded` is the base64 text of an empty byte
# string. Presumably a figure was meant to be written into the buffer first;
# confirm the intent.
import io
from base64 import b64encode
buffer = io.StringIO()
html_bytes = buffer.getvalue().encode()
encoded = b64encode(html_bytes).decode()


def _over_under_bar(frame, x_column, y_columns, value_label, **options):
    """Build a bar chart of the given y columns with the shared red/green styling.

    *value_label* is the axis label used for the plotted values; any further
    keyword arguments are passed straight through to ``px.bar``.
    """
    return px.bar(
        frame,
        x=x_column,
        y=list(y_columns),
        color_discrete_sequence=["indianred", "palegreen"],
        labels={'value': value_label},
        **options,
    )


# Samples per day of the month.
fig = _over_under_bar(
    Data_timeline3, "Day", ("Over", "Under"), 'Number of samples',
    hover_name='Weekday',
)
# Samples per time of day, all days overlaid.
fig1 = _over_under_bar(
    Datetime0, "time", ("Over", "Under"), 'Number of samples',
    hover_name='Day',
)
# Samples per time of day, laid out with one row per week and one column per weekday.
fig2 = _over_under_bar(
    Datetime0, "time", ("Over", "Under"), 'No. of samples',
    hover_name='Day',
    facet_row="Week",
    facet_col="Weekday",
    category_orders={"Weekday":['Monday','Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']},
    height=800,
)
# Samples per sampling circumstance: counts, then percentages.
fig5 = _over_under_bar(
    Sampling_circumstance3, "Sampling_circumstance", ("Over", "Under"),
    'Number of samples',
)
fig6 = _over_under_bar(
    Sampling_circumstance3, "Sampling_circumstance", ("Over%", "Under%"),
    'Percentage of samples within each circumstance',
)
# Percentage of samples per equipment.
fig7 = _over_under_bar(
    Equipment1, "Equipment", ("Over%", "Under%"),
    'Percentage of samples within each equipment',
)

# Sentences shared by several plot descriptions (text kept exactly as displayed).
_GREEN_COUNT = 'The green bars indicates the number of samples that are analyzed within one hour from the sample are received in the laboratory.'
_RED_COUNT = 'The red bars indicates the number of samples that did not meet the requirements of the sample being analyzed within one hour of being recievd in the laboratory.'
_GREEN_SHARE = 'The green bars indicates the percentage of samples that are analyzed within one hour from the sample are received in the laboratory.'
_RED_SHARE = 'The red bars indicates the percentage of samples that did not meet the requirements of the sample being analyzed within one hour of being recievd in the laboratory.'


def _with_line_breaks(sentences):
    """Return the sentences as a children list with an html.Br() between each pair."""
    children = []
    for position, sentence in enumerate(sentences):
        if position:
            children.append(html.Br())
        children.append(sentence)
    return children


def _plot_section(heading, sentences, graph_id, figure, leading=()):
    """Build one 'row' Div: optional leading components, heading, description, graph."""
    return html.Div(
        [
            *leading,
            heading,
            html.P(children=_with_line_breaks(sentences)),
            dcc.Graph(id=graph_id, figure=figure),
        ],
        className='row',
    )


app.layout = html.Div(children=[
    # First element: page title plus the per-day overview
    _plot_section(
        html.H3(children='Samples analyzed in September'),
        [
            'Barplot: That shows the number of samples analyzed on the track throughout the month.',
            _GREEN_COUNT,
            _RED_COUNT,
        ],
        'graph',
        fig,
        leading=[
            html.H1("Web Application Locating bottlenecks in GLP track system", style={'text-align': 'center'}),
        ],
    ),
    # 2. element: samples per hour, all days overlaid
    _plot_section(
        html.H3(children='The number of samples analyzed for each hour'),
        [
            'Barplot: That shows the number of samples for each hour throughout a day, for september month.',
            _GREEN_COUNT,
            _RED_COUNT,
            'All days are layed on top of each other, the hover name, idicates which data belong to each day of the month.',
        ],
        'graph1',
        fig1,
    ),
    # 3. element: samples per hour in a calendar-like grid
    _plot_section(
        html.H4(children='The number of samples analyzed for each hour, for each day in september'),
        [
            'Barplot: That shows the number of samples for each hour for each day in september.',
            _GREEN_COUNT,
            _RED_COUNT,
            'The layout shows the days on the x-axis and each week on the y-axis, thereby does the plot follow a calender visualization.',
        ],
        'graph2',
        fig2,
    ),
    # 5. element: counts per sampling circumstance
    _plot_section(
        html.H3(children='The distribution for where the samples is collected'),
        [
            'Barplot: That shows the distribution for where the samples is collected.',
            _GREEN_COUNT,
            _RED_COUNT,
        ],
        'graph5',
        fig5,
    ),
    # 6. element: percentages per sampling circumstance
    _plot_section(
        html.H3(children='The percentage distribution'),
        [
            'Barplot: That shows the percentage distribution for where the samples is collected.',
            _GREEN_SHARE,
            _RED_SHARE,
        ],
        'graph6',
        fig6,
    ),
    # 7. element: percentages per equipment
    _plot_section(
        html.H2(children='The percentage distribution of the Equipment'),
        [
            'Barplot: That shows the percentage distribution for each equipment.',
            _GREEN_SHARE,
            _RED_SHARE,
        ],
        'graph7',
        fig7,
    ),
])

# Start the Dash server only when this file is executed as the main module
# (not when it is imported). No arguments are passed, so Dash's defaults apply.
if __name__ == '__main__':
    app.run_server()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [28/May/2021 14:09:53] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [28/May/2021 14:09:54] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [28/May/2021 14:09:54] "GET /_dash-layout HTTP/1.1" 200 -
