In [None]:
%pip install plotly seaborn

In [None]:
import pandas as pd
import seaborn as sns
import plotly.express as px 
import plotly.io as pio
import matplotlib.pyplot as plt

from collections import defaultdict
from datetime import date
from matplotlib.colors import ListedColormap, BoundaryNorm

In [None]:
# `dh` and `pio_renderer` are provided by the project-local `environment` module.
from environment import dh, pio_renderer
# Override Plotly's default renderer only when the environment supplies one.
if pio_renderer is not None:
    pio.renderers.default = pio_renderer

# **Variables**

In [None]:
# Project name passed to `dh.get_or_create_project` when the data is loaded.
PROJECT_NAME = "AreaVerde"

# **Functions**

In [None]:
def Heatmap(data):
  """Draw a four-colour status heatmap of `data` on a 16x12 inch figure.

  Args:
    data: 2-D table (e.g. a DataFrame) of status codes. The codes 0, 1, 2
      and 3 are drawn with the four entries of `colors`, in that order.
      Missing cells may be NaN/None.

  The figure is shown immediately; nothing is returned.
  """
  # Cast up front so the plot always receives a numeric matrix, even when the
  # caller's frame carries the codes in object-dtype columns.
  numeric_data = pd.DataFrame(data).astype(float)

  colors = ['#d30505', '#d35400', '#e4ed28', '#319022']
  cmap = ListedColormap(colors)
  # One bin per integer code: the boundaries sit halfway between 0, 1, 2 and 3.
  bounds = [-0.5, 0.5, 1.5, 2.5, 3.5]
  norm = BoundaryNorm(bounds, cmap.N)

  fig, ax = plt.subplots(figsize=(16, 12))
  sns.heatmap(
      numeric_data,
      cmap=cmap,
      norm=norm,
      linewidths=0.6,
      annot=False,
      cbar=False,
      ax=ax,
  )

  fig.tight_layout()
  plt.show()

In [None]:
def plot_donwtime(results, title):
    """Show a scatter plot with one point per (spira, timestamp) pair.

    Args:
        results: Mapping of spira identifier -> iterable of timestamps.
        title: Title displayed above the figure.

    The figure is shown immediately; nothing is returned.
    """
    records = [
        {'spira': spira, 'time': time}
        for spira, times in results.items()
        for time in times
    ]

    # Name the columns explicitly: with no records the frame would otherwise
    # have no 'time'/'spira' columns for the plot below to refer to.
    df = pd.DataFrame(records, columns=['spira', 'time'])

    fig = px.scatter(
        df,
        x='time',
        y='spira',
        title=title,
        labels={'time': 'Time', 'spira': 'Spira'},
        color='spira',
    )
    fig.update_layout(
        width=1500,
        height=900,
        title_font_size=20,
    )
    fig.show()

# **Load Data**

In [None]:
# Fetch the flow and accuracy data items from the project as DataFrames.
project = dh.get_or_create_project(PROJECT_NAME)

spira = project.get_dataitem("spira_flow_data_2024").as_df()
accuracy = project.get_dataitem("spira_accur_data_2024").as_df()

# Parse the `DateTime` column of both frames into pandas datetimes.
for _frame in (spira, accuracy):
    _frame['DateTime'] = pd.to_datetime(_frame['DateTime'])

In [None]:
# Wide table: one row per `DateTime`, one column per `spira_code`, cells taken
# from `count`. `pivot_table` builds a new frame, so `accuracy` needs no copy.
# Combinations absent from `accuracy` come out as NaN.
accuracy_pivot = accuracy.pivot_table(index='DateTime', columns='spira_code', values='count')

# **Manage Nan Values**

In [None]:
# TODO: Analyze the values before and after each NaN gap and fill it with a reasonable value instead of zero.
# NOTE: Do these gaps mean the spira was not working properly at those times (i.e. it was not off, but for some reason still did not record data)?

In [None]:
# Print the number of missing readings for every spira that has at least one.
nan_count = accuracy_pivot.isna().sum()
for spira_code, count in nan_count[nan_count != 0].items():
    print(spira_code, count)

In [None]:
# Missing readings become 0, so from here on they cannot be told apart from a
# reading that really is 0.
accuracy_pivot = accuracy_pivot.fillna(0)

# **Selected spira and Date**

In [None]:
# Date window used to slice the status table for the heatmap.
start_date, end_date = date(2024, 7, 1), date(2024, 7, 30)
# Column labels at positions 200..249 of the pivot (at most 50 spiras).
selected_spira_acc = accuracy_pivot.columns[200:250].tolist()

# **General Heatmap**

In [None]:
# Encode each accuracy reading as a numeric status code:
#   green  (3): accuracy >= 90
#   yellow (2): 0 < accuracy < 90
#   orange (1): accuracy == -1
#   red    (0): accuracy == 0
# Readings that match none of the rules stay NaN.
color_mapping = {'red': 0, 'orange': 1, 'yellow': 2, 'green': 3}

# Write the codes directly into a float frame instead of assigning colour
# names and replacing them afterwards, so the result is numeric by construction.
accuracy_status = (
    pd.DataFrame(float('nan'), index=accuracy_pivot.index, columns=accuracy_pivot.columns)
    .mask(accuracy_pivot >= 90, color_mapping['green'])
    .mask((accuracy_pivot > 0) & (accuracy_pivot < 90), color_mapping['yellow'])
    .mask(accuracy_pivot == -1, color_mapping['orange'])
    .mask(accuracy_pivot == 0, color_mapping['red'])
)

In [None]:
# Rows inside the date window, columns limited to the selected spiras.
# NOTE(review): the bounds are `datetime.date` objects; confirm whether
# readings taken during the day of `end_date` are meant to be included.
selected_acc_status = accuracy_status.loc[start_date:end_date, selected_spira_acc]

In [None]:
# Status heatmap for the selected spiras and date window.
Heatmap(selected_acc_status)

# **Extra Spiras in Accuracy File**

In [None]:
# Distinct spira codes in the accuracy data; the cell displays how many there are.
accuracy_codes = accuracy['spira_code'].unique().tolist()
len(accuracy_codes)

In [None]:
# Distinct spira codes in the flow data; the cell displays how many there are.
spira_codes = spira['spira_code'].unique().tolist()
len(spira_codes)

In [None]:
# Codes present in one file but not the other. Membership is tested against
# sets (constant-time lookups) while the comprehensions walk the lists, so the
# results keep the lists' original order.
accuracy_code_set = set(accuracy_codes)
spira_code_set = set(spira_codes)
extra_spira_code = [code for code in spira_codes if code not in accuracy_code_set]
extra_acc_code = [code for code in accuracy_codes if code not in spira_code_set]
print(f"we have {len(extra_spira_code)} spira that exist in spira file but not in accuracy file")
print(f"we have {len(extra_acc_code)} spira that exist in accuracy file but not in spira file")

In [None]:
# Accuracy columns of the spiras that appear only in the accuracy file.
extra_acc = accuracy_pivot[extra_acc_code]

In [None]:
# Display the accuracy-only columns.
extra_acc

In [None]:
# Daily mean accuracy of the accuracy-only spiras, cast to int (`astype(int)`
# drops the fractional part of each mean).
# Derived from `accuracy_pivot` in a single chain so the cell can be re-run
# safely: it no longer reads back the `extra_acc` it overwrites.
extra_acc = (
    accuracy_pivot[extra_acc_code]
    .groupby(accuracy_pivot.index.date)
    .mean()
    .astype(int)
    .rename_axis('date')
)
extra_acc


In [None]:
# Encode each daily accuracy value as a numeric status code:
#   green  (3): accuracy >= 90
#   yellow (2): 0 < accuracy < 90
#   orange (1): accuracy == -1
#   red    (0): accuracy == 0
# Values that match none of the rules stay NaN.
color_mapping = {'red': 0, 'orange': 1, 'yellow': 2, 'green': 3}

# Write the codes directly into a float frame instead of assigning colour
# names and replacing them afterwards, so the result is numeric by construction.
extra_acc_status = (
    pd.DataFrame(float('nan'), index=extra_acc.index, columns=extra_acc.columns)
    .mask(extra_acc >= 90, color_mapping['green'])
    .mask((extra_acc > 0) & (extra_acc < 90), color_mapping['yellow'])
    .mask(extra_acc == -1, color_mapping['orange'])
    .mask(extra_acc == 0, color_mapping['red'])
)

In [None]:
# Status heatmap for the spiras found only in the accuracy file.
Heatmap(extra_acc_status)

# **Spira OFF Time**

Note: This section assumes that an accuracy of -1 (as well as any value < 100%) marks a time when the spira was off.

In [None]:
# For every spira, collect the timestamps whose accuracy is exactly -1 and the
# timestamps whose accuracy is below 100. A spira only gets a key in a mapping
# when it has at least one matching timestamp.
results_neg_one = defaultdict(list)
results_less_than_100 = defaultdict(list)
for spira_id in accuracy_pivot.columns:
    accuracy_series = accuracy_pivot[spira_id]
    for results, matches in (
        (results_neg_one, accuracy_series == -1),
        (results_less_than_100, accuracy_series < 100),
    ):
        # Boolean indexing picks the matching timestamps in one step instead
        # of visiting every cell in a Python loop.
        timestamps = accuracy_series[matches].index.tolist()
        if timestamps:
            results[spira_id].extend(timestamps)

In [None]:
# Scatter of every timestamp at which a spira reported an accuracy of -1.
plot_donwtime(results_neg_one, 'Spira OFF Time (-1)')

In [None]:
# Plot at most `max_spire` spiras (the first ones in insertion order) to keep
# the figure readable. The slice stops at `max_spire`, not `max_spire + 1`.
max_spire=50
plot_donwtime(
    {
        spira_id: results_less_than_100[spira_id]
        for spira_id in list(results_less_than_100)[:max_spire]
    },
    title='Spira OFF Time (<100%)',
)