In [1]:
import pandas as pd
import json
from pathlib import Path
from dotenv import load_dotenv
import os
from sqlalchemy import create_engine
from datetime import datetime
import random 
from pydidask.utils import RenderJSON

# list_id value used further down to select the bug tasks from the task table.
ID_LIST_BUGS = 210777287
# dotenv file loaded below; presumably defines the SNOWFLAKE_* variables
# that the notebook reads through os.getenv — verify against the file.
PATH_CREDS = Path("creds/.env")

# Load the dotenv file into the environment; the return value is discarded.
_ = load_dotenv(dotenv_path=PATH_CREDS)
# Dependencies: pip install sqlalchemy snowflake-sqlalchemy

In [2]:
def create_snowflake_engine(
        database, schema,
        user=os.getenv("SNOWFLAKE_USER"),
        password=os.getenv("SNOWFLAKE_PASSWORD"),
        account=os.getenv("SNOWFLAKE_ACCOUNT"),
        warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"),
        role=os.getenv("SNOWFLAKE_ROLE")):
    """Build a SQLAlchemy engine for a Snowflake database/schema.

    The defaults are read from the environment when this ``def`` statement
    runs, so the dotenv file must be loaded before the cell is executed.

    Args:
        database: Snowflake database name.
        schema: Schema inside ``database``.
        user: Login name (default: ``SNOWFLAKE_USER``).
        password: Password (default: ``SNOWFLAKE_PASSWORD``).
        account: Account identifier (default: ``SNOWFLAKE_ACCOUNT``).
        warehouse: Warehouse to use (default: ``SNOWFLAKE_WAREHOUSE``).
        role: Role to assume (default: ``SNOWFLAKE_ROLE``).

    Returns:
        The object returned by ``sqlalchemy.create_engine`` for the built URL.

    Raises:
        ValueError: if any setting is ``None`` (e.g. an unset environment
            variable), instead of silently embedding the text "None" in the URL.
    """
    from urllib.parse import quote_plus

    settings = {
        "database": database, "schema": schema, "user": user,
        "password": password, "account": account,
        "warehouse": warehouse, "role": role,
    }
    missing = [key for key, value in settings.items() if value is None]
    if missing:
        raise ValueError(f"missing Snowflake connection settings: {', '.join(missing)}")

    # Percent-encode the credentials and query values: characters such as
    # '@', '/' or ':' in a password would otherwise corrupt the URL.
    url = (
        f"snowflake://{quote_plus(user)}:{quote_plus(password)}"
        f"@{account}/{database}/{schema}"
        f"?warehouse={quote_plus(warehouse)}&role={quote_plus(role)}"
    )
    return create_engine(url)

In [3]:
# Target database/schema come from the environment.
database = os.environ.get("SNOWFLAKE_DATABASE_RAW")
schema = os.environ.get("SNOWFLAKE_SCHEMA_CLICKUP")

# Open a connection that the following queries reuse.
engine = create_snowflake_engine(database, schema)
connection = engine.connect()

In [4]:
query_clickup_tasks = "SELECT * FROM TASK"

# Load every task, then keep the ones belonging to the bug list.
df_tasks = pd.read_sql_query(sql=query_clickup_tasks, con=connection)
df_bugs = df_tasks.loc[df_tasks["list_id"].eq(ID_LIST_BUGS)]

# A bug is open when its lower-cased status is anything other than 'closed'.
b_open = df_bugs["status_status"].str.lower().ne('closed')
df_bugs_open = df_bugs.loc[b_open]

In [6]:
# Report the size of each successive subset.
for label, frame in (("tasks", df_tasks), ("bugs", df_bugs), ("bugs open", df_bugs_open)):
    print(f"number of {label}: {len(frame)}")

number of tasks: 2339
number of bugs: 1280
number of bugs open: 49


In [7]:
# Pick one open bug at random and print its identifiers for manual inspection.
# random.randrange excludes the upper bound, so `i` is always a valid position
# for .iloc; random.randint includes both endpoints and could return
# len(df_bugs_open), which raises IndexError.
i = random.randrange(len(df_bugs_open))
task_id = df_bugs_open["id"].iloc[i]
task_name = df_bugs_open["name"].iloc[i]
print(f'Task ID: {task_id}')
print(f"URL: https://app.clickup.com/t/{task_id}")
print(f"task name: '{task_name}'")

Task ID: 8693aytk7
URL: https://app.clickup.com/t/8693aytk7
task name: '[CFPPA] Lenteurs de l'app à l’upload de PDF au-delà de 12MO'


In [8]:
# Decode the custom fields (a JSON string) of the sampled bug.
c_fields = json.loads(df_bugs_open["custom_fields"].iloc[i])
fields_interest = [
    "Type d'anomalie",
    "Source du problème",
    "Type de problème",
    "client impacté",
    "Url de reproduction",
]
# Keep only the custom fields whose name is in the list above.
c_fields_f = list(filter(lambda field: field["name"] in fields_interest, c_fields))

In [9]:
# Render the first retained custom field carrying this name.
f_name = "Type de problème"
matching_fields = [field for field in c_fields_f if field["name"] == f_name]
_ = RenderJSON(matching_fields[0])

In [26]:
# Load the ACCESSIBLE_CUSTOM_FIELD table in full.
query_access_fields = "SELECT * FROM ACCESSIBLE_CUSTOM_FIELD"

df_access_fields = pd.read_sql_query(sql=query_access_fields, con=connection)

In [27]:
# Parse the JSON type_config of the first row named "Type d'anomalie".
b = df_access_fields["name"].eq("Type d'anomalie")
raw_type_config = df_access_fields.loc[b, "type_config"].iloc[0]
a_field = json.loads(raw_type_config)

In [28]:
# Display the parsed type_config of the "Type d'anomalie" custom field.
a_field

{'default': 0,
 'new_drop_down': True,
 'options': [{'color': '#800000',
   'id': '1191f916-90a8-4df8-a3ee-25b19d135b89',
   'name': "Anomalie bloquante (la plateforme, ou une de ses fonctionnalités n'est plus accessible)",
   'orderindex': 0},
  {'color': '#e50000',
   'id': '5f25748d-c9c1-4443-8dcd-724e38b63af2',
   'name': "Anomalie majeure (altère très gravement, sans l’interrompre, l'utilisation d'une fonctionalité)",
   'orderindex': 1},
  {'color': '#9b59b6',
   'id': 'f69f1d25-e616-46fc-9255-8741196c5d01',
   'name': "Anomalie mineure (altère sans l’interrompre  l'utilisation d'une fonctionalité)",
   'orderindex': 2},
  {'color': '#81B1FF',
   'id': 'ce6d9aa7-af98-42c9-b304-db6d7cf94d79',
   'name': 'Autre (demandes, questions)',
   'orderindex': 3}],
 'placeholder': None}

## query prod data

In [4]:
# Switch to ANALYTICS.PRODUCTION. This rebinds `engine` and `connection`;
# a connection opened earlier is not closed here.
database, schema = "ANALYTICS", "PRODUCTION"

engine = create_snowflake_engine(database, schema)
connection = engine.connect()

In [7]:
# Load fct_bugs_open in full; this rebinds `df_bugs`.
df_bugs = pd.read_sql_query(sql="SELECT * FROM fct_bugs_open", con=connection)

In [15]:
# Share of non-null values for each column of interest in df_bugs.
tmp = pd.DataFrame({
    'info': [
        "problem_severity_id",
        "problem_type_id",
        "problem_source_id",
        "client_impacted_id",
        "url_impacted",
    ]
})
tmp["taux de completion"] = [df_bugs[col].notna().mean() for col in tmp["info"]]
# Display the rates as whole percentages.
tmp.style.format({"taux de completion": '{:.0%}'})

Unnamed: 0,info,taux de completion
0,problem_severity_id,100%
1,problem_type_id,8%
2,problem_source_id,10%
3,client_impacted_id,98%
4,url_impacted,96%


In [18]:
# Print every url_impacted value on its own line.
for url in df_bugs["url_impacted"].tolist():
    print(url)

https://herosdiscrets.didask.com/studio/projects/6547c9fa0072a5a902a10127
http://casepassedanslesmails.didask.com
https://academie-supexpertise.didask.com/programs/635a9a9383e1d739c27228af?isPreview=true
https://cfpc2030.didask.com/studio/projects/654a69f321850a6a1996544d?granuleId=656da17013f5a537ed6cf0be&topicId=656da17013f5a537ed6cf0bd&drawer=edition&isEditing=true
https://orange-groupe.didask.com/api/studio/courses/637e3d21cea814691e265811/analytics/users/export?groupsIds=6491cdfe8a0b905818961fbf&groupsIds=6491ce108a0b905818962005&groupsIds=6491ce688a0b9058189620d6&groupsIds=64faebe388d227fd50233669&groupsIds=651e971c63f16d5e65325033&withLearnersNotInAnyGroup=true&searchTerm=
https://superadmin.didask.com/workspaces/production/65685f2fe7d8ec3e78b52099/settings
https://telemaque.didask.com/studio/projects/64413df1d22615a77453dea5/publications/64c8eb761118a152bf7fcf9e
https://herosdiscrets.didask.com/studio/projects/6547c9fa0072a5a902a10127/topics/6579873b06562d8881d7b489
https://edh

In [12]:
# List the columns of df_bugs.
df_bugs.columns

Index(['task_id', 'name', 'description', 'status_type', 'status_status',
       'list_id', 'archived', 'custom_fields', 'date_created', 'date_closed',
       'url', 'problem_severity_id', 'problem_type_id', 'problem_source_id',
       'client_impacted_id', 'url_impacted', 'problem_severity_label',
       'problem_source_label', 'problem_type_label'],
      dtype='object')

## close connection

In [None]:
# Close the connection currently bound to `connection`.
connection.close()

## analytics db

In [63]:
# Switch to ANALYTICS.DBT_SRABOUDI; rebinds `engine` and `connection`.
database, schema = "ANALYTICS", "DBT_SRABOUDI"
engine = create_snowflake_engine(database, schema)
connection = engine.connect()

In [64]:
# Load FCT_CLICKUP_TASKS_SUPPORT in full.
query_tickets = "SELECT * FROM FCT_CLICKUP_TASKS_SUPPORT"
df = pd.read_sql_query(sql=query_tickets, con=connection)

In [71]:
# Masks for rows created on/after 2023-12-01 and before 2024-01-01.
created = df["created_date_paris"]
b1 = created >= "2023-12-01"
b2 = created < "2024-01-01"
# Names of the matching rows, sorted in descending order.
df.loc[b1 & b2, ["name"]].sort_values(by="name", ascending=False)

Unnamed: 0,name
716,"🐲 [CFPPA] Impossible de modifier les groupes, ..."
993,l'ordering ne fonctionne pas bien coté aprrena...
328,[WISPER/AIRCALL] Impossible de dupliquer la pu...
335,[Vérouillage de l'IA en backoffice] Aucun chan...
697,[Traductions XLF] Manque de traduction de l'au...
...,...
715,Affichage du module en mode liste : quand je c...
714,Affichage du module en mode liste : quand je c...
144,"Affichage de la publication : ""le chapitre 1 r..."
286,Affichage de l'analyse d'image cassé ?


In [72]:
# Rows matching both masks, sorted by name in descending order.
zz = df.loc[b1 & b2].sort_values(by="name", ascending=False)

In [78]:
# Number of distinct names, counting a missing value as one entry.
zz["name"].nunique(dropna=False)

102

In [60]:
# Rows created on/after 2023-12-01 and before 2024-01-01, newest first.
b1 = df["created_date_paris"] >= "2023-12-01"
b2 = df["created_date_paris"] < "2024-01-01"
bugs_dec = df.loc[b1 & b2].sort_values(by="created_date_paris", ascending=False)

In [61]:
def taux_completion_champs(df, champs):
    tmp = pd.DataFrame({'info': champs})
    tmp["taux de completion"] = tmp["info"].apply(lambda c: (~df[c].isnull()).mean())
    return tmp.style.format({"taux de completion": '{:.0%}'})

In [62]:
# Completion rate of the qualification columns in bugs_dec.
chps = [
    "problem_severity_label",
    "problem_type_label",
    "problem_source_label",
    "customer_label",
    "url_reproduction",
]
taux_completion_champs(df=bugs_dec, champs=chps)

Unnamed: 0,info,taux de completion
0,problem_severity_label,99%
1,problem_type_label,6%
2,problem_source_label,7%
3,customer_label,99%
4,url_reproduction,98%


## accessible custom fields

In [37]:
# Switch to RAW.CLICKUP; rebinds `engine`, `connection` and `df`.
database, schema = "RAW", "CLICKUP"
engine = create_snowflake_engine(database, schema)
connection = engine.connect()

# Load accessible_custom_field in full.
query = "select * from accessible_custom_field"
df = pd.read_sql_query(sql=query, con=connection)

In [40]:
# Preview the first row of df.
df.head(1)

Unnamed: 0,id,type_config,_fivetran_deleted,list_id,date_created,hide_from_guests,name,type,_fivetran_synced
0,94ccf034-399f-4c2e-884a-8babf3f2c3f6,"{\n ""options"": [\n {\n ""color"": ""#e50...",False,210777287,1702901196433,True,Type de problème,drop_down,2024-01-09 09:26:16.171000+00:00


In [44]:
# Mask of the rows named "Url de reproduction".
bb = df["name"].eq("Url de reproduction")

In [45]:
# Show the rows selected by the bb mask.
df.loc[bb]

Unnamed: 0,id,type_config,_fivetran_deleted,list_id,date_created,hide_from_guests,name,type,_fivetran_synced
52,7b87e793-318f-41b8-9f5c-23feb1b647fb,{},False,210777287,1665411271852,False,Url de reproduction,url,2024-01-09 09:26:16.171000+00:00


In [49]:
# Look for a custom field with this exact name.
df.loc[df["name"].eq("Who are you? :D")]

Unnamed: 0,id,type_config,_fivetran_deleted,list_id,date_created,hide_from_guests,name,type,_fivetran_synced


In [52]:
# List the distinct values of the name column.
df["name"].unique()

array(['Type de problème', 'Lien IC', 'Source du problème',
       'Phase du Customer Journey (Circle)',
       'Est-ce que le bug est bloquant ?', 'Statut', 'Valeur / Priorité',
       'Images', 'Close Date', 'Handoff status', 'Project Code', 'Owner',
       'Points (custom)', 'Company Revenue', 'Payment Status',
       'Document (facultatif)', 'Où a-t-il été détecté ?',
       'Employee Count', "J'ai réussi à le reproduire",
       'Related projects', 'Quarter ', 'Septembre 2023', 'Progression',
       'Completed by', 'Domaine', 'Owners', 'Payment Type', 'Team',
       'Notes', 'In-cycle devs', 'Date livraison effective', 'Segment',
       'Account Size (Customer effort)', '- Évitable avec un test ?',
       'Incertitude', 'Thème ', 'client impacté', 'Appartient',
       'Delivery Done/Deadline', 'Cycle duration', 'Type',
       'Supported diffusion modes ', 'Email', 'TYPE', 'Date début',
       'Phase Design', "Type d'anomalie", 'Url de reproduction',
       'Estimation en jours', '

## taux de completion depuis creation des fields

In [31]:
from datetime import datetime

In [93]:
# --- RAW.CLICKUP: custom field definitions ---
database_clickup, schema_clickup = "RAW", "CLICKUP"
engine_clickup = create_snowflake_engine(database_clickup, schema_clickup)
connection_clickup = engine_clickup.connect()
query_fields = "select * from accessible_custom_field"
df_fields = pd.read_sql_query(sql=query_fields, con=connection_clickup)

# --- ANALYTICS.PRODUCTION: support tickets ---
database_prod, schema_prod = "ANALYTICS", "PRODUCTION"
engine_prod = create_snowflake_engine(database_prod, schema_prod)
connection_prod = engine_prod.connect()
query_tickets = "SELECT * FROM fct_clickup_tasks_support"
df_tickets = pd.read_sql_query(sql=query_tickets, con=connection_prod)

In [98]:
# Maps a custom-field name to the df_tickets column looked up for it later on.
column_lookup_dict = {
    "Url de reproduction": "url_reproduction",
    'Type de problème': "problem_type_label",
    'Source du problème': "problem_source_label",
    'client impacté': "customer_label",
    "Type d'anomalie": "problem_severity_label"
    }

# date_created is an epoch timestamp in milliseconds. Convert it explicitly to
# naive Europe/Paris wall-clock time instead of relying on the machine's local
# timezone (datetime.fromtimestamp), so the result is the same everywhere.
# NOTE(review): assumes the *_paris ticket columns this is compared against
# are naive Paris timestamps — confirm.
df_fields["created_date"] = (
    pd.to_datetime(df_fields["date_created"], unit="ms", utc=True)
    .dt.tz_convert("Europe/Paris")
    .dt.tz_localize(None)
)
fields = ['Url de reproduction', 'Type de problème', 'Source du problème', 'client impacté', "Type d'anomalie"]
b = df_fields["name"].isin(fields)
# .copy() makes df_res an independent frame, so adding columns to it later
# does not trigger SettingWithCopyWarning.
df_res = df_fields.loc[b, ["name", "created_date"]].copy()
# From here on `fields` is the Series of names actually found in df_fields.
fields = df_res["name"]

print(f"fields: {fields}")
df_res

fields: 0        Type de problème
2      Source du problème
39         client impacté
51        Type d'anomalie
52    Url de reproduction
Name: name, dtype: object


Unnamed: 0,name,created_date
0,Type de problème,2023-12-18 13:06:36.433
2,Source du problème,2023-12-18 13:11:09.155
39,client impacté,2023-10-19 17:13:24.432
51,Type d'anomalie,2022-10-10 16:14:08.181
52,Url de reproduction,2022-10-10 16:14:31.852


In [100]:
# For every field: count the deduplicated tickets closed since the field was
# created, and the share of those tickets in which the field is filled in.
list_nb_tickets_closed = []
list_pct_completion = []

# Loop-invariant: keep only the first row of each task_id.
b_dedup_tasks = ~df_tickets["task_id"].duplicated(keep='first')

for fld in fields:
    print(f"current field: '{fld}'")
    date_cr = df_fields.loc[df_fields["name"] == fld, "created_date"].iloc[0]
    print(f"field created at: {date_cr}")
    b_closed_since = df_tickets["closed_date_paris"] >= date_cr
    b_in_scope = b_dedup_tasks & b_closed_since
    nb_tickets_closed_since = int(b_in_scope.sum())
    print(f"nb tickets closed since: {nb_tickets_closed_since}")

    col = column_lookup_dict[fld]
    print(f"equiv column: {col}")
    b_field_completed = df_tickets[col].notna()
    nb_fields_completed = int((b_in_scope & b_field_completed).sum())
    # No ticket closed since the field exists: the rate is undefined (NaN)
    # rather than a division by zero.
    if nb_tickets_closed_since:
        pct_completion = nb_fields_completed / nb_tickets_closed_since
    else:
        pct_completion = float("nan")
    print(f'number of fields completed: {nb_fields_completed}')
    print(f'pct of completion: {pct_completion:.0%}')
    print('----')

    list_nb_tickets_closed.append(nb_tickets_closed_since)
    list_pct_completion.append(pct_completion)


# assign() builds a new frame, which avoids writing into a slice of df_fields
# and keeps this cell safe to re-run.
df_res = df_res.assign(
    nb_tickets_closed_since=list_nb_tickets_closed,
    pct_completion=list_pct_completion,
)

current field: 'Type de problème'
field created at: 2023-12-18 13:06:36.433000
nb tickets closed since: 34
equiv column: problem_type_label
number of fields completed: 5
pct of completion: 15%
----
current field: 'Source du problème'
field created at: 2023-12-18 13:11:09.155000
nb tickets closed since: 34
equiv column: problem_source_label
number of fields completed: 5
pct of completion: 15%
----
current field: 'client impacté'
field created at: 2023-10-19 17:13:24.432000
nb tickets closed since: 421
equiv column: customer_label
number of fields completed: 314
pct of completion: 75%
----
current field: 'Type d'anomalie'
field created at: 2022-10-10 16:14:08.181000
nb tickets closed since: 1245
equiv column: problem_severity_label
number of fields completed: 1228
pct of completion: 99%
----
current field: 'Url de reproduction'
field created at: 2022-10-10 16:14:31.852000
nb tickets closed since: 1245
equiv column: url_reproduction
number of fields completed: 943
pct of completion: 76%
-

In [101]:
# Rename the columns for presentation and keep only the date part (YYYY-MM-DD)
# of the creation timestamp, as text.
df_res = (
    df_res
    .rename(columns={"name": "field_name", "created_date": "field_creation_date"})
    .assign(field_creation_date=lambda d: d["field_creation_date"].dt.strftime("%Y-%m-%d"))
)

In [102]:
# Display df_res with the completion rate as a whole percentage.
df_res.style.format(formatter={"pct_completion": '{:.0%}'})

Unnamed: 0,field_name,field_creation_date,nb_tickets_closed_since,pct_completion
0,Type de problème,2023-12-18,34,15%
2,Source du problème,2023-12-18,34,15%
39,client impacté,2023-10-19,421,75%
51,Type d'anomalie,2022-10-10,1245,99%
52,Url de reproduction,2022-10-10,1245,76%


In [109]:
# Tickets whose reproduction URL is filled in. notna() treats both None and
# NaN as missing, matching the null check used for the completion rates;
# `x is not None` would count NaN as a filled-in URL.
b_url_set = df_tickets["url_reproduction"].notna()
# Add an empty category column (mutates df_tickets in place);
# presumably filled in by hand after export — confirm.
df_tickets["url_reproduction_category"] = ""
# Single .loc selection instead of chained indexing.
df_tickets.loc[b_url_set, ["url_reproduction", "url_reproduction_category"]]

In [111]:
# Export the tickets selected by b_url_set to a CSV file, without the index column.
df_tickets.loc[b_url_set].to_csv(path_or_buf="~/Desktop/url_de_reproduction.csv", index=False)