In [8]:
import pandas as pd 
df=pd.read_csv("filtered_dataset3.csv", encoding='latin1')
df.dropna(inplace=True)
df = df[['PREDICATE', 'SUBJECT_NAME', 'OBJECT_NAME']]
filtre = ['METHOD_OF','TREATS', 'ADMINISTERED_TO', 'USES']
df = df[df['PREDICATE'].isin(filtre)].drop_duplicates()
print(df)

       PREDICATE                          SUBJECT_NAME  \
7         TREATS                     trastuzumab|ERBB2   
8         TREATS                           anastrozole   
9         TREATS                             Tamoxifen   
10        TREATS                 Immunologic Adjuvants   
16        TREATS  Autologous Stem Cell Transplantation   
...          ...                                   ...   
252726    TREATS                                 CT-P6   
252742    TREATS    Stereotactic Ablative Radiotherapy   
252761    TREATS                              Prodrugs   
252771    TREATS                  Aromatase Inhibitors   
252836    TREATS                      beta-thujaplicin   

                         OBJECT_NAME  
7       Malignant neoplasm of breast  
8       Malignant neoplasm of breast  
9       Malignant neoplasm of breast  
10      Malignant neoplasm of breast  
16          Breast cancer metastatic  
...                              ...  
252726     Carcinoma breast st

In [2]:
import random
import networkx as nx
import pandas as pd

G = nx.Graph()
for _, row in df.iterrows():
    G.add_edge(row['SUBJECT_NAME'], row['OBJECT_NAME'], relation=row['PREDICATE'])

def random_walk(graph, start_node, num_steps):
    current_node = start_node
    visited_nodes = {current_node: 1}
    
    for _ in range(num_steps):
        neighbors = list(graph.neighbors(current_node))
        if not neighbors:
            break
        next_node = random.choice(neighbors)
        if next_node in visited_nodes:
            visited_nodes[next_node] += 1
        else:
            visited_nodes[next_node] = 1
        current_node = next_node
    
    return visited_nodes

def calculate_factuality_scores(df, num_steps=1000):
    scores = {}
    
    subject_nodes = df['SUBJECT_NAME'].unique()
    
    for subject in subject_nodes:
        visited = random_walk(G, subject, num_steps)
        
        for _, row in df.iterrows():
            subject = row['SUBJECT_NAME']
            obj = row['OBJECT_NAME']
            total_visits = sum(visited.values())
            score = visited.get(obj, 0) / total_visits if total_visits > 0 else 0
            scores[(subject, obj)] = score
    
    return scores

factuality_scores = calculate_factuality_scores(df)


df['Factuality_Score'] = df.apply(lambda row: factuality_scores.get((row['SUBJECT_NAME'], row['OBJECT_NAME']), 0), axis=1)

output_file_path = 'score_da.csv' 
df.to_csv(output_file_path, index=False)

print(f"Les scores de factualité ont été ajoutés et sauvegardés dans '{output_file_path}'.")

Les scores de factualité ont été ajoutés et sauvegardés dans 'score_da.csv'.


In [3]:
import pandas as pd

file_path = 'score_da.csv'
df = pd.read_csv(file_path)

female_breast_cancer_types = [
    "Malignant neoplasm of breast", "Breast Carcinoma", "Breast cancer metastatic", "Triple Negative Breast Neoplasms",
    "Breast cancer invasive NOS", "Noninfiltrating Intraductal Carcinoma", "Secondary malignant neoplasm of breast",
    "Carcinoma breast stage IV", "Inflammatory Breast Carcinoma", "estrogen receptor-positive breast cancer",
    "HER2-positive carcinoma of breast", "Carcinoma, Ductal, Breast", "Sporadic Breast Carcinoma", "Invasive carcinoma of breast",
    "Female Breast Carcinoma", "Familial cancer of breast", "Contralateral breast cancer", "Ductal Breast Carcinoma",
    "Breast Cancer, Familial", "Basal-Like Breast Carcinoma", "Early-Stage Breast Carcinoma", "contralateral breast cancer",
    "malignant neoplasm of breast staging", "estrogen receptor-negative breast cancer", "Luminal B Breast Carcinoma",
    "Hereditary Breast and Ovarian Cancer Syndrome", "Carcinoma in situ of breast", "Invasive Ductal Breast Carcinoma",
    "bilateral breast cancer", "Locally advanced breast cancer", "Malignant neoplasm of female breast", "HER2-negative breast cancer",
    "Breast cancer stage II", "Triple-Negative Breast Carcinoma", "Breast cancer stage III", "Lobular carcinoma in situ of breast",
    "Advanced breast cancer diagnosis", "Stage 0 Breast Carcinoma", "Cancer en cuirasse", "Stage IV Inflammatory Breast Carcinoma",
    "Right-Sided Breast Neoplasms", "hereditary breast/ovarian cancer - BRCA1", "Stage 0 Breast Cancer AJCC v6 and v7",
    "Unilateral Breast Carcinoma", "Breast Mucinous Carcinoma", "cellular diagnosis, breast cancer", "Carcinoma breast stage I",
    "Papillary carcinoma of the breast"
]

male_breast_cancer_types = ["Carcinoma of Male Breast"]

recurrent_breast_cancer_types = ["Breast cancer recurrent", "Recurrent Breast Carcinoma"]
def classify_cancer_type(row):
    if row['OBJECT_NAME'] in female_breast_cancer_types:
        return "Female Breast Cancer"
    elif row['OBJECT_NAME'] in male_breast_cancer_types:
        return "Male Breast Cancer"
    elif row['OBJECT_NAME'] in recurrent_breast_cancer_types:
        return "Recurrent Breast Cancer"
    else:
        return "Other"
df['Cancer_Type'] = df.apply(classify_cancer_type, axis=1)
print(df.head())
df.to_csv('score_da.csv', index=False)

  PREDICATE                          SUBJECT_NAME  \
0    TREATS                     trastuzumab|ERBB2   
1    TREATS                           anastrozole   
2    TREATS                             Tamoxifen   
3    TREATS                 Immunologic Adjuvants   
4    TREATS  Autologous Stem Cell Transplantation   

                    OBJECT_NAME  Factuality_Score           Cancer_Type  
0  Malignant neoplasm of breast          0.192807  Female Breast Cancer  
1  Malignant neoplasm of breast          0.192807  Female Breast Cancer  
2  Malignant neoplasm of breast          0.192807  Female Breast Cancer  
3  Malignant neoplasm of breast          0.192807  Female Breast Cancer  
4      Breast cancer metastatic          0.041958  Female Breast Cancer  


In [13]:
import pandas as pd

# Load the DataFrame
df = pd.read_csv("score_da.csv", encoding='latin1')

categories = {
    'Chemotherapy': [
        'chemotherapy', 'chemo', 'adjuvant chemotherapy', 'cytotoxic agent', 
        'anthracyclines', 'chemotherapeutic agent', 'mitoxantrone', 'cyclophosphamide', 
        'ifosfamide', 'epirubicin', 'taxol', 'fluorouracil', 'etoposide', 'adriamycin', 
        'cisplatin', 'methotrexate', 'vincristine', 'carboplatin', 'doxorubicin', 
        'chlorambucil', 'bendamustine', 'thalidomide'
    ],
    'Radiotherapy': [
        'radiotherapy', 'radio', 'radiation therapy', 'brachytherapy', 'x-ray therapy', 
        'intensity-modulated radiotherapy', 'adjuvant radiotherapy', 'radiation therapy, lymphatic', 
        'external radiotherapy'
    ],
    'Surgery': [
        'surgery', 'surgical', 'mastectomy', 'lumpectomy', 'breast-conserving surgery', 
        'quadrantectomy', 'modified radical mastectomy', 'excision', 'radical mastectomy', 
        'dissection', 'reconstructive surgical procedures', 'operative surgical procedures', 
        'implantation procedure', 'transplantation', 'maxillary left canine abutment', 
        'mandibular right third molar abutment', 'skin transplantation', 'lymph node excision', 'biopsy'
    ],
    'Hormone Therapy': [
        'hormone therapy', 'tamoxifen', 'aromatase inhibitor', 'hormone replacement therapy', 
        'estrogens', 'estrogen replacement therapy', 'progestins', 'estradiol', 'estrone', 
        'megestrol acetate', 'medroxyprogesterone', 'raloxifene', 'buserelin', 
        'androgen antagonists', 'gonadotropin-releasing hormone analog', 'diethylstilbestrol'
    ],
    'Targeted Therapy': [
        'targeted therapy', 'herceptin', 'trastuzumab', 'lapatinib', 'bevacizumab', 
        'antiangiogenesis therapy'
    ],
    'Immunotherapy': [
        'immunotherapy', 'immunologic adjuvants', 'vaccines', 'anti-inflammatory agents', 
        'biological response modifiers', 'interleukin', 'monoclonal antibodies', 'checkpoint inhibitors'
    ],
    'Supportive Care': [
        'supportive care', 'palliative care', 'symptomatic treatment', 'group therapy', 
        'psychotherapy', 'medical castration', 'cold therapy', 'symptomatic treatment'
    ],
    'Alternative Therapy': [
        'alternative therapy', 'holistic', 'complementary', 'herbal medicine', 'acupuncture', 
        'homeopathy'
    ],
    'Pharmacotherapy': [
        'pharmacotherapy', 'pharmaceutical preparations', 'drug delivery systems', 'medications', 
        'oral anticoagulants', 'antineoplastic agents', 'corticosteroids'
    ],
    'Adjuvant Therapy': [
        'adjuvant therapy', 'high-dose chemotherapy', 'neoadjuvant therapy'
    ],
    'Gene Therapy': [
        'gene therapy', 'genetic therapy'
    ],
    'Biological Therapy': [
        'biological therapy', 'biological agents'
    ],
    'Endocrine Therapy': [
        'endocrine therapy', 'hormone replacement therapy'
    ],
    'Surgical Procedures': [
        'operative surgical procedures', 'surgical procedures'
    ],
    'Procedures on Breast': [
        'procedures on breast', 'breast-conserving surgery'
    ]
}
def classify_subject(subject_name):
    subject_name_lower = subject_name.lower()
    for category, keywords in categories.items():
        if any(keyword in subject_name_lower for keyword in keywords):
            return category
    return 'Other'  # Catégorie par défaut si aucun mot-clé ne correspond

# Appliquer la classification
df['Category'] = df['SUBJECT_NAME'].apply(classify_subject)

# Afficher les résultats
print(df)

# Save the results to a new CSV file
output_file = 'score_da.csv'
df.to_csv(output_file, index=False)


     PREDICATE                          SUBJECT_NAME  \
0       TREATS                     trastuzumab|ERBB2   
1       TREATS                           anastrozole   
2       TREATS                             Tamoxifen   
3       TREATS                 Immunologic Adjuvants   
4       TREATS  Autologous Stem Cell Transplantation   
...        ...                                   ...   
7764    TREATS                                 CT-P6   
7765    TREATS    Stereotactic Ablative Radiotherapy   
7766    TREATS                              Prodrugs   
7767    TREATS                  Aromatase Inhibitors   
7768    TREATS                      beta-thujaplicin   

                       OBJECT_NAME  Factuality_Score           Cancer_Type  \
0     Malignant neoplasm of breast          0.192807  Female Breast Cancer   
1     Malignant neoplasm of breast          0.192807  Female Breast Cancer   
2     Malignant neoplasm of breast          0.192807  Female Breast Cancer   
3     Malignant

In [9]:
import pandas as pd 

# Load the dataset
df = pd.read_csv("score_da.csv", encoding='latin1')
df.dropna(inplace=True)
df = df[df['PREDICATE'] != 'PROCESS_OF']
output_file_path = 'score_da.csv' 
df.to_csv(output_file_path, index=False)

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the DataFrame
df = pd.read_csv("score_da.csv", encoding='latin1')

# Combine SUBJECT_NAME and OBJECT_NAME into a single text feature
df['text'] = df['SUBJECT_NAME'] + " " + df['OBJECT_NAME']

# Define features and labels
X = df['text']
y = df['Category']

# Separate data into rows with known labels and rows with 'Other'
known_labels_df = df[df['Category'] != 'Other']
unknown_labels_df = df[df['Category'] == 'Other']

# Split known labels into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    known_labels_df['text'], known_labels_df['Category'], test_size=0.3, random_state=42
)

# Convert text to TF-IDF features
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train the model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

# Make predictions and evaluate the model
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Predict categories for rows labeled as 'Other'
X_unknown_tfidf = vectorizer.transform(unknown_labels_df['text'])
unknown_labels_df['Category'] = model.predict(X_unknown_tfidf)

# Combine the DataFrames
df = pd.concat([known_labels_df, unknown_labels_df], axis=0)

# Save the results to a new CSV file
df.to_csv("score_da.csv", index=False)


Accuracy: 0.9579288025889967


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unknown_labels_df['Category'] = model.predict(X_unknown_tfidf)


In [3]:
import pandas as pd
df = pd.read_csv("score_da.csv", encoding='latin1')
df = df.drop(columns=['text'])

# Enregistrer le DataFrame modifié dans un nouveau fichier CSV
df.to_csv('score_da.csv', index=False)

In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv' 
df = pd.read_csv(file_path)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour créer et sauvegarder les graphes
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Male Breast Cancer')]
    
    # Créer le graphe pour la catégorie
    G_category = nx.Graph()
    for _, row in df_category.iterrows():
        G_category.add_edge(row['SUBJECT_NAME'], row['Cancer_Type'], relation=row['Factuality_Score'])
    
    # Visualiser le graphe
    plt.figure(figsize=(11, 5))
    pos = nx.spring_layout(G_category, seed=42)
    
    # Dessiner les nœuds et les arêtes
    nx.draw(G_category, pos, with_labels=True, node_color='lightblue', node_size=900, edge_color='gray', font_size=10, font_weight='bold', alpha=0.9,verticalalignment='center')
    
    # Dessiner les étiquettes des arêtes
    edge_labels = nx.get_edge_attributes(G_category, 'relation')
    nx.draw_networkx_edge_labels(G_category, pos, edge_labels=edge_labels)
    
   
    
    # Enregistrer l'image du graphe
    file_name = f'assets/graph/treats_male/male_cancer_graph_{category.replace(" ", "_")}.png'
    plt.savefig(file_name, dpi=110, bbox_inches='tight')
    plt.close()  


In [2]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv' 
df = pd.read_csv(file_path)


# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour créer et sauvegarder les graphes
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Female Breast Cancer')]
    
    # Créer le graphe pour la catégorie
    G_category = nx.Graph()
    for _, row in df_category.iterrows():
        G_category.add_edge(row['SUBJECT_NAME'], row['Cancer_Type'], relation=row['Factuality_Score'])
    
    # Visualiser le graphe
    plt.figure(figsize=(11, 5))
    pos = nx.spring_layout(G_category, seed=42)
    
    # Dessiner les nœuds et les arêtes
    nx.draw(G_category, pos, with_labels=True, node_color='lightblue', node_size=900, edge_color='gray', font_size=10, font_weight='bold', alpha=0.9,verticalalignment='center')
    
    # Dessiner les étiquettes des arêtes
    edge_labels = nx.get_edge_attributes(G_category, 'relation')
    nx.draw_networkx_edge_labels(G_category, pos, edge_labels=edge_labels)

    
    # Enregistrer l'image du graphe
    file_name = f'assets/graph/treats_female/female_cancer_graph_{category.replace(" ", "_")}.png'
    plt.savefig(file_name, dpi=110, bbox_inches='tight')
    plt.close()  

In [3]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv' 
df = pd.read_csv(file_path)



# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour créer et sauvegarder les graphes
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Recurrent Breast Cancer')]
    
    # Créer le graphe pour la catégorie
    G_category = nx.Graph()
    for _, row in df_category.iterrows():
        G_category.add_edge(row['SUBJECT_NAME'], row['Cancer_Type'], relation=row['Factuality_Score'])
    
    # Visualiser le graphe
    plt.figure(figsize=(11, 5))
    pos = nx.spring_layout(G_category, seed=42)
    
    # Dessiner les nœuds et les arêtes
    nx.draw(G_category, pos, with_labels=True, node_color='lightblue', node_size=900, edge_color='gray', font_size=10, font_weight='bold', alpha=0.9,verticalalignment='center')
    
    # Dessiner les étiquettes des arêtes
    edge_labels = nx.get_edge_attributes(G_category, 'relation')
    nx.draw_networkx_edge_labels(G_category, pos, edge_labels=edge_labels)

    
    # Enregistrer l'image du graphe
    file_name = f'assets/graph/treats_reccu/reccu_cancer_graph_{category.replace(" ", "_")}.png'
    plt.savefig(file_name, dpi=110, bbox_inches='tight')
    plt.close()  

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Charger les données
df = pd.read_csv('score_da.csv')

# Encoder les variables catégorielles
le = LabelEncoder()
df['PREDICATE'] = le.fit_transform(df['PREDICATE'])
df['SUBJECT_NAME'] = le.fit_transform(df['SUBJECT_NAME'])
df['OBJECT_NAME'] = le.fit_transform(df['OBJECT_NAME'])
df['Category'] = le.fit_transform(df['Category'])
df['Cancer_Type'] = le.fit_transform(df['Cancer_Type'])

# Sélectionner les variables d'intérêt
X = df[['PREDICATE', 'SUBJECT_NAME', 'Category']]
y = df['OBJECT_NAME']  # par exemple, l'impact sur l'objet de la relation

# Diviser en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Créer et entraîner un modèle de Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Prédire sur les données de test
y_pred = model.predict(X_test)

# Évaluer la performance
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.20163020163020162


In [8]:
def simulate_intervention(subject_name, cause_category):
    # Encoder les entrées de l'utilisateur
    subject_encoded = le.transform([subject_name])
    cause_category_encoded = le.transform([cause_category])
    
    # Créer une nouvelle entrée à prédire
    new_entry = [[subject_encoded[0], cause_category_encoded[0]]]

    # Prédire l'impact sur l'objet de la relation
    predicted_object = model.predict(new_entry)
    return le.inverse_transform(predicted_object)

# Simuler une intervention
result = simulate_intervention('Estrogens', 'Hormonal Factors')
print(f'Lintervention pourrait potentiellement affecter : {result}')

ValueError: y contains previously unseen labels: 'Estrogens'

In [9]:
from pyvis.network import Network
import pandas as pd
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv'
df = pd.read_csv(file_path)

# Créer le répertoire 'graphs' s'il n'existe pas
output_dir = 'C:/Users/PcPack/cancer/assets/graph/treats_female'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour créer et sauvegarder les graphes
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Female Breast Cancer')]
    
    # Créer le graphe pour la catégorie
    net = Network(notebook=True)
    for _, row in df_category.iterrows():
        # Formater la factualité avec 5 décimales
        formatted_score = f"{row['Factuality_Score']:.7f}"
        
        # Ajouter les nœuds
        net.add_node(row['SUBJECT_NAME'], title=row['SUBJECT_NAME'], size=15, color='lightblue')
        net.add_node(row['Cancer_Type'], title=row['Cancer_Type'], size=15, color='lightblue')
        
        # Ajouter les arêtes avec labels
        net.add_edge(
            row['SUBJECT_NAME'], 
            row['Cancer_Type'], 
            title=f"Factuality Score: {formatted_score}", 
            label=formatted_score,  
            color='gray'
        )
    
    # Sauvegarder le graphe dans le répertoire 'graphs'
    file_path = os.path.join(output_dir, f'{category}.html')
    net.show(file_path)


C:/Users/PcPack/cancer/assets/graph/treats_female\Targeted Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Hormone Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Immunotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Surgery.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Chemotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Adjuvant Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Pharmacotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Radiotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Endocrine Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Alternative Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Gene Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Supportive Care.html
C:/Users/PcPack/cancer/assets/graph/treats_female\Procedures on Breast.html


In [10]:
from pyvis.network import Network
import pandas as pd
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv'
df = pd.read_csv(file_path)

# Créer le répertoire 'graphs' s'il n'existe pas
output_dir = 'C:/Users/PcPack/cancer/assets/graph/treats_male'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour créer et sauvegarder les graphes
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Male Breast Cancer')]
    
    # Créer le graphe pour la catégorie
    net = Network(notebook=True)
    for _, row in df_category.iterrows():
        # Formater la factualité avec 5 décimales
        formatted_score = f"{row['Factuality_Score']:.7f}"
        
        # Ajouter les nœuds
        net.add_node(row['SUBJECT_NAME'], title=row['SUBJECT_NAME'], size=15, color='lightblue')
        net.add_node(row['Cancer_Type'], title=row['Cancer_Type'], size=15, color='lightblue')
        
        # Ajouter les arêtes avec labels
        net.add_edge(
            row['SUBJECT_NAME'], 
            row['Cancer_Type'], 
            title=f"Factuality Score: {formatted_score}", 
            label=formatted_score,  
            color='gray'
        )
    
    # Sauvegarder le graphe dans le répertoire 'graphs'
    file_path = os.path.join(output_dir, f'{category}.html')
    net.show(file_path)


C:/Users/PcPack/cancer/assets/graph/treats_male\Targeted Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Hormone Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Immunotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Surgery.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Chemotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Adjuvant Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Pharmacotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Radiotherapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Endocrine Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Alternative Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Gene Therapy.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Supportive Care.html
C:/Users/PcPack/cancer/assets/graph/treats_male\Procedures on Breast.html


In [11]:
from pyvis.network import Network
import pandas as pd
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv'
df = pd.read_csv(file_path)

# Créer le répertoire 'graphs' s'il n'existe pas
output_dir = 'C:/Users/PcPack/cancer/assets/graph/reccu_treats'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour créer et sauvegarder les graphes
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Recurrent Breast Cancer')]
    
    # Créer le graphe pour la catégorie
    net = Network(notebook=True)
    for _, row in df_category.iterrows():
        # Formater la factualité avec 5 décimales
        formatted_score = f"{row['Factuality_Score']:.7f}"
        
        # Ajouter les nœuds
        net.add_node(row['SUBJECT_NAME'], title=row['SUBJECT_NAME'], size=15, color='lightblue')
        net.add_node(row['Cancer_Type'], title=row['Cancer_Type'], size=15, color='lightblue')
        
        # Ajouter les arêtes avec labels
        net.add_edge(
            row['SUBJECT_NAME'], 
            row['Cancer_Type'], 
            title=f"Factuality Score: {formatted_score}", 
            label=formatted_score,  
            color='gray'
        )
    
    # Sauvegarder le graphe dans le répertoire 'graphs'
    file_path = os.path.join(output_dir, f'{category}.html')
    net.show(file_path)


C:/Users/PcPack/cancer/assets/graph/reccu_treats\Targeted Therapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Hormone Therapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Immunotherapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Surgery.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Chemotherapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Adjuvant Therapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Pharmacotherapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Radiotherapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Endocrine Therapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Alternative Therapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Gene Therapy.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Supportive Care.html
C:/Users/PcPack/cancer/assets/graph/reccu_treats\Procedures on Breast.html


In [1]:
import pandas as pd
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv'
df = pd.read_csv(file_path)

# Créer le répertoire 'assets/tables' s'il n'existe pas
output_dir = 'assets/tables/male_treats'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour extraire les nœuds et sauvegarder dans des fichiers CSV
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Male Breast Cancer')]
    
    # Préparer une liste pour stocker les informations des nœuds
    nodes = []

    # Ajouter les nœuds
    for _, row in df_category.iterrows():
        # Formater la factualité avec 7 décimales
        formatted_score = f"{row['Factuality_Score']:.7f}"
        
        # Ajouter le nœud pour SUBJECT_NAME
        nodes.append({
            'title': row['SUBJECT_NAME'],
            'factuality_score': formatted_score
        })
        
        # Ajouter le nœud pour Cancer_Type (si ce n'est pas déjà ajouté)
        if not any(node['title'] == row['Cancer_Type'] for node in nodes):
            nodes.append({
                'title': row['Cancer_Type'],
                'factuality_score': formatted_score
            })
    
    # Convertir la liste des nœuds en DataFrame, en retirant les doublons
    nodes_df = pd.DataFrame(nodes).drop_duplicates()
    
    # Sauvegarder les données des nœuds dans un fichier CSV spécifique à chaque catégorie
    nodes_csv_path = os.path.join(output_dir, f'nodes_with_factuality_scores_{category}.csv')
    nodes_df.to_csv(nodes_csv_path, index=False)


In [2]:
import pandas as pd
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv'
df = pd.read_csv(file_path)

# Créer le répertoire 'assets/tables' s'il n'existe pas
output_dir = 'assets/tables/female_treats'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour extraire les nœuds et sauvegarder dans des fichiers CSV
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Female Breast Cancer')]
    
    # Préparer une liste pour stocker les informations des nœuds
    nodes = []

    # Ajouter les nœuds
    for _, row in df_category.iterrows():
        # Formater la factualité avec 7 décimales
        formatted_score = f"{row['Factuality_Score']:.7f}"
        
        # Ajouter le nœud pour SUBJECT_NAME
        nodes.append({
            'title': row['SUBJECT_NAME'],
            'factuality_score': formatted_score
        })
        
        # Ajouter le nœud pour Cancer_Type (si ce n'est pas déjà ajouté)
        if not any(node['title'] == row['Cancer_Type'] for node in nodes):
            nodes.append({
                'title': row['Cancer_Type'],
                'factuality_score': formatted_score
            })
    
    # Convertir la liste des nœuds en DataFrame, en retirant les doublons
    nodes_df = pd.DataFrame(nodes).drop_duplicates()
    
    # Sauvegarder les données des nœuds dans un fichier CSV spécifique à chaque catégorie
    nodes_csv_path = os.path.join(output_dir, f'nodes_with_factuality_scores_{category}.csv')
    nodes_df.to_csv(nodes_csv_path, index=False)


In [3]:
import pandas as pd
import os

# Charger le fichier CSV dans un DataFrame
file_path = 'score_da.csv'
df = pd.read_csv(file_path)

# Créer le répertoire 'assets/tables' s'il n'existe pas
output_dir = 'assets/tables/reccu_treats'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Obtenir les catégories uniques
categories = df['Category'].unique()

# Itérer sur chaque catégorie pour extraire les nœuds et sauvegarder dans des fichiers CSV
for category in categories:
    # Filtrer les données pour la catégorie actuelle et le Cancer_Type spécifique
    df_category = df[(df['Category'] == category) & (df['Cancer_Type'] == 'Recurrent Breast Cancer')]
    
    # Préparer une liste pour stocker les informations des nœuds
    nodes = []

    # Ajouter les nœuds
    for _, row in df_category.iterrows():
        # Formater la factualité avec 7 décimales
        formatted_score = f"{row['Factuality_Score']:.7f}"
        
        # Ajouter le nœud pour SUBJECT_NAME
        nodes.append({
            'title': row['SUBJECT_NAME'],
            'factuality_score': formatted_score
        })
        
        # Ajouter le nœud pour Cancer_Type (si ce n'est pas déjà ajouté)
        if not any(node['title'] == row['Cancer_Type'] for node in nodes):
            nodes.append({
                'title': row['Cancer_Type'],
                'factuality_score': formatted_score
            })
    
    # Convertir la liste des nœuds en DataFrame, en retirant les doublons
    nodes_df = pd.DataFrame(nodes).drop_duplicates()
    
    # Sauvegarder les données des nœuds dans un fichier CSV spécifique à chaque catégorie
    nodes_csv_path = os.path.join(output_dir, f'nodes_with_factuality_scores_{category}.csv')
    nodes_df.to_csv(nodes_csv_path, index=False)
