In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import re


In [6]:
# Load the abstracts table that every later cell reads from.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run on a machine with this exact directory layout — consider a path
# relative to the project once the intended location is confirmed.
df = pd.read_csv(
    filepath_or_buffer='C:\\Users\\pjcun\\Dropbox\\M2 AI4PH\\NLP\\NLP_LitReview\\src\\analysis\\Familly NICU.csv'
)

In [18]:
# Corpus size; used below as the denominator of every percentage.
n_total = df.shape[0]
print(f"Abstract included : {n_total}")

# Publication years, ignoring rows where the year is missing.
years = df['Year'].dropna()
print(f"\nYear :")
print(f"  • Range : {int(years.min())} - {int(years.max())}")
print(f"  • Median : {int(years.median())}")

# Distribution per decade: floor each year to the start of its decade
# (e.g. 1996 -> 1990) and store it as a new column on df.
df['Decade'] = (df['Year'] // 10) * 10
decade_counts = df['Decade'].value_counts().sort_index()
print(f"\nRepartition :")
for decade_start, n_in_decade in decade_counts.items():
    if pd.isna(decade_start):
        # Skip a missing decade label, should one appear in the index.
        continue
    # Percentage is relative to all rows, including rows without a year.
    share = (n_in_decade / n_total) * 100
    print(f"  • {int(decade_start)}s : {n_in_decade} ({share:.1f}%)")

Abstract included : 36

Year :
  • Range : 1996 - 2025
  • Median : 2020

Repartition :
  • 1990s : 2 (5.6%)
  • 2000s : 2 (5.6%)
  • 2010s : 11 (30.6%)
  • 2020s : 20 (55.6%)


In [13]:
def identify_study_types(text):
    """Classify an abstract into a single study-design label by keyword search.

    The abstract is lower-cased and tested against keyword groups in a fixed
    priority order; the label of the first group with a hit is returned.

    Parameters
    ----------
    text : str or missing value
        Abstract text. Anything that is not a string (NaN, None, numbers)
        is treated as a missing abstract.

    Returns
    -------
    str
        One of 'RCT', 'Review', 'Qualitative', 'Cohort/Longitudinal',
        'Cross-Sectional/Survey', 'Case-control', 'Rétrospective', 'QPilote',
        or 'Unspecified' when the abstract is missing or nothing matches.
    """
    # Missing abstracts share the same label as unmatched ones so that they
    # are not split into two separate categories when the labels are counted.
    if not isinstance(text, str):
        return 'Unspecified'

    text_lower = text.lower()

    # The acronym must be matched as a whole token: as a bare substring,
    # 'rct' also occurs inside ordinary words such as "infarct", which would
    # mislabel those abstracts as randomized trials.
    if re.search(r'\brcts?\b', text_lower):
        return 'RCT'

    # (label, substring keywords), in priority order: the first hit wins.
    rules = (
        ('RCT', ('randomized', 'randomised', 'random allocation')),
        ('Review', ('systematic review', 'meta-analysis', 'meta analysis')),
        ('Qualitative', ('qualitative', 'phenomenological', 'grounded theory', 'interview')),
        ('Cohort/Longitudinal', ('cohort', 'longitudinal', 'prospective')),
        ('Cross-Sectional/Survey', ('cross-sectional', 'cross sectional', 'survey')),
        ('Case-control', ('case-control', 'case control')),
        ('Rétrospective', ('retrospective', 'chart review', 'medical record')),
        ('QPilote', ('quasi-experimental', 'quasi experimental', 'pilot')),
    )
    for label, keywords in rules:
        if any(keyword in text_lower for keyword in keywords):
            return label

    return 'Unspecified'

# Label every abstract with its detected study design, then tally the labels.
study_labels = df['Abstract'].apply(identify_study_types)
df['Study_Type'] = study_labels
study_type_counts = df['Study_Type'].value_counts()

# Report each design with its share of the whole corpus (n_total rows).
print(f"Study type :")
for design, n_studies in study_type_counts.items():
    share = (n_studies / n_total) * 100
    print(f"  • {design} : {n_studies} ({share:.1f}%)")

Study type :
  • Unspecified : 13 (36.1%)
  • Qualitative : 9 (25.0%)
  • Cohort/Longitudinal : 6 (16.7%)
  • Rétrospective : 2 (5.6%)
  • Cross-Sectional/Survey : 2 (5.6%)
  • RCT : 2 (5.6%)
  • QPilote : 1 (2.8%)
  • Review : 1 (2.8%)


In [16]:
def identify_populations(text):
    """List the patient populations mentioned in an abstract.

    The abstract is lower-cased and checked against every population's
    keywords; unlike a single-label classifier, all matching populations
    are returned, in a fixed order.

    Parameters
    ----------
    text : str or missing value
        Abstract text. Anything that is not a string (NaN, None, numbers)
        is treated as a missing abstract.

    Returns
    -------
    list of str
        Matching labels among 'Stroke', 'TBI', 'SAH', 'Brain tumor',
        'Cardiac Arrest' and 'palliative care'; ['Unspecified'] when the
        abstract matches nothing; [] when the abstract is missing.
    """
    if not isinstance(text, str):
        return []

    text_lower = text.lower()

    # (label, substring keywords, acronym regex or None).
    # Acronyms are matched as whole tokens: as bare substrings, 'tbi' and
    # 'sah' also occur inside unrelated words (e.g. "postbirth",
    # "Sahlgrenska"). The optional prefixes keep the common clinical forms
    # aSAH / mTBI / sTBI matching, as they did with substring search.
    rules = (
        ('Stroke', ('stroke', 'cerebrovascular'), None),
        ('TBI', ('traumatic brain injury', 'head injury'), r'\b[ms]?tbis?\b'),
        ('SAH',
         ('subarachnoid hemorrhage', 'subarachnoid haemorrhage', 'aneurysm'),
         r'\ba?sahs?\b'),
        ('Brain tumor',
         ('brain tumor', 'brain tumour', 'glioma', 'neurosurgical'),
         None),
        ('Cardiac Arrest', ('cardiac arrest', 'resuscitation'), None),
        ('palliative care', ('palliative', 'end-of-life', 'withdrawal'), None),
    )

    populations = []
    for label, keywords, acronym_pattern in rules:
        keyword_hit = any(keyword in text_lower for keyword in keywords)
        acronym_hit = (
            acronym_pattern is not None
            and re.search(acronym_pattern, text_lower) is not None
        )
        if keyword_hit or acronym_hit:
            populations.append(label)

    return populations if populations else ['Unspecified']

# Gather the population labels of every abstract into one flat list
# (an abstract can contribute several labels, or none if it is missing).
all_populations = [
    label
    for abstract in df['Abstract']
    for label in identify_populations(abstract)
]

# Tally the labels and report them from most to least frequent.
# Shares are relative to the number of abstracts, so they may sum to
# more than 100% when abstracts mention several populations.
pop_counts = Counter(all_populations)
print(f"Populations :")
for label, n_mentions in pop_counts.most_common():
    share = (n_mentions / n_total) * 100
    print(f"  • {label} : {n_mentions} ({share:.1f}%)")

Populations :
  • Unspecified : 26 (72.2%)
  • Stroke : 5 (13.9%)
  • palliative care : 4 (11.1%)
  • TBI : 3 (8.3%)
  • Brain tumor : 1 (2.8%)
  • Cardiac Arrest : 1 (2.8%)
