In [1]:
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from scipy.ndimage import binary_dilation
import pandas as pd
from wordcloud import WordCloud
from ipywidgets import interact, IntSlider

In [2]:
# Read the CSV file into a pandas DataFrame
csv_file_path = "dpt2020.csv"
df = pd.read_csv(csv_file_path, delimiter=";")

# Rename the column 'annais' to 'annee' and keep only rows whose year parses
# as a number: errors='coerce' turns anything else into NaN, which is dropped.
df = df.rename(columns={'annais': 'annee'})
df['annee'] = pd.to_numeric(df['annee'], errors='coerce')
# Explicit copy so the column assignments below write to an independent frame
# rather than to a possible view of the pre-dropna data.
df = df.dropna(subset=['annee']).copy()
df['annee'] = df['annee'].astype(int)

# Remove the "_PRENOMS_RARES" marker from values in the 'preusuel' column.
# regex=False makes this a literal substring replacement regardless of the
# pandas version's default for `regex`.
df['preusuel'] = df['preusuel'].str.replace('_PRENOMS_RARES', '', regex=False)

# Rows whose name consisted only of the marker now have an empty name. Drop
# them: otherwise '' is grouped like any other name and handed to the word
# cloud as a word carrying the summed count of all such rows.
df = df[df['preusuel'] != ''].copy()

# Group the data by 'preusuel' and sum the 'nombre' values (all years combined)
grouped = df.groupby('preusuel')['nombre'].sum()

# Create the word frequencies dictionary: {name: total count}
word_frequencies = grouped.to_dict()

# Load the baby picture and expose its pixels as an RGBA array
baby_image_path = "./baby.jpg"
baby_image = Image.open(baby_image_path).convert("RGBA")
baby_data = np.array(baby_image)

# Target colour as R, G, B, Alpha.
# NOTE(review): (52, 136, 86) is #348856, while the earlier comment here
# referred to #338855 - confirm which value the image actually uses.
color = (52, 136, 86, 255)
color_rgba = np.array(color, dtype=np.uint8)

# Whiten every pixel that is not an exact RGBA match for the target colour
matches_target = (baby_data == color_rgba).all(axis=-1)
baby_data[~matches_target] = [255, 255, 255, 255]

# Mask is 255 wherever the (modified) image is pure white, 0 elsewhere
is_white = np.all(baby_data[..., :3] == [255, 255, 255], axis=-1)
mask = np.where(is_white, 255, 0).astype(np.uint8)

# Bitwise inversion of the uint8 mask: 255 <-> 0, so the inverted mask is
# 255 on target-coloured pixels and 0 on the whitened ones
inverted_mask = ~mask

# Function to update the word cloud based on the selected year
def update_wordcloud(year):
    """Draw the first-name word cloud for a single year.

    Reads the module-level ``df`` (columns 'annee', 'preusuel', 'nombre'),
    ``inverted_mask`` and ``baby_data``; nothing is modified.

    Parameters
    ----------
    year : int
        Value matched against ``df['annee']``.

    Returns
    -------
    None
        The figure is displayed as a side effect. If no rows match ``year``
        a short message is printed instead and nothing is drawn.
    """
    # Keep only the rows for the requested year and total the counts per name
    year_rows = df[df['annee'] == year]
    totals_by_name = year_rows.groupby('preusuel')['nombre'].sum()
    word_frequencies = totals_by_name.to_dict()

    # WordCloud.generate_from_frequencies raises ValueError when given no
    # words, so report the empty year instead of surfacing a traceback.
    if not word_frequencies:
        print(f"No data available for {year}.")
        return

    # Build the word cloud constrained by the inverted mask
    wordcloud = WordCloud(background_color="white", mask=inverted_mask)
    wordcloud.generate_from_frequencies(word_frequencies)

    # Draw the (whitened) baby image first, then the word cloud on top of it
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.imshow(baby_data, interpolation="bilinear")
    ax.imshow(wordcloud, interpolation="bilinear", alpha=0.9)
    ax.axis("off")

    # Show the figure from inside the callback so each slider change renders
    # its own figure instead of relying on a show() call made after interact().
    plt.show()

# The slider bounds come from the range of years present in the dataset
years = df['annee']
earliest_year = years.min()
latest_year = years.max()

# Year selector: one step per year, initially positioned on the earliest year
year_slider = IntSlider(
    value=earliest_year,
    min=earliest_year,
    max=latest_year,
    step=1,
)

# Re-run update_wordcloud every time the slider value changes
interact(update_wordcloud, year=year_slider)
plt.show()

interactive(children=(IntSlider(value=1900, description='year', max=2020, min=1900), Output()), _dom_classes=(…