# Import librairies

In [7]:
import pandas as pd
import requests
import holidays

# Récupérer les vacances scolaires

In [2]:
# URL de l'API des vacances scolaires
url_vacances = "https://data.education.gouv.fr/api/records/1.0/search/?dataset=fr-en-calendrier-scolaire&q=&rows=1000&sort=start_date&facet=zone&facet=description&facet=start_date&facet=end_date"

# Charger les données
response = requests.get(url_vacances)
if response.status_code == 200 :
    data = response.json()
    records = data['records']

# Convertir en DataFrame
df = pd.json_normalize(records)

df.columns = df.columns.str.replace('^fields.', '', regex=True)

# Filtrer la Zone B uniquement
df_zone_b_nantes = df[(df['zones'] == 'Zone B') & (df['location'] == 'Nantes')].copy()

# Filtrer années 2025 et 2026
df_year_filtered = df_zone_b_nantes[(df_zone_b_nantes['annee_scolaire'] == '2024-2025') | (df_zone_b_nantes['annee_scolaire'] == '2025-2026')].copy()

# Garder l'essentiel
df_vacances = df_year_filtered[['start_date', 'end_date', 'location', 'annee_scolaire', 'description']].copy()
df_vacances['end_date'] = pd.to_datetime(df_vacances['end_date']).dt.date
df_vacances['start_date'] = pd.to_datetime(df_vacances['start_date']).dt.date


In [3]:
df_vacances.head()

Unnamed: 0,start_date,end_date,location,annee_scolaire,description
28,2026-07-03,2026-07-03,Nantes,2025-2026,Début des Vacances d'Été
58,2026-05-13,2026-05-17,Nantes,2025-2026,Pont de l'Ascension
79,2026-04-10,2026-04-26,Nantes,2025-2026,Vacances de Printemps
111,2026-02-13,2026-03-01,Nantes,2025-2026,Vacances d'Hiver
150,2025-12-19,2026-01-04,Nantes,2025-2026,Vacances de Noël


Faire une ligne par jour de vacances

In [4]:
# On génère une colonne avec les plages de dates
df_vacances['dates'] = df_vacances.apply(
    lambda row: pd.date_range(start=row['start_date'], end=row['end_date']).tolist(),
    axis=1
)

df_vacances_day_by_day = df_vacances.explode('dates')

df_vacances_final = df_vacances_day_by_day[['dates', 'description']].reset_index(drop=True)


In [216]:
df_vacances_final.head()

Unnamed: 0,dates,description
0,2026-07-03,Début des Vacances d'Été
1,2026-05-13,Pont de l'Ascension
2,2026-05-14,Pont de l'Ascension
3,2026-05-15,Pont de l'Ascension
4,2026-05-16,Pont de l'Ascension


In [5]:
df_vacances_final['is_holiday'] = 1

# Récupérer les jours feriés

In [11]:
# Récupération des jours fériés
fr_holidays = holidays.France(years=[2025,2026])
jours_feries_data = [{'dates': date, 'description': name} for date, name in sorted(fr_holidays.items())]
df_feries = pd.DataFrame(jours_feries_data)

In [12]:
df_feries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   dates        22 non-null     object
 1   description  22 non-null     object
dtypes: object(2)
memory usage: 480.0+ bytes


In [13]:
df_feries['is_public_holiday'] = 1

# Récupérer les jours de week-end

In [26]:
start_date = '2025-02-15'
end_date = '2026-12-31'

# Générer toutes les dates
dates = pd.date_range(start=start_date, end=end_date, freq='D')

df_all_days = pd.DataFrame({'dates': dates})
df_all_days['description'] = df_all_days['dates'].dt.day_name()

In [27]:
df_all_days

Unnamed: 0,dates,description
0,2025-02-15,Saturday
1,2025-02-16,Sunday
2,2025-02-17,Monday
3,2025-02-18,Tuesday
4,2025-02-19,Wednesday
...,...,...
680,2026-12-27,Sunday
681,2026-12-28,Monday
682,2026-12-29,Tuesday
683,2026-12-30,Wednesday


# Grouper les dataset

In [28]:
# Ajouter les indicateurs
df_all_days['is_vacances'] = df_all_days['dates'].isin(df_vacances_final['dates']).astype(int)
df_all_days['is_ferie'] = df_all_days['dates'].isin(df_feries['dates']).astype(int)

  df_all_days['is_ferie'] = df_all_days['dates'].isin(df_feries['dates']).astype(int)


In [29]:
df_all_days['dates'] = pd.to_datetime(df_all_days['dates'])

In [30]:
df_all_days.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 685 entries, 0 to 684
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   dates        685 non-null    datetime64[ns]
 1   description  685 non-null    object        
 2   is_vacances  685 non-null    int64         
 3   is_ferie     685 non-null    int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 21.5+ KB


In [31]:
df_all_days

Unnamed: 0,dates,description,is_vacances,is_ferie
0,2025-02-15,Saturday,1,0
1,2025-02-16,Sunday,1,0
2,2025-02-17,Monday,1,0
3,2025-02-18,Tuesday,1,0
4,2025-02-19,Wednesday,1,0
...,...,...,...,...
680,2026-12-27,Sunday,0,0
681,2026-12-28,Monday,0,0
682,2026-12-29,Tuesday,0,0
683,2026-12-30,Wednesday,0,0


In [None]:
#df_all_days.to_csv('data/preprocessed/weekend_holiday_public_holidays_202504112326.csv', index=False)