# **1. Data Preparation**

In [1]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
import unicodedata

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
import re
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

french_stopwords = stopwords.words('french')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


### Extract Job Titles

In [None]:
# Target URL
url = "https://candidat.francetravail.fr/offres/emploi/informatique-telecoms/s28"

# Define headers to look like a real browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/118.0.0.0 Safari/537.36"
}

# Send GET request
response = requests.get(url, headers=headers)
response.raise_for_status()  # Raise error if request failed

# Parse HTML
soup = BeautifulSoup(response.text, "html.parser")

# Find the container div
container = soup.find("div", class_="container-list")

# Extract <li> items with <span class="media-body">
job_items = []
if container:
    for li in container.find_all("li"):
        span = li.find("span", class_="media-body")
        if span:
            # Extract job title text
            title = span.get_text(strip=True)
            job_items.append(title)

print(job_items)

['Développeur informatique', 'Chef de projet informatique', 'Consultant informatique', 'Chef de projet multimédia', 'Technicien de maintenance informatique', 'Développeur php', 'Développeur web', 'Webmaster', 'Développeur java', 'Testeur', 'Administrateur systèmes et réseaux informatique', 'Chef de projet technique', 'Chef de projet moa', 'Monteur-câbleur', 'Data scientist', 'Product owner', 'Product manager', 'Data analyst', 'Technicien télécom', 'Analyste programmeur', 'Architecte logiciel', 'Consultant décisionnel', 'Administrateur linux', 'Développeur javascript', 'Expert cybersécurité', 'Traffic manager', 'Delivery manager', 'Scrum master', 'Chef de produit technique en informatique', 'Concepteur de jeux vidéo']


### Extract Job Offres

In [None]:
CLIENT_ID = "PAR_projetfouilledonnes_4e2e68b7cde26f6e0f7a0866b73f38b3176e833eabb00de58b9529cfa955decb"
CLIENT_SECRET = "e20d7ba4bfcea8b863521fcafb8b92e21d75980952d340e737fa2dadfc805b55"
TOKEN_URL = "https://entreprise.francetravail.fr/connexion/oauth2/access_token?realm=/partenaire"
API_BASE = "https://api.francetravail.io/partenaire/offresdemploi/v2/offres/search"

def get_token():
    payload = {
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": "o2dsoffre api_offresdemploiv2"
    }
    r = requests.post(TOKEN_URL, data=payload)
    r.raise_for_status()
    return r.json()["access_token"]

def search_offres(token, mots_cles="Programmation informatique", departement="74", range_="0-5"):
    headers = {"Authorization": f"Bearer {token}"}
    params = {
        "motsCles": mots_cles,
        #"departement": departement,
        #"range": range_
    }
    r = requests.get(API_BASE, headers=headers, params=params)
    r.raise_for_status()
    return r.json()

# Get token
token = get_token()
print("Token OK:", token)

# Iterate over job_items and collect all offers in a single dict
all_offres = {}  # Key: job title, Value: API result
for job in job_items:
    try:
        result = search_offres(token, mots_cles=job)
        all_offres[job] = result  # store API response under job title
    except Exception as e:
        print(f"Error searching '{job}': {e}")
        all_offres[job] = None

# Now all_offres contains a single dict with all jobs
print(json.dumps(all_offres, indent=4, ensure_ascii=False))

### Convert Json to DataFrame

In [None]:
# Function to flatten nested lists in a DataFrame
def flatten_lists(x):
    if isinstance(x, list):
        return json.dumps(x, ensure_ascii=False)
    return x

# Collect all offers in a single list
all_results = []
for job_title in list(all_offres.keys()):
    offres_list = all_offres[job_title].get("resultats", [])
    all_results.extend(offres_list)

# Create a DataFrame
df = pd.json_normalize(all_results, sep='.')

# Flatten any list columns
df = df.applymap(flatten_lists)

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 150)

In [None]:
df.to_csv("francetravail.csv",index=False)

### Data Preprocessing

In [2]:
df = pd.read_csv("francetravail.csv")

In [3]:
df = df[df['typeContrat'].isin(['CDI','CDD','MIS'])]

In [4]:
## Step 1: Identify Feature Groups

text_cols = ['intitule', 'description']
cat_cols  = ['natureContrat', 'experienceExige', 'entrepriseAdaptee', 'employeurHandiEngage', 'entreprise.nom', 'lieuTravail.commune']
num_cols  = ['secteurActivite','nombrePostes', 'lieuTravail.latitude', 'lieuTravail.longitude', 'lieuTravail.codePostal']

target = 'typeContrat'

In [5]:
## Step 2: Handlle Text Columns

class TextCleaner(BaseEstimator, TransformerMixin):
    """
    Cleans text by:
    - Lowercasing
    - Removing accents
    - Removing special characters
    - Removing extra spaces
    """
    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        def clean_text(text):
            if pd.isna(text):
                return ""
            # lowercase
            text = text.lower()
            # remove accents
            text = ''.join(
                c for c in unicodedata.normalize('NFD', text)
                if unicodedata.category(c) != 'Mn'
            )
            # remove special characters (keep letters, numbers, and spaces)
            text = re.sub(r'[^a-z0-9\s]', ' ', text)
            # remove extra spaces
            text = re.sub(r'\s+', ' ', text).strip()
            return text

        return X[self.key].apply(clean_text)


In [6]:
# Build pipeline with cleaning + TF-IDF
text_pipeline = FeatureUnion([
    ('intitule_tfidf', Pipeline([
        ('cleaner', TextCleaner('intitule')),
        ('tfidf', TfidfVectorizer(max_features=300, stop_words=french_stopwords))
    ])),
    ('description_tfidf', Pipeline([
        ('cleaner', TextCleaner('description')),
        ('tfidf', TfidfVectorizer(max_features=1000, stop_words=french_stopwords))
    ]))
])

X_train_text = df[text_cols]
X_train_transformed = text_pipeline.fit_transform(X_train_text)

In [7]:
## Step 3: Handlle Categorical Columns

# Categorical pipeline
cat_pipeline = Pipeline([
    # Impute missing values with the most frequent category
    ('imputer', SimpleImputer(strategy='most_frequent')),

    # One-hot encode categories
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Apply to your categorical data
X_cat = df[cat_cols]
X_cat_transformed = cat_pipeline.fit_transform(X_cat)

In [8]:
## Step 4: Handlle Numerical Columns

num_pipeline = Pipeline([
    # 1Impute missing values with median
    ('imputer', SimpleImputer(strategy='median')),

    # Scale features to mean=0, std=1
    ('scaler', StandardScaler())
])

# Apply to numerical data
X_num = df[num_cols]
X_num_transformed = num_pipeline.fit_transform(X_num)

In [9]:
## Step 5 : Build unified preprocessing pipeline

preprocessor = ColumnTransformer(transformers=[
    # Text features
    ('text',
     Pipeline([
         ('union', FeatureUnion([
             ('intitule_tfidf', Pipeline([
                 ('cleaner', TextCleaner('intitule')),
                 ('tfidf', TfidfVectorizer(max_features=300, stop_words=french_stopwords))
             ])),
             ('description_tfidf', Pipeline([
                 ('cleaner', TextCleaner('description')),
                 ('tfidf', TfidfVectorizer(max_features=1000, stop_words=french_stopwords))
             ]))
         ]))
     ]), text_cols),

    # Categorical features
    ('cat',
     Pipeline([
         ('imputer', SimpleImputer(strategy='most_frequent')),
         ('onehot', OneHotEncoder(handle_unknown='ignore'))
     ]), cat_cols),

    # Numerical features
    ('num',
     Pipeline([
         ('imputer', SimpleImputer(strategy='median')),
         ('scaler', StandardScaler())
     ]), num_cols)
])

# Fit and transform

X = df[text_cols + cat_cols + num_cols]
y = df[target]

X = preprocessor.fit_transform(X)

In [10]:
## Step 6 : Convert y to numerical

# Initialize encoder
le = LabelEncoder()

# Fit on y and transform to numeric
y = le.fit_transform(y)

In [33]:
y

array([1, 0, 2, ..., 1, 1, 1])

# **2. Models Training**

In [13]:
!pip install -q catboost

In [24]:
from datetime import datetime
import os
from sklearn.model_selection import StratifiedKFold

In [22]:
OUTPUT_PATH = '/content'

# Get current date and time
now = datetime.now()
folder_name = f"outputs_{now.year}_{now.day:02d}_{now.hour:02d}_{now.minute:02d}"

# Full path for new model directory
OUTPUT_PATH = os.path.join(OUTPUT_PATH, folder_name)

# Create the directory if it doesn't exist
os.makedirs(OUTPUT_PATH, exist_ok=True)

In [25]:
## Step 1 : Split data

from sklearn.model_selection import train_test_split

# Split: 90% Train/Val + 10% Test

X_train_val, X_test, y_train_val, y_test  = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y
)

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [27]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

classes = np.unique(y)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y)

class_weight_dict = dict(zip(classes, class_weights))

print("Recommended class_weight per class:", class_weight_dict)

Recommended class_weight per class: {np.int64(0): np.float64(2.8465732087227416), np.int64(1): np.float64(0.41609744990892533), np.int64(2): np.float64(4.074693422519509)}


In [28]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.svm import SVC

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_predict
from sklearn.metrics import classification_report, confusion_matrix, balanced_accuracy_score, f1_score,accuracy_score
import joblib
import os
from datetime import datetime
from itertools import combinations
import json

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier

# Assuming you already have class_weight_dict:
# class_weight_dict = {'CDI': 0.416, 'CDD': 2.85, 'MIS': 4.08}

CLASSIFIERS = {
    # -----------------------------
    # Random Forest (multiclass)
    # -----------------------------
    "RandomForest": RandomForestClassifier(
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features='sqrt',
        bootstrap=True,
        class_weight=class_weight_dict,
        random_state=42,
        n_jobs=-1
    ),

    # -----------------------------
    # XGBoost (multiclass)
    # -----------------------------
    "XGBoost": XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.3,
        subsample=1,
        colsample_bytree=1,
        reg_alpha=0,
        reg_lambda=1,
        use_label_encoder=False,
        eval_metric='mlogloss',
        random_state=42,
        verbosity=1
    ),

    # -----------------------------
    # CatBoost (multiclass)
    # -----------------------------
    "CatBoost": CatBoostClassifier(
        iterations=100,
        depth=6,
        learning_rate=0.03,
        l2_leaf_reg=3,
        verbose=0,
        random_seed=42,
        class_weights=[float(class_weight_dict[0]),
                       float(class_weight_dict[1]),
                       float(class_weight_dict[2])]
    ),

    # -----------------------------
    # SVM (multiclass)
    # -----------------------------
    "SVM": SVC(
        C=1.0,
        kernel='rbf',
        degree=3,
        gamma='scale',
        coef0=0.0,
        shrinking=True,
        probability=False,
        tol=1e-3,
        cache_size=200,
        class_weight=class_weight_dict,
        verbose=False,
        max_iter=-1,
        random_state=42
    ),

    # -----------------------------
    # CART / Decision Tree
    # -----------------------------
    "CART": DecisionTreeClassifier(
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        class_weight=class_weight_dict,  # handles multiclass
        random_state=42
    ),

    # -----------------------------
    # AdaBoost (with DecisionTree base)
    # -----------------------------
    "AdaBoost": AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1),
        n_estimators=100,
        learning_rate=1.0,
        random_state=42
    ),

    # -----------------------------
    # Bagging (with DecisionTree base)
    # -----------------------------
    "Bagging": BaggingClassifier(
        estimator=DecisionTreeClassifier(),
        n_estimators=100,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        n_jobs=-1,
        random_state=42
    )
}

In [29]:
from sklearn.metrics import (
    balanced_accuracy_score, f1_score, classification_report,
    confusion_matrix, jaccard_score, roc_auc_score
)
import numpy as np
import json
import joblib
import os
import pandas as pd

all_metrics = []

for name, clf in CLASSIFIERS.items():
    print(f"\n🔍 Training: {name}")

    # Cross-Validated Predictions
    y_pred_cv = cross_val_predict(clf, X_train_val, y_train_val, cv=cv, method="predict")

    # --- If classifier supports probability estimates ---
    try:
        y_prob_cv = cross_val_predict(clf, X_train_val, y_train_val, cv=cv, method="predict_proba")
        y_prob_test = clf.fit(X_train_val, y_train_val).predict_proba(X_test)
    except Exception:
        y_prob_cv = None
        y_prob_test = None

    # Evaluation on Train-Val (Cross-Validation Results)
    balanced_acc_cv = balanced_accuracy_score(y_train_val, y_pred_cv)
    f1_macro_cv = f1_score(y_train_val, y_pred_cv, average='macro')
    jaccard_cv = jaccard_score(y_train_val, y_pred_cv, average='macro')

    # ROC-AUC (only if probabilities available)
    if y_prob_cv is not None:
        try:
            roc_auc_cv = roc_auc_score(y_train_val, y_prob_cv, multi_class='ovr')
        except Exception:
            roc_auc_cv = np.nan
    else:
        roc_auc_cv = np.nan

    report_cv = classification_report(y_train_val, y_pred_cv, output_dict=True)
    cm_cv = confusion_matrix(y_train_val, y_pred_cv).tolist()

    # Retrain on full Train/Val set (after CV)
    clf.fit(X_train_val, y_train_val)

    # Evaluate on Test Set
    y_pred_test = clf.predict(X_test)
    balanced_acc_test = balanced_accuracy_score(y_test, y_pred_test)
    f1_macro_test = f1_score(y_test, y_pred_test, average='macro')
    jaccard_test = jaccard_score(y_test, y_pred_test, average='macro')

    if y_prob_test is not None:
        try:
            roc_auc_test = roc_auc_score(y_test, y_prob_test, multi_class='ovr')
        except Exception:
            roc_auc_test = np.nan
    else:
        roc_auc_test = np.nan

    report_test = classification_report(y_test, y_pred_test, output_dict=True)
    cm_test = confusion_matrix(y_test, y_pred_test).tolist()

    # Collect Metrics
    metric = {
        "classifier": name,
        "balanced_accuracy_cv": balanced_acc_cv,
        "f1_macro_cv": f1_macro_cv,
        "jaccard_cv": jaccard_cv,
        "roc_auc_cv": roc_auc_cv,
        "balanced_accuracy_test": balanced_acc_test,
        "f1_macro_test": f1_macro_test,
        "jaccard_test": jaccard_test,
        "roc_auc_test": roc_auc_test,
        "cross_val_report": json.dumps(report_cv),
        "test_report": json.dumps(report_test),
        "cross_val_confusion_matrix": json.dumps(cm_cv),
        "test_confusion_matrix": json.dumps(cm_test)
    }

    all_metrics.append(metric)

    print(metric)

    # Save model
    joblib.dump(clf, os.path.join(OUTPUT_PATH, f"{name}_model.pkl"))

# Save Metrics
metrics_df = pd.DataFrame(all_metrics)
metrics_df.to_csv(os.path.join(OUTPUT_PATH, "all_classifiers_metrics.csv"), index=False)

print("\n✅ All classifiers trained, metrics saved.")


🔍 Training: RandomForest
{'classifier': 'RandomForest', 'balanced_accuracy_cv': np.float64(0.7275947573373559), 'f1_macro_cv': 0.8041523654538819, 'jaccard_cv': np.float64(0.6863154117040127), 'roc_auc_cv': np.float64(0.9781587772352897), 'balanced_accuracy_test': np.float64(0.8146748860138812), 'f1_macro_test': 0.8623357576845949, 'jaccard_test': np.float64(0.7658254118582436), 'roc_auc_test': np.float64(0.969599936696607), 'cross_val_report': '{"0": {"precision": 1.0, "recall": 0.6077922077922078, "f1-score": 0.7560581583198708, "support": 385.0}, "1": {"precision": 0.9097848716169327, "recall": 0.9950664136622391, "f1-score": 0.9505165851005981, "support": 2635.0}, "2": {"precision": 0.9017341040462428, "recall": 0.5799256505576208, "f1-score": 0.7058823529411765, "support": 269.0}, "accuracy": 0.9157798723016114, "macro avg": {"precision": 0.9371729918877252, "recall": 0.7275947573373559, "f1-score": 0.8041523654538819, "support": 3289.0}, "weighted avg": {"precision": 0.919686716

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


{'classifier': 'XGBoost', 'balanced_accuracy_cv': np.float64(0.8865264576543259), 'f1_macro_cv': 0.9030508693169071, 'jaccard_cv': np.float64(0.8271415474831603), 'roc_auc_cv': np.float64(0.9855106646480124), 'balanced_accuracy_test': np.float64(0.8913150073639001), 'f1_macro_test': 0.9016168068444629, 'jaccard_test': np.float64(0.8257145609501789), 'roc_auc_test': np.float64(0.9840211566482965), 'cross_val_report': '{"0": {"precision": 0.9700996677740864, "recall": 0.7584415584415585, "f1-score": 0.8513119533527697, "support": 385.0}, "1": {"precision": 0.9592592592592593, "recall": 0.9829222011385199, "f1-score": 0.9709465791940018, "support": 2635.0}, "2": {"precision": 0.8576388888888888, "recall": 0.9182156133828996, "f1-score": 0.8868940754039497, "support": 269.0}, "accuracy": 0.9513529948312557, "macro avg": {"precision": 0.9289992719740782, "recall": 0.8865264576543259, "f1-score": 0.9030508693169071, "support": 3289.0}, "weighted avg": {"precision": 0.9522168991645737, "recal

In [32]:
metrics_df[["classifier","balanced_accuracy_cv","f1_macro_cv","jaccard_cv","roc_auc_cv","balanced_accuracy_test","f1_macro_test","jaccard_test","roc_auc_test"]]

Unnamed: 0,classifier,balanced_accuracy_cv,f1_macro_cv,jaccard_cv,roc_auc_cv,balanced_accuracy_test,f1_macro_test,jaccard_test,roc_auc_test
0,RandomForest,0.727595,0.804152,0.686315,0.978159,0.814675,0.862336,0.765825,0.9696
1,XGBoost,0.886526,0.903051,0.827142,0.985511,0.891315,0.901617,0.825715,0.984021
2,CatBoost,0.883669,0.788804,0.661672,0.958348,0.814959,0.731491,0.59363,0.945394
3,SVM,0.892316,0.840259,0.733268,,0.851816,0.797185,0.674934,
4,CART,0.847827,0.837571,0.728734,0.882314,0.84531,0.822341,0.709392,0.885832
5,AdaBoost,0.740789,0.768269,0.641939,0.906347,0.748105,0.757909,0.634525,0.882128
6,Bagging,0.882846,0.884237,0.797478,0.970775,0.892241,0.879775,0.791078,0.967361


In [33]:
metrics = metrics_df[["classifier","balanced_accuracy_cv","f1_macro_cv","jaccard_cv","roc_auc_cv","balanced_accuracy_test","f1_macro_test","jaccard_test","roc_auc_test"]]

In [35]:
# Define your weights
weights = {
    'balanced_accuracy_cv': 0.15,
    'f1_macro_cv': 0.15,
    'jaccard_cv': 0.1,
    'roc_auc_cv': 0.1,
    'balanced_accuracy_test': 0.2,
    'f1_macro_test': 0.2,
    'jaccard_test': 0.05,
    'roc_auc_test': 0.05
}

# Compute weighted score
metrics['model_score'] = (
    weights['balanced_accuracy_cv'] * metrics['balanced_accuracy_cv'] +
    weights['f1_macro_cv'] * metrics['f1_macro_cv'] +
    weights['balanced_accuracy_test'] * metrics['balanced_accuracy_test'] +
    weights['f1_macro_test'] * metrics['f1_macro_test'] +
    weights['jaccard_test'] * metrics['jaccard_test'] +
    weights['roc_auc_test'] * metrics['roc_auc_test']
)

# Sort by score
ranked = metrics.sort_values(by='model_score', ascending=False)
ranked[['classifier', 'model_score']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metrics['model_score'] = (


Unnamed: 0,classifier,model_score
1,XGBoost,0.71751
6,Bagging,0.707387
4,CART,0.666101
0,RandomForest,0.651935
2,CatBoost,0.637112
5,AdaBoost,0.603394
3,SVM,


# **3. Inference pipeline**

In [36]:
from sklearn.pipeline import Pipeline
import joblib

In [37]:
best_model = joblib.load("/content/outputs_2025_13_17_45/XGBoost_model.pkl")

inference_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', best_model)
])

joblib.dump(inference_pipeline, "job_contract_pipeline.pkl")

['job_contract_pipeline.pkl']

In [38]:
inference_pipeline = joblib.load("job_contract_pipeline.pkl")

In [39]:
import pandas as pd

new_job = pd.DataFrame([
    {
        'intitule': "Développeur Python",
        'description': "Envie d'un poste où votre code ne reste pas dans votre bulle, mais sert concrètement tous les métiers d'une entreprise ?\n\nVous serez au cœur de la conception, du développement et du déploiement de solutions logicielles sur mesure, impactant directement tous les services de notre entreprise (logistique, transport, finance, comptabilité, RH...).\n\nCe que vous ferez au quotidien :\n\nDe l'idée à la réalité : vous analyserez les besoins, rédigerez des spécifications techniques claires et proposerez des solutions pertinentes et optimales pour nos applications web et logicielles.\n\n\nDéveloppement & Qualité : vous assurez la réalisation, les tests, la mise en service et la maintenance, le tout dans le respect des délais impartis et de la qualité.\n\n\nProjets Transversaux / Partenaire des Métiers : vous collaborez sur une multitude de projets, apportant votre expertise à l'ensemble de nos métiers.\n\n\nCe poste, basé à CHARTRES est à pourvoir dans le cadre d'un CDI.\n\nLa rémunération brute annuelle est à négocier selon votre expérience.",
        'natureContrat': "Contrat travail",
        'experienceExige': "D",
        'entrepriseAdaptee': False,
        'employeurHandiEngage': False,
        'entreprise.nom': "EXAMPLE_CORP",
        'lieuTravail.commune': "",
        'secteurActivite': 78.0,
        'nombrePostes': 1,
        'lieuTravail.latitude': 48.85,
        'lieuTravail.longitude': 2.35,
        'lieuTravail.codePostal': 75001
    }
])

In [40]:
pred_numeric = inference_pipeline.predict(new_job)

print("Predicted contract type:", pred_numeric[0])

Predicted contract type: 1
