In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, classification_report

class FinfrogTrainer:
    """Train a LightGBM binary classifier that predicts loan default.

    The CSV at ``data_path`` is loaded in full when the object is created.
    ``preprocess`` builds the feature matrix / target and returns a
    train/test split; ``train`` fits the booster and stores it on
    ``self.model``.
    """

    def __init__(self, data_path):
        """Load the raw loan data from the CSV file at ``data_path``."""
        self.df = pd.read_csv(data_path, low_memory=False)
        self.model = None  # set by train()

    def preprocess(self):
        """Build features and target, then split them 80/20.

        Side effects: ``self.df`` is reduced to the rows whose
        ``loan_status`` is 'Fully Paid' or 'Charged Off' and gains a
        ``target`` column (1 = 'Charged Off', 0 = 'Fully Paid');
        ``self.X`` and ``self.y`` are set to the feature frame and target.

        Returns:
            The ``(X_train, X_test, y_train, y_test)`` result of
            ``train_test_split`` with ``test_size=0.2, random_state=42``.
        """
        # Simplification: only loans with a final outcome are kept, and the
        # model predicts 'Charged Off' (default) versus 'Fully Paid'.
        resolved = self.df['loan_status'].isin(['Fully Paid', 'Charged Off'])
        # .copy() gives an independent frame, so adding 'target' below does
        # not write into a slice of the originally loaded data.
        self.df = self.df[resolved].copy()
        self.df['target'] = (self.df['loan_status'] == 'Charged Off').astype(int)

        features = ['loan_amnt', 'term', 'int_rate', 'annual_inc', 'dti', 'open_acc']

        X = self.df[features].copy()
        # 'term' presumably arrives as text such as ' 36 months' (verify
        # against the dataset). Keep the first run of digits as a number so
        # the column is numeric; values without digits become NaN.
        X['term'] = pd.to_numeric(
            X['term'].astype(str).str.extract(r'(\d+)', expand=False),
            errors='coerce',
        )

        self.X = X
        self.y = self.df['target']

        return train_test_split(self.X, self.y, test_size=0.2, random_state=42)

    def train(self, X_train, y_train):
        """Fit a LightGBM booster on the training split.

        The fitted booster is stored on ``self.model``; nothing is returned.
        """
        # Baseline hyperparameters for credit-risk scoring.
        params = {
            'objective': 'binary',
            'metric': 'auc',
            'learning_rate': 0.05,
            'num_leaves': 31,
            'feature_fraction': 0.8,
            'bagging_fraction': 0.8,
            # bagging_fraction is ignored by LightGBM unless bagging_freq is
            # non-zero; resample at every iteration.
            'bagging_freq': 1,
            # Crucial: defaults are rare compared to good customers.
            'is_unbalance': True,
        }

        train_data = lgb.Dataset(X_train, label=y_train)
        self.model = lgb.train(params, train_data, num_boost_round=500)
        print("Entraînement terminé !")

# trainer = FinfrogTrainer('data/accepted_2007_to_2018Q4.csv')
# X_train, X_test, y_train, y_test = trainer.preprocess()
# trainer.train(X_train, y_train)

In [6]:
# Scratch check: the first whitespace-separated token of a raw 'term'
# string converts cleanly to a float.
x = '  36 month'
first_token = x.split(maxsplit=1)[0]
print(float(first_token))

36.0
