# Implémentation de l'architecture des réseaux de neurones

*http://exo7.emath.fr/cours/livre-deepmath.pdf* 

## Chargement des modules et données

In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD

from sklearn import preprocessing, model_selection

In [2]:
# Load the semicolon-separated market data file into a DataFrame.
df = pd.read_csv('market_data_pd.csv',sep=';')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Reference Security,Ticker_x,Bid Price,Ask Price,Mid Price,Issuer Name,Cpn,Maturity,Ticker_y,...,Tot Debt / Tot Capital,Tot Loans to Tot Assets,Tot Risk-Based Cap,Earnings Assets / Int Bear Liab,Total Expenses to Average Earning Assets,PD_1y,BCLASS Level 1,BCLASS Level 2,BCLASS Level 3,BCLASS Level 4
0,0,AU053739 Corp,AABHFH,100.086,100.098,100.092,Alandsbanken Abp,0.5,09/13/2021,AABHFH,...,87.2526,72.7382,275.5,,,0.053266,Corporate,Financial Institutions,Banking,Banking
1,1,BP432479 Corp,AAFFP,101.45,102.108,101.779,Afflelou SAS,4.25,05/19/2026,AAFFP,...,77.8533,,,,,0.068537,Corporate,Industrial,Consumer Cyclical,Retailers
2,2,BP486833 Corp,AAFFP,101.375,102.375,101.875,Afflelou SAS,8.0,05/19/2027,AAFFP,...,77.8533,,,,,0.119209,Corporate,Industrial,Consumer Cyclical,Retailers
3,3,EJ102177 Corp,AALLN,102.36,102.801,102.5805,Anglo American Capital PLC,3.5,03/28/2022,AALLN,...,80.4119,,,,,0.030968,Corporate,Industrial,Basic Industry,Metals and Mining
4,4,EK150567 Corp,AALLN,105.728,105.887,105.8075,Anglo American Capital PLC,3.25,04/03/2023,AALLN,...,80.4119,,,,,0.016911,Corporate,Industrial,Basic Industry,Metals and Mining


In [None]:
df.dtypes

In [None]:
# Keep the financial ratios, the BCLASS level-3 label and the PD_1y target.
df1 = df[[
    'Common Eqty / Tot Assets',
    'Debt / Common Equity',
    'BCLASS Level 3',
    'PD_1y',
]]
# Alternative kept for reference (fill missing values with 0.0 instead of dropping rows):
#df1.replace(np.nan, 0.0,inplace=True)

In [None]:
# Drop every row that has at least one missing value: the algorithm cannot be
# run on missing values. TODO: look into techniques to replace them instead.
df1 = df1.dropna(how = 'any') 
df1.head()

In [None]:
df1.shape

## Traitement des données

In [None]:
# Separate the target vector (PD_1y) from the remaining feature columns.
y_full = np.array(df1['PD_1y'])
X_full = np.array(df1.drop('PD_1y', axis=1))



In [None]:
# Feature names, in the same column order as the frame without the PD_1y target.
features = df1.columns.drop('PD_1y').tolist()

discrete_features = ['BCLASS Level 3']
continuous_features = [name for name in features if name not in discrete_features]

# Column positions of each feature group inside the feature matrix.
continuous_features_idx = list(map(features.index, continuous_features))
discrete_features_idx = list(map(features.index, discrete_features))


def traitement(X):
    """One-hot encode the discrete columns of X and standardise the continuous ones.

    The columns are selected with the module-level index lists
    ``discrete_features_idx`` and ``continuous_features_idx``.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix, indexed as ``X[:, column_positions]``.

    Returns
    -------
    numpy.ndarray
        The one-hot encoded discrete columns followed by the standardised
        continuous columns, stacked horizontally.

    Notes
    -----
    Both the encoder and the scaler are fitted on the matrix passed in.
    NOTE(review): when this is called on the full data set before the
    train/test split, the scaling statistics also include the test rows.
    """
    try:
        # scikit-learn >= 1.2 names the dense-output option `sparse_output`.
        ohe = preprocessing.OneHotEncoder(categories='auto', sparse_output=False)
    except TypeError:
        # Older scikit-learn releases only know the former name `sparse`.
        ohe = preprocessing.OneHotEncoder(categories='auto', sparse=False)

    X_discrete_encoded = ohe.fit_transform(X[:, discrete_features_idx])

    std_scaler = preprocessing.StandardScaler().fit(X[:, continuous_features_idx])
    X_continuous_scaled = std_scaler.transform(X[:, continuous_features_idx])

    X_processed = np.hstack((X_discrete_encoded, X_continuous_scaled))

    return X_processed



In [None]:
# Encode/scale the whole feature matrix, then hold out 20% of the rows for testing.
X_processed = traitement(X_full)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X_processed,
    y_full,
    test_size=0.20,
    random_state=92,  # fixed seed for the split
    # stratify=y_full,  # stratification (disabled)
)

In [None]:
#Préparation bases de train et de test 

#from sklearn.model_selection import train_test_split

#X_train, X_test, y_train, y_test = train_test_split(df1.drop("PD_1y",axis=1),df1["PD_1y"],test_size=0.2, random_state=42)

**Variables quanti : standardisation**

In [None]:
# Names of the X_train columns whose dtype is a sub-dtype of np.number.
# NOTE(review): this needs X_train to be a DataFrame, as in the commented-out
# split above; the active split is fed NumPy arrays, which have no `.columns`.
# Confirm which of the two pipelines is intended.
num_cols = X_train.columns[X_train.dtypes.apply(lambda c: np.issubdtype(c, np.number))]

In [None]:
# Standardise the numeric columns.
# NOTE(review): like the `num_cols` cell, this expects X_train / X_test to be
# DataFrames, while the active split yields arrays — confirm.

from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training rows only, then applied to both sets.
scaler = StandardScaler(with_mean=True,with_std=True)
scaler.fit(X_train[num_cols])
    

# NOTE(review): `x_train = X_train` binds a second name to the same object, so
# the column assignment below also overwrites the columns of X_train itself.
x_train = X_train
x_train[num_cols] = scaler.transform(X_train[num_cols])
x_train = pd.DataFrame(x_train, index=X_train.index, columns=X_train.columns)

# Same treatment (and same aliasing remark) for the test set.
x_test = X_test
x_test[num_cols] = scaler.transform(X_test[num_cols])
x_test = pd.DataFrame(x_test, index=X_test.index, columns=X_test.columns)

In [None]:
x_test

In [None]:
x_train['Debt / Common Equity'].mean()

**Variables quali : binariser**

In [None]:
# One-hot encode the categorical columns.
x_train = pd.get_dummies(x_train)
# Encoding each set on its own can produce different columns when a category is
# absent from one of them. The test frame is therefore aligned on the training
# columns: dummies missing from the test set are added filled with 0, and
# categories never seen in training are dropped.
x_test = pd.get_dummies(x_test).reindex(columns=x_train.columns, fill_value=0)
x_test

In [None]:
x_train

## Réseaux de neurones 

In [None]:
#RESEAUX DE NEURONES 
#http://eric.univ-lyon2.fr/~ricco/tanagra/fichiers/fr_Tanagra_Packages_Python_for_Deep_Learning.pdf

#Explications très bien dans ce document : 

#http://exo7.emath.fr/cours/livre-deepmath.pdf 

#très bon document (parties Python, keras avec les codes et explications)

In [None]:
# Fully connected network: one hidden sigmoid layer and one sigmoid output unit.
modele = Sequential()
# The width of the first layer is the number of columns of the training matrix.
# It is read from the data instead of being hard-coded, so the layer keeps
# matching the input when the feature set or the number of one-hot categories
# changes.
modele.add(Dense(units=7, input_dim=X_train.shape[1], activation="sigmoid"))  # 6 units is good too
modele.add(Dense(units=1, activation="sigmoid"))
#modele.add(Dropout(0.3))
learning_rate = 0.10

sgd = SGD(learning_rate)

J'ai choisi la fonction sigmoïde comme activation car elle est continue et à valeurs dans ]0, 1[, comme les probabilités de défaut. La couche de sortie se comporte ainsi comme une régression logistique (logit).

In [None]:
# Optimise the mean squared error and also track it as the reported metric.
modele.compile(
    optimizer=sgd,
    loss='mean_squared_error',
    metrics=[tf.keras.metrics.MeanSquaredError()],
)

In [None]:
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would add a stray "None" line to the output).
modele.summary()

In [None]:
# Train the model on the training set, holding out 35% of it for validation.
history = modele.fit(
    X_train, y_train,
    batch_size=10,
    epochs=9,
    steps_per_epoch=15,
    validation_split=0.35,
    verbose=1,
)

# The evaluation on the test set is done in a dedicated cell further down.

# --------------------------------------
# Loss functions evolution
# --------------------------------------
# matplotlib.pyplot is already imported as `plt` in the first cell.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss by epoch')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'valid'], loc='right')
plt.show()

In [None]:
X_test

In [None]:
# Results

# The model was fitted on X_train, so it is evaluated on X_test, the matching
# half of the same train/test split (not on the separately built `x_test`).
resultat = modele.evaluate(X_test, y_test, verbose=1)
print("Valeur de l'erreur sur les données de test (loss):", resultat[0])
print('Précision sur les données de test (MSE):', resultat[1])

## Observation des résultats 

In [None]:
def ecart_relatif(row):
    """Return the relative gap between the predicted and the reference value.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Théorique' (reference value) and 'Prédiction'
        (predicted value).

    Returns
    -------
    float
        ``abs(reference - predicted) / abs(reference)``, or NaN when the
        reference is 0, where the relative gap is undefined.
    """
    reference = row['Théorique']
    predicted = row['Prédiction']
    if reference == 0:
        # Avoid a ZeroDivisionError (plain floats) or an infinite value
        # (NumPy floats) that would make the average gap meaningless.
        return float('nan')
    # Dividing by the magnitude keeps the gap non-negative whatever the sign.
    return abs(reference - predicted) / abs(reference)

In [None]:
# Put predictions and reference values side by side, matched on row position.
prediction = pd.DataFrame(modele.predict(X_test), columns=['Prédiction']).reset_index()
théorique = pd.DataFrame(y_test, columns=['Théorique']).reset_index()

results = théorique.merge(prediction, on='index').drop(columns='index')
# Relative gap of each prediction, computed row by row.
results["Ecart_relatif"] = results.apply(ecart_relatif, axis=1)

In [None]:
results.head()

In [None]:
results["Ecart_relatif"].mean()

In [None]:
# Report the average relative gap as a percentage.
print(f"L'écart relatif moyen est de {results['Ecart_relatif'].mean() * 100}%")

In [None]:
# NOTE(review): same statement as the one executed when `results` was built;
# re-running it recomputes the same column — probably redundant, confirm.
results["Ecart_relatif"] = results.apply(ecart_relatif,axis=1)

In [None]:
results.plot.scatter("Théorique","Prédiction")

---------------

In [None]:
df2 = df1.sort_values('PD_1y')

In [None]:
# Print every row of the sorted frame, each one preceded by a separator line.
for idx, row in df2.iterrows():
    print('+==========')
    print(row)

In [None]:
# Standardise three hand-picked values.
# NOTE(review): reshape(-1, 1) turns the three values into three samples of a
# single feature, so they are standardised against each other — confirm this is
# the intent. The name `scaler` is also rebound here, replacing the scaler
# created in the standardisation cell.
arr = np.array([410.513,79.0061,80.4119]).reshape(-1,1) # rows 1 and 2
scaler = StandardScaler(with_mean=True,with_std=True)
scaler.fit(arr)

x = scaler.transform(arr)

In [None]:
x = np.array([-0.079178,-0.424929,1.104960]).reshape(-1,1)

In [None]:
# x holds three values reshaped to a column, so x.T is one row of three inputs.
# NOTE(review): the network's first layer is declared for a different number
# of inputs, so this call presumably fails on a shape mismatch — confirm.
modele.predict(x.T)

In [None]:
# y_test comes from splitting a NumPy array, which has no .head(); slicing
# shows the first five values and works for arrays and Series alike.
y_test[:5]

In [None]:
# Preview the first five rows of X_test, the processed test matrix produced by
# the train/test split and passed to the model for prediction.
X_test[:5]