# Apply fedas_classifier

This notebook trains a `FedasClassifier` on a given dataset and then uses it to predict FEDAS codes for a new dataset.

In [13]:
import pandas as pd
from fedas_classifier import FedasClassifier


## Train classifier

In [14]:
# Classifier instance that the later cells reuse for every prediction.
fc = FedasClassifier()

# Load the labelled training set. Both fedas code columns are read as plain
# objects rather than numbers, and the two availability columns are parsed as
# dates. NOTE(review): the "avalability_*" spelling presumably mirrors the CSV
# header - confirm before "fixing" it.
raw_train = pd.read_csv(
    "data_technical_test/train_technical_test.csv",
    na_values="",
    dtype={"incorrect_fedas_code": object, "correct_fedas_code": object},
    parse_dates=["avalability_start_date", "avalability_end_date"],
)

# The label column is the target; everything else is the feature frame.
# The features are built from a deep copy, so `train` is a separate object
# from `raw_train`.
target = raw_train["correct_fedas_code"]
train = raw_train.copy(deep=True).drop(columns=["correct_fedas_code"])

fc.fit(train, target)

Normalizing features...
Normalizing target...
Fitting model...
Done.


Get predictions on the first 20 rows of the training data (for testing purposes):

In [15]:
# Sanity check: predict on the first 20 training rows only.
results = fc.predict(train.head(20))

Normalizing features...
Predicting fedas codes...


In [16]:
# Place the predictions next to the true codes of the same 20 training rows
# (pd.concat lines the two objects up on their index), then flag the rows
# where the predicted code equals the true code.
comparison = pd.concat([results, target.head(20)], axis="columns")
comparison["correct"] = comparison["fedas"] == comparison["correct_fedas_code"]
comparison

Unnamed: 0,fedas,confidence,correct_fedas_code,correct
0,378101,0.822763,378101,True
1,364308,0.9408,364308,True
2,175890,0.552432,175890,True
3,224118,1.0,224118,True
4,115944,0.133752,115944,True
5,200124,0.80578,200124,True
6,175850,1.0,175850,True
7,375952,0.245578,375022,False
8,315933,0.56842,315933,True
9,141791,0.564114,137791,False


In [17]:
# Rows where the predicted code differs from the true code. The boolean
# `correct` column is negated with `~` instead of being compared to False.
comparison[~comparison["correct"]]

Unnamed: 0,fedas,confidence,correct_fedas_code,correct
7,375952,0.245578,375022,False
9,141791,0.564114,137791,False
10,200127,0.049403,200621,False


In [18]:
# Predictions on the training sample whose confidence is below 0.5,
# whether or not they turned out to be correct.
comparison.loc[comparison["confidence"] < 0.5]

Unnamed: 0,fedas,confidence,correct_fedas_code,correct
4,115944,0.133752,115944,True
7,375952,0.245578,375022,False
10,200127,0.049403,200621,False
11,200297,0.46457,200297,True
18,108733,0.188417,108733,True
19,314125,0.153459,314125,True


## Make predictions on test set

In [19]:
# Load the test set with the same parsing options as the training set:
# fedas code columns as plain objects, availability columns as dates.
test = pd.read_csv(
    "data_technical_test/test_technical_test.csv",
    na_values="",
    dtype={"incorrect_fedas_code": object, "correct_fedas_code": object},
    parse_dates=["avalability_start_date", "avalability_end_date"],
)

# Predict on the first 20 test rows only and display the result.
test_result = fc.predict(test.head(20))
test_result

Normalizing features...
Predicting fedas codes...


Unnamed: 0,fedas,confidence
0,246124,0.721465
1,232747,0.666469
2,246398,0.9801
3,278115,0.466167
4,101733,0.190882
5,135798,0.653562
6,200367,0.585284
7,232124,1.0
8,200901,0.107883
9,275124,1.0


In [20]:
# Show the original input rows of the test items whose prediction confidence
# is below 0.5, looked up through the index labels of `test_result`.
test.loc[
    test_result.loc[test_result["confidence"] < 0.5].index
]

Unnamed: 0,brand,model_code,model_label,commercial_label,incorrect_fedas_code,article_main_category,article_type,article_detail,comment,avalability_start_date,...,shipping_date,eco_participation,eco_furniture,multiple_of_order,minimum_multiple_of_order,net_weight,raw_weight,volume,size,accurate_gender
3,brand_397,1344333,ARMOUR MID KEYHOLE GRAPHIC,,278052.0,TRAINING,FEMME,ARMOUR MID KEYHOLE,,2021-01-20,...,,0.0,0.0,1,0,0.0,0.0,0.0,L,FE
4,brand_314,278115,POLE RENTAL KID EXCLUSIVE 2 FR,SCO POLE RENTAL KID EXCLU,101731.0,WINTERSPORTS/ACCESSORTS,BATONS DE SKI,,LOCATION,2020-09-01,...,20200224.0,0.0,0.0,1,1,0.32,0.45,0.0,070,UE
8,brand_102,3130705,MAILLOT TANARO 2.0,,232904.0,FOOTBALL,ADULTES,,,NaT,...,,0.0,0.0,0,0,0.0,0.0,0.0,L,HO
13,brand_182,KI0633,SAC FOURRE TOUT FORME TUBE,SAC FOURRE TOUT FORME TUB,,COLLECTIVITES,SAC,,,NaT,...,20200415.0,0.0,0.0,1,1,0.36,0.0,2.67,TU,UN
15,brand_257,1183A206,TIGER HORIZONIA,,314984.0,SPORTSTYLE,UNISEXE ADULTE,TIGER HORIZONIA,,2020-12-01,...,,0.0,0.0,1,0,0.0,0.0,0.0,10,HO
17,brand_366,9205,SHAKER SPORT,SHAKER SPORT,131199.0,FITNESS,PETIT ACCESSOIRE,UNISEXE ADULTE,,2019-11-30,...,20191209.0,0.0,0.0,1,1,0.14,0.15,0.0,22CM,UN
18,brand_17,2111A967,SB GLOBAL HOME TOP REPLICA,,236154.0,FOOTBALL,HOMME,SB GLOBAL HOME TOP,,2021-02-01,...,,0.0,0.0,1,0,0.0,0.0,0.0,2XL,HO
