In [1]:
upstream = None
product = None

In [2]:
# Parameters
upstream = {"Preprocess features": {"nb": "/Users/bruno/Documents/Hackaton_danone/output/process_data.ipynb", "train_csv": "/Users/bruno/Documents/Hackaton_danone/data/train.csv", "test_csv": "/Users/bruno/Documents/Hackaton_danone/data/test.csv"}}
product = {"nb": "/Users/bruno/Documents/Hackaton_danone/output/train_nn.ipynb", "result": "/Users/bruno/Documents/Hackaton_danone/predictions/result_nn.json"}


In [3]:
import json
import pickle
import warnings
from pathlib import Path

import numpy as np

In [4]:
import pandas as pd
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import StepLR, ExponentialLR, ReduceLROnPlateau
from torchmetrics import F1Score

warnings.filterwarnings('ignore')

In [5]:
cols = ['is_beverage', 'non_recyclable_and_non_biodegradable_materials_count', 
        'est_co2_agriculture', 'est_co2_consumption', 
        'est_co2_distribution', 'est_co2_packaging', 'est_co2_processing', 
        'est_co2_transportation']
target_col = 'ecoscore_grade'

In [6]:
train = pd.read_csv(upstream['Preprocess features']['train_csv'], usecols=cols + [target_col])
test = pd.read_csv(upstream['Preprocess features']['test_csv'], usecols=cols)

In [7]:
X_train, y_train = train[cols], train[target_col]
X_test = test[cols]
for i in cols:
    X_train[i] = X_train[i].astype(float)
    X_test[i] = X_test[i].astype(float)

In [8]:
batch_size = 32 * 2
train_loader = DataLoader(
    TensorDataset(torch.tensor(X_train.values, dtype=torch.float32), 
                  torch.tensor(y_train.values, dtype=torch.float32)), 
    batch_size=batch_size, shuffle=True,
)

class Net(nn.Module):
    activation_function: nn.modules.activation = None

    def __init__(self, input_shape, output_shape, activation_function, hidden_layers: list = [160], dropout=0.6):
        super(Net, self).__init__()

        activation_function = getattr(nn, activation_function)
        layers = []
        hidden_layers.insert(0, input_shape)
        for i, n in enumerate(hidden_layers[0:-1]):
            m = int(hidden_layers[i + 1])
            layers.append(nn.Linear(n, m))
            layers.append(nn.BatchNorm1d(m))
            layers.append(nn.Dropout(dropout))
            layers.append(activation_function())
        layers.append(nn.Linear(hidden_layers[-1], output_shape))
        layers.append(nn.Sigmoid())
        layers = nn.Sequential(*layers)
        self.layers = layers
    def forward(self, x):
        x = self.layers(x)
        return x

    def predict(self, x):
        x = torch.from_numpy(x).float().to("cpu")
        outputs = self(x)
        return outputs

In [9]:
metric_f1 = F1Score(task='multiclass',num_classes=len(y_train.unique()), average="weighted", multiclass=True)
network = Net(X_train.shape[1], len(y_train.unique()), 'ReLU6', [128, 64, 32, 16])
optimizer = optim.Adam(network.parameters(), lr=0.05)
criterion = nn.CrossEntropyLoss()
scheduler = ReduceLROnPlateau(optimizer, "max", factor=0.9)
epochs = 100

In [10]:
for epoch in range(epochs):
    running_loss = 0
    f1 = 0
    
    for (data, target) in train_loader:
        optimizer.zero_grad()
        output = network(data)

        target = nn.functional.one_hot(target.to(torch.int64), num_classes=len(y_train.unique()))
        loss = criterion(output.float(), target.float())

        ## Do backward
        loss.backward()
        optimizer.step()

        f1 += metric_f1(torch.argmax(output, axis=1), torch.argmax(target.int(), axis=1)).item()
        running_loss += loss.item()
    f1 /= len(train_loader)
    running_loss /= len(train_loader)
    # scheduler.step(f1)
    print(f'Epoch: {epoch} | F1: {f1} | Loss: {running_loss}')

Epoch: 0 | F1: 0.4339703854424822 | Loss: 1.3191137123692986


Epoch: 1 | F1: 0.4400420591143743 | Loss: 1.2859564313859295


Epoch: 2 | F1: 0.4347983718466905 | Loss: 1.282749178950772


Epoch: 3 | F1: 0.43011116547270056 | Loss: 1.2797922195832423


Epoch: 4 | F1: 0.42188831670152627 | Loss: 1.2829777843381729


Epoch: 5 | F1: 0.41246417626464293 | Loss: 1.2980497965783429


Epoch: 6 | F1: 0.41717854503290785 | Loss: 1.2932448518788155


Epoch: 7 | F1: 0.4181084892501129 | Loss: 1.2920946848173083


Epoch: 8 | F1: 0.4193991836777494 | Loss: 1.2876636261588956


Epoch: 9 | F1: 0.4246303115062918 | Loss: 1.279106532137818


Epoch: 10 | F1: 0.4228222886660348 | Loss: 1.2834293037835807


Epoch: 11 | F1: 0.4232635806126097 | Loss: 1.2857409050128212


Epoch: 12 | F1: 0.4254309602218903 | Loss: 1.2893724792574082


Epoch: 13 | F1: 0.4167999563955822 | Loss: 1.290475544388309


Epoch: 14 | F1: 0.4232847837765524 | Loss: 1.2898297854727763


Epoch: 15 | F1: 0.41781305617715686 | Loss: 1.2938252226706664


Epoch: 16 | F1: 0.42023538441562946 | Loss: 1.2881062264822742


Epoch: 17 | F1: 0.42275337564067605 | Loss: 1.281717264213445


Epoch: 18 | F1: 0.4232753379944643 | Loss: 1.2878269472736523


Epoch: 19 | F1: 0.42975569786103957 | Loss: 1.2806048276234259


Epoch: 20 | F1: 0.42666040242083964 | Loss: 1.2808778307920585


Epoch: 21 | F1: 0.4249534210635841 | Loss: 1.283187209462827


Epoch: 22 | F1: 0.4144288653427838 | Loss: 1.2872610359104133


Epoch: 23 | F1: 0.4089890743142988 | Loss: 1.2877621625098714


Epoch: 24 | F1: 0.4082118851915459 | Loss: 1.2885549580392661


Epoch: 25 | F1: 0.41128326728482917 | Loss: 1.284355735851943


Epoch: 26 | F1: 0.41125932290144496 | Loss: 1.2909192647670675


Epoch: 27 | F1: 0.41132676016333647 | Loss: 1.2835557628994338


Epoch: 28 | F1: 0.4020305773688972 | Loss: 1.287882030375896


Epoch: 29 | F1: 0.4078982959678568 | Loss: 1.2852535642729215


Epoch: 30 | F1: 0.40616464980540834 | Loss: 1.2986034235339954


Epoch: 31 | F1: 0.4076028972681315 | Loss: 1.2978197855452087


Epoch: 32 | F1: 0.4095580335135109 | Loss: 1.292257293601709


Epoch: 33 | F1: 0.41520243503747545 | Loss: 1.2891491995267341


Epoch: 34 | F1: 0.4122244321678314 | Loss: 1.2909807603052057


Epoch: 35 | F1: 0.41376658734734073 | Loss: 1.2865291373130003


Epoch: 36 | F1: 0.4064012315192837 | Loss: 1.2884641721204746


Epoch: 37 | F1: 0.40852162685313836 | Loss: 1.291779813225284


Epoch: 38 | F1: 0.40822316185097024 | Loss: 1.2942101169217584


Epoch: 39 | F1: 0.4206732454841122 | Loss: 1.2896124753483966


Epoch: 40 | F1: 0.4176300546508625 | Loss: 1.2956587072530408


Epoch: 41 | F1: 0.4051065247483049 | Loss: 1.2956952953631162


Epoch: 42 | F1: 0.404905016619735 | Loss: 1.295005020919753


Epoch: 43 | F1: 0.40912325575124997 | Loss: 1.289775189812198


Epoch: 44 | F1: 0.4144804749203606 | Loss: 1.286235747527491


Epoch: 45 | F1: 0.42249552178785116 | Loss: 1.286019746876933


Epoch: 46 | F1: 0.4141032292159057 | Loss: 1.2887187611105984


Epoch: 47 | F1: 0.40968245372998935 | Loss: 1.2906464799050172


Epoch: 48 | F1: 0.4146152232413643 | Loss: 1.295306913326123


Epoch: 49 | F1: 0.41030755712210765 | Loss: 1.291057440400855


Epoch: 50 | F1: 0.41590185221170356 | Loss: 1.295539952860288


Epoch: 51 | F1: 0.4051844784361453 | Loss: 1.30035072053137


Epoch: 52 | F1: 0.40925992106550313 | Loss: 1.2947498186965662


Epoch: 53 | F1: 0.4050865892800817 | Loss: 1.298428317520516


Epoch: 54 | F1: 0.40552991226406915 | Loss: 1.3013780863007154


Epoch: 55 | F1: 0.4073712626940634 | Loss: 1.2932933323953781


Epoch: 56 | F1: 0.4070980198269973 | Loss: 1.2954531421690632


Epoch: 57 | F1: 0.4151988059190885 | Loss: 1.2924534616294814


Epoch: 58 | F1: 0.4162863535932237 | Loss: 1.2930895141297323


Epoch: 59 | F1: 0.41396120026067723 | Loss: 1.3000896664484878


Epoch: 60 | F1: 0.41340630460370537 | Loss: 1.2894692461183466


Epoch: 61 | F1: 0.40640906861589 | Loss: 1.2974081639131885


Epoch: 62 | F1: 0.40327009010168674 | Loss: 1.2980125491604484


Epoch: 63 | F1: 0.40923169913467455 | Loss: 1.2990687807644803


Epoch: 64 | F1: 0.4041818040737345 | Loss: 1.305819995198513


Epoch: 65 | F1: 0.40966483246695046 | Loss: 1.2992000645655064


Epoch: 66 | F1: 0.40849307631970916 | Loss: 1.3018693199918314


Epoch: 67 | F1: 0.41650052597186316 | Loss: 1.3031268595186478


Epoch: 68 | F1: 0.4195278922290158 | Loss: 1.2997423392132017


Epoch: 69 | F1: 0.40786249405401614 | Loss: 1.303569361476079


Epoch: 70 | F1: 0.4041309158769122 | Loss: 1.3002248618500365


Epoch: 71 | F1: 0.41125878897364154 | Loss: 1.3043635337630664


Epoch: 72 | F1: 0.41226547759917614 | Loss: 1.3066065296804978


Epoch: 73 | F1: 0.40708258505796363 | Loss: 1.3091864220203797


Epoch: 74 | F1: 0.40936624135342115 | Loss: 1.3056957557157505


Epoch: 75 | F1: 0.41457768401668116 | Loss: 1.3070397600074488


Epoch: 76 | F1: 0.41439288895737175 | Loss: 1.3056202702727056


Epoch: 77 | F1: 0.4175672034270193 | Loss: 1.3001408763458393


Epoch: 78 | F1: 0.42022971822623095 | Loss: 1.2990057219756892


Epoch: 79 | F1: 0.42346190460071975 | Loss: 1.2969721204664078


Epoch: 80 | F1: 0.42566310902314686 | Loss: 1.2956509001415932


Epoch: 81 | F1: 0.42840596800384345 | Loss: 1.295864834375908


Epoch: 82 | F1: 0.4273324811294035 | Loss: 1.297918755583968


Epoch: 83 | F1: 0.4286321455532788 | Loss: 1.2995033560355016


Epoch: 84 | F1: 0.42962798951593645 | Loss: 1.3030638691106458


Epoch: 85 | F1: 0.4199996534849237 | Loss: 1.3044345894474194


Epoch: 86 | F1: 0.4257920027876193 | Loss: 1.3032960489483698


Epoch: 87 | F1: 0.4276975197660411 | Loss: 1.2971843206809341


Epoch: 88 | F1: 0.42975108252346883 | Loss: 1.291682991879118


Epoch: 89 | F1: 0.42774776346478727 | Loss: 1.2949159847446745


Epoch: 90 | F1: 0.42962270668862057 | Loss: 1.2950014629246998


Epoch: 91 | F1: 0.4381303079197743 | Loss: 1.2903768789548815


Epoch: 92 | F1: 0.42791688492510216 | Loss: 1.2950476850468688


Epoch: 93 | F1: 0.42060510192546374 | Loss: 1.3001205050872147


Epoch: 94 | F1: 0.4187040776471419 | Loss: 1.3044717074171897


Epoch: 95 | F1: 0.41900136175513997 | Loss: 1.3086167157061992


Epoch: 96 | F1: 0.4105415226308846 | Loss: 1.3105753115349752


Epoch: 97 | F1: 0.40057195174547794 | Loss: 1.3193431332067478


Epoch: 98 | F1: 0.4006737403434478 | Loss: 1.3298381479239902


Epoch: 99 | F1: 0.3970430259781381 | Loss: 1.3248491148275832


In [11]:
network.eval()
pred = np.argmax(network.predict(X_test.values).detach().numpy(), axis=1)


Path(product['result'])\
    .write_text(json.dumps(
        {'target': {index: int(i) 
                    for index, i in enumerate(pred)}
         }))

12894