In [2]:
!pip install codecarbon



In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import accuracy_score

#library for carbon emission
from codecarbon import EmissionsTracker


In [4]:
# Mini-batch size passed to get_dataloader when the train/test loaders are built.
batch_size = 50

In [5]:
class ChurnDataset(Dataset):
    """Bank-churn table exposed as a map-style PyTorch dataset.

    The CSV is read once in ``__init__``: identifier columns are dropped, the
    categorical columns are one-hot encoded and every predictor column is
    standardised with ``StandardScaler``.

    Attributes:
        categorical: names of the columns that get one-hot encoded.
        target: name of the label column.
        churn_frame: the encoded (unscaled) frame, label column included.
        X: standardised predictors as a DataFrame with positional column names.
        y: the label column as a Series.
        scaler: the fitted ``StandardScaler``, kept so the same transform can
            be applied to other data.
    """

    def __init__(self, csv_file):
        """Load ``csv_file`` and prepare the predictors and the label.

        Args:
            csv_file: path (or anything ``pd.read_csv`` accepts) to the churn CSV.
        """
        df = pd.read_csv(csv_file)

        # Identifier columns carry no predictive signal.
        df = df.drop(["Surname", "CustomerId", "RowNumber"], axis=1)

        # Grouping variable names
        self.categorical = ["Geography", "Gender"]
        self.target = "Exited"

        # One-hot encoding of categorical variables. Naming the columns
        # explicitly ties each prefix to its own column instead of relying on
        # the order (and number) of object-typed columns found in the CSV.
        self.churn_frame = pd.get_dummies(
            df, columns=self.categorical, prefix=self.categorical
        )

        # Save target and predictors
        self.X = self.churn_frame.drop(self.target, axis=1)
        self.y = self.churn_frame[self.target]

        # NOTE: the scaler is fit on every row, i.e. before any train/test
        # split a caller applies afterwards.
        self.scaler = StandardScaler()
        X_array = self.scaler.fit_transform(self.X)
        self.X = pd.DataFrame(X_array)

    def __len__(self):
        """Return the number of rows in the encoded frame."""
        return len(self.churn_frame)

    def __getitem__(self, idx):
        """Return ``[features, label]`` for the row(s) at position ``idx``."""
        # Convert idx from tensor to list due to pandas bug (that arises when
        # using pytorch's random_split)
        if isinstance(idx, torch.Tensor):
            idx = idx.tolist()

        # Both lookups are positional, so features and label always refer to
        # the same row regardless of the index labels carried by self.y.
        return [self.X.iloc[idx].values, self.y.iloc[idx]]

In [6]:
def get_CHURN_model(n_features=13, hidden_size=64):
    """Build the churn classifier: a two-hidden-layer ReLU MLP with one output.

    The final layer has no activation, so the network emits a single raw
    score (logit) per input row.

    Args:
        n_features: width of the input layer. The default of 13 is the value
            the network was originally hard-coded to.
        hidden_size: width of both hidden layers (default 64).

    Returns:
        An ``nn.Sequential`` of Linear -> ReLU -> Linear -> ReLU -> Linear.
    """
    return nn.Sequential(
        nn.Linear(n_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 1),
    )

In [7]:
def get_dataloader(csv_file, batch_size, train_fraction=0.8, seed=None):
    """Load the churn CSV and wrap a random train/test split in DataLoaders.

    Args:
        csv_file: path to the churn CSV, forwarded to ``ChurnDataset``.
        batch_size: batch size used by both loaders.
        train_fraction: share of rows assigned to the training subset
            (default 0.8); the remainder forms the test subset.
        seed: optional integer seed for the split. When ``None`` the split
            draws from torch's global random state, as before.

    Returns:
        ``(trainloader, testloader, trainset, testset)``.
    """
    # Load dataset
    dataset = ChurnDataset(csv_file)

    # Split into training and test
    train_size = int(train_fraction * len(dataset))
    test_size = len(dataset) - train_size

    split_kwargs = {}
    if seed is not None:
        # A dedicated generator makes the split reproducible without touching
        # the global RNG state.
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)
    trainset, testset = random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

    # Evaluation data is served in a fixed order so that predictions can be
    # lined up with `testset` and repeated evaluations see identical batches.
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

    return trainloader, testloader, trainset, testset

In [8]:
def train(trainloader, net, optimizer, n_epochs=100):
    """Fit ``net`` on ``trainloader`` using binary cross-entropy on logits.

    The mean batch loss is printed after every epoch.

    Args:
        trainloader: iterable yielding ``(inputs, labels)`` tensor batches;
            labels are reshaped to a column to match the network output.
        net: module mapping a float batch to one raw score per row.
        optimizer: optimizer already bound to ``net``'s parameters.
        n_epochs: number of passes over ``trainloader`` (default 100).

    Returns:
        The same ``net`` object, trained in place on the CPU.

    Raises:
        ValueError: if an epoch yields no batches, instead of failing with a
            bare division by zero when the mean loss is computed.
    """
    device = "cpu"

    net = net.to(device)
    # Make sure layers with train/eval-specific behaviour are in training mode.
    net.train()

    criterion = nn.BCEWithLogitsLoss()

    # Train the net
    for epoch in range(n_epochs):

        running_loss = 0.0
        n_batches = 0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize
            outputs = net(inputs.float())
            # Labels arrive as a 1-D batch; add a trailing axis so their shape
            # matches the (batch, 1) network output.
            loss = criterion(outputs, labels.float().unsqueeze(1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("trainloader yielded no batches; nothing to train on")

        print("Epoch {} - Training loss: {}".format(epoch, running_loss / n_batches))

    return net

In [9]:
# Location of the churn CSV, given relative to the notebook's working directory.
csv_file = "../data/churn.csv"

# Build both loaders and keep the underlying train/test subsets as well.
trainloader, testloader, train_ds, test_ds = get_dataloader(csv_file, batch_size)

In [10]:

# Measure the emissions of one full training run; power is sampled every
# 15 seconds and the result is also written to a file under ../output/.
tracker = EmissionsTracker(project_name = "churn_prediction",
                           output_dir = "../output/",
                           measure_power_secs = 15,
                           save_to_file = True)

tracker.start()
try:
    net = get_CHURN_model()
    optimizer = optim.Adam(net.parameters(), weight_decay=0.0001, lr=0.003)
    model = train(trainloader, net, optimizer, 50)
finally:
    # Always stop the tracker, even if training raises, so its background
    # measurement job does not keep running and the partial run is recorded.
    emissions: float = tracker.stop()

print(emissions)

CODECARBON : No CPU tracking mode found. Falling back on CPU constant mode.


[2022-02-26 21:00:05.492 pytorch-1-8-gpu-py3-ml-g4dn-xlarge-60bd0d07a83be181dcf7335baae2:1104 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2022-02-26 21:00:05.526 pytorch-1-8-gpu-py3-ml-g4dn-xlarge-60bd0d07a83be181dcf7335baae2:1104 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
Epoch 0 - Training loss: 0.42982916031032803
Epoch 1 - Training loss: 0.3540937858633697
Epoch 2 - Training loss: 0.34025645079091194
Epoch 3 - Training loss: 0.3379934617318213
Epoch 4 - Training loss: 0.3337463465519249
Epoch 5 - Training loss: 0.33179530426859855
Epoch 6 - Training loss: 0.32722213892266155
Epoch 7 - Training loss: 0.3252848910167813
Epoch 8 - Training loss: 0.32446048059500754
Epoch 9 - Training loss: 0.3215969825163484
Epoch 10 - Training loss: 0.32005199883133173
Epoch 11 - Training loss: 0.31845565321855246
Epoch 12 - Training loss: 0.31571509074419735
Epoch 13 - Training loss: 0.3144164187833667
Epoch

INFO:apscheduler.executors.default:Running job "BaseEmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2022-02-26 21:00:20 UTC)" (scheduled at 2022-02-26 21:00:20.231885+00:00)
INFO:apscheduler.executors.default:Job "BaseEmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2022-02-26 21:00:35 UTC)" executed successfully


Epoch 17 - Training loss: 0.308766683889553
Epoch 18 - Training loss: 0.308532061195001
Epoch 19 - Training loss: 0.3065998086705804
Epoch 20 - Training loss: 0.3096625545993447
Epoch 21 - Training loss: 0.304538294672966
Epoch 22 - Training loss: 0.30369578558020294
Epoch 23 - Training loss: 0.30321342647075655
Epoch 24 - Training loss: 0.3018710108473897
Epoch 25 - Training loss: 0.30227746022865176
Epoch 26 - Training loss: 0.29813844375312326
Epoch 27 - Training loss: 0.296247317455709
Epoch 28 - Training loss: 0.29577001137658954
Epoch 29 - Training loss: 0.2961962307803333
Epoch 30 - Training loss: 0.29479620046913624
Epoch 31 - Training loss: 0.2930365629028529
Epoch 32 - Training loss: 0.2904843362979591
Epoch 33 - Training loss: 0.2911228437907994


INFO:apscheduler.executors.default:Running job "BaseEmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2022-02-26 21:00:35 UTC)" (scheduled at 2022-02-26 21:00:35.231885+00:00)
INFO:apscheduler.executors.default:Job "BaseEmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2022-02-26 21:00:50 UTC)" executed successfully


Epoch 34 - Training loss: 0.2866478441283107
Epoch 35 - Training loss: 0.28569351113401353
Epoch 36 - Training loss: 0.28702406734228136
Epoch 37 - Training loss: 0.28462868379428985
Epoch 38 - Training loss: 0.2842296312563121
Epoch 39 - Training loss: 0.28239488578401506
Epoch 40 - Training loss: 0.2809695449657738
Epoch 41 - Training loss: 0.278782249847427
Epoch 42 - Training loss: 0.2764643343165517
Epoch 43 - Training loss: 0.2761734708212316
Epoch 44 - Training loss: 0.2758705934742466
Epoch 45 - Training loss: 0.2737947876099497
Epoch 46 - Training loss: 0.2716989791020751
Epoch 47 - Training loss: 0.2698657149448991
Epoch 48 - Training loss: 0.2708817786537111


INFO:apscheduler.scheduler:Scheduler has been shut down


Epoch 49 - Training loss: 0.26901235966943204
0.0005244343289492949
