In [80]:
import pandas as pd 
import numpy as np 
import torch 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.model_selection import train_test_split 
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer 
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import xgboost

In [63]:
df = pd.read_csv("./archive/CellPhone_train.csv")

In [64]:
df.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

In [65]:
X, y = df.drop(columns=['price_range']), df[['price_range']]

In [66]:
float_cols = X.select_dtypes(include=[float]).columns.to_list()

In [67]:
int_cols = X.select_dtypes(include=[int]).columns.to_list()

In [68]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

In [69]:
pipeline = ColumnTransformer(
    transformers = [('float_pipeline', MinMaxScaler(), float_cols)],
    remainder='passthrough')

In [70]:
pipeline.fit(X_train)

In [71]:
X_train = pipeline.transform(X_train)
X_test = pipeline.transform(X_test)

In [72]:
# ohe = OneHotEncoder(sparse_output=False, dtype=int)
# y_train = ohe.fit_transform(y_train)
# y_test = ohe.transform(y_test)

In [73]:
params = {
    'objective': 'multi:softmax',  # Multiclass classification
    'num_class': len(set(y)),       # Number of classes
    'eval_metric': 'merror'         # Evaluation metric: Multiclass classification error rate
}

classifier = xgboost.XGBClassifier(params)




In [74]:
classifier.fit(X_train, y_train)

In [75]:
y_hat = classifier.predict(X_test)

In [81]:
accuracy_score(y_hat, y_test)

0.9066666666666666

In [82]:
class PhoneDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.int32)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

    def __len__(self):
        return len(self.X)
        

In [83]:
train_dataset = PhoneDataset(X_train, y_train)
test_dataset = PhoneDataset(X_test, y_test)

In [158]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_num=20):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_num, 15),
            nn.LeakyReLU(),
            nn.Linear(15, 10),
            nn.LeakyReLU(),
            nn.Linear(10, 8),
            nn.LeakyReLU(),
            nn.Linear(8, 4),
        )

    def forward(self, X):
        X = self.net(X)
        return X

In [159]:
model = NeuralNetwork()

In [160]:
model

NeuralNetwork(
  (net): Sequential(
    (0): Linear(in_features=20, out_features=15, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=15, out_features=10, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=10, out_features=8, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=8, out_features=4, bias=True)
  )
)

In [161]:
n_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.002)

BATCH_SIZE = 16
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)

In [162]:
model

NeuralNetwork(
  (net): Sequential(
    (0): Linear(in_features=20, out_features=15, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=15, out_features=10, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=10, out_features=8, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=8, out_features=4, bias=True)
  )
)

In [163]:
#temp_data = torch.rand([15, 20], dtype=torch.float32).to(device)

model(temp_data)

tensor([[-0.2479,  0.0980, -0.3175, -0.4065],
        [-0.2626,  0.1076, -0.3036, -0.4302],
        [-0.2587,  0.1139, -0.3111, -0.4259],
        [-0.2471,  0.0965, -0.3182, -0.4053],
        [-0.2443,  0.0890, -0.3193, -0.3991],
        [-0.2509,  0.0945, -0.3132, -0.4099],
        [-0.2545,  0.1041, -0.3122, -0.4174],
        [-0.2605,  0.1122, -0.3078, -0.4283],
        [-0.2538,  0.1077, -0.3144, -0.4175],
        [-0.2564,  0.1077, -0.3114, -0.4216],
        [-0.2554,  0.1068, -0.3120, -0.4196],
        [-0.2452,  0.0929, -0.3193, -0.4013],
        [-0.2424,  0.0949, -0.3234, -0.3983],
        [-0.2519,  0.0971, -0.3129, -0.4124],
        [-0.2600,  0.1093, -0.3077, -0.4272]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [164]:
train_dataset[0][1]

tensor([0], dtype=torch.int32)

In [165]:

train_losses = []
for epoch in range(n_epochs):
    model.train()
    batch_losses = []
    for feat, target in train_loader:
        feat = feat.to(device)
        pred = model(feat)
        target = torch.eye(4)[target].squeeze().to(device)
        loss = criterion(pred, target)
        true_ans += (pred.argmax(dim=1) == target.argmax(dim=1)).sum().item()        

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  
        batch_losses.append(loss.item())
    train_losses.append(np.mean(np.array(batch_losses)))
    # running_train_acc = true_ans / len(train_dataset)
    # print(running_train_acc)
    # train_acc.append(running_train_acc) 

2.6440054828470405
1.132353495467793
1.022093915803866
0.9751751504161141
0.9277983856472102
0.8876367739655755
0.8639476617628877
0.8288330638950522
0.8194854279810732
0.7795302471653982
0.7641673609614372
0.7441646646369587
0.7036652490496635
0.6841550736942075
0.6830767846920274
0.6816038133745844
0.6689212163063613
0.6315196288580244
0.6301494779234583
0.6157556403089653
0.6023448766632513
0.5999829450791533
0.5819039375267245
0.578553432090716
0.5754918686368249
0.5654558254913851
0.5809806548058987
0.549543868411671
0.5401533384892073
0.5220759998668324
0.5180973386899992
0.5196428762918169
0.5001254366202788
0.4893798039040782
0.48327894627370616
0.4948089166798375
0.47887488115917554
0.47542051192034374
0.4677108913998712
0.46790695732290094
0.4687250652773814
0.448253323239359
0.4478280210698193
0.44418154267424886
0.43001564439724793
0.424492257731882
0.42229732633991673
0.418311150575226
0.4111458264629949
0.40693068013272504
0.39586315947500145
0.392534478482875
0.386550020

In [23]:
logits = torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5],
                       [0.5, 0.4, 0.3, 0.2, 0.1],
                       [0.2, 0.3, 0.4, 0.5, 0.6],
                       [0.4, 0.3, 0.2, 0.1, 0.0],
                       [0.5, 0.5, 0.5, 0.5, 0.5],
                       [0.1, 0.1, 0.1, 0.1, 0.1],
                       [0.3, 0.2, 0.1, 0.0, 0.5]])

# Integer targets
targets = torch.tensor([0, 3, 4, 4, 3, 0, 2])

# Initialize CrossEntropyLoss
criterion = nn.CrossEntropyLoss()

# Calculate the loss
loss = criterion(logits, targets)