# Mobile Phone Classification / Prediction


In [273]:
import torch
import kagglehub
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch import nn
from tqdm.auto import tqdm

In [274]:
# Location of the dataset when running inside a Kaggle kernel.
DATA_PATH = "/kaggle/input/mobile-price-classification"
DATA_NAME = "train.csv"
TEST_DATA_NAME = "test.csv"

try:
  # Preferred source: the read-only input mount provided by Kaggle.
  dataset = pd.read_csv(f"{DATA_PATH}/{DATA_NAME}")
  test_dataset = pd.read_csv(f"{DATA_PATH}/{TEST_DATA_NAME}")
except FileNotFoundError:
  # Not on Kaggle: download the dataset and read from the directory that
  # kagglehub returns. The second positional argument of dataset_download
  # selects a single file inside the dataset, so no path is passed here.
  file_path = kagglehub.dataset_download("iabhishekofficial/mobile-price-classification")
  print(file_path)
  dataset = pd.read_csv(f"{file_path}/{DATA_NAME}")
  test_dataset = pd.read_csv(f"{file_path}/{TEST_DATA_NAME}")


In [275]:
# Data preprocessing: the columns were seen to be int or float, so the frames
# are converted to tensors without any encoding step.
# The device's price range is the prediction target.
target = ["price_range"]

X, y = dataset.drop(target, axis=1), dataset[target[0]]

X_train = torch.tensor(X.values, dtype=torch.float32)
# CrossEntropyLoss expects integer class indices, hence torch.long.
y_train = torch.tensor(y.values, dtype=torch.long)

# "id" is a row identifier, not a feature. errors="ignore" keeps this cell
# re-runnable: on a second run the column is already gone and a plain drop
# would raise KeyError.
test_dataset = test_dataset.drop(columns="id", errors="ignore")
X_test = torch.tensor(test_dataset.values, dtype=torch.float32)
print(X_test.dtype)

train_dataset = TensorDataset(X_train, y_train)
# The test tensors carry no labels, so each item is a 1-tuple of features.
test_datasets = TensorDataset(X_test)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# No shuffling: predictions must stay in the row order of the test file.
test_dataloader = DataLoader(test_datasets, batch_size=32)

torch.float32


In [276]:
# Model
class MobilePriceModel(nn.Module):
  """Fully connected classifier made of two stacked three-layer blocks.

  Args:
    input_size: number of input features per sample.
    hidden_units: width of every hidden layer.
    output_size: number of output logits (one per class).
  """

  def __init__(self, input_size, hidden_units, output_size):
    super().__init__()

    # Attribute names and layer order determine the state_dict keys, so
    # both are kept exactly as callers/checkpoints expect them.
    self.linear_layer_1 = self._stack(input_size, hidden_units, hidden_units, hidden_units)
    self.linear_layer_2 = self._stack(hidden_units, hidden_units, hidden_units, output_size)

  @staticmethod
  def _stack(*widths):
    """Chain Linear layers over consecutive widths, with ReLU between them.

    No activation follows the last Linear layer.
    """
    modules = []
    for fan_in, fan_out in zip(widths, widths[1:]):
      if modules:
        modules.append(nn.ReLU())
      modules.append(nn.Linear(in_features=fan_in, out_features=fan_out))
    return nn.Sequential(*modules)

  def forward(self, x):
    """Return the raw logits for the batch `x`."""
    hidden = self.linear_layer_1(x)
    return self.linear_layer_2(hidden)




In [277]:
# One input per feature column of X, 8 units per hidden layer, 4 output logits.
model = MobilePriceModel(
    input_size=X.shape[1],
    hidden_units=8,
    output_size=4,
)

In [278]:
# Loss and optimizer.
# Multi-class objective; class weights would only be needed if the classes
# were imbalanced.
criterion = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [279]:
# Train the model, then predict the unlabeled test set once and write the
# submission file.
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 10

# Per-epoch history consumed by the plotting cell. The test tensors carry no
# labels, so no test loss/accuracy can be computed and those lists stay empty.
test_losses = []
train_losses = []
train_accuracies = []
test_accuracies = []

# Module.to() moves the parameters in place, so the optimizer created earlier
# keeps pointing at the same parameter objects.
model.to(device)

for epoch in tqdm(range(epochs)):
  model.train()
  epoch_train_loss = 0.0
  correct_train = 0

  # Dedicated batch names: reusing X / y here would overwrite the
  # notebook-level feature frame and label series.
  for X_batch, y_batch in train_dataloader:
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)

    optimizer.zero_grad()
    logits = model(X_batch)
    loss = criterion(logits, y_batch)
    loss.backward()
    optimizer.step()

    # criterion returns the batch mean; weight it by the batch size so the
    # epoch average is exact even when the last batch is smaller.
    epoch_train_loss += loss.item() * len(y_batch)
    correct_train += (logits.argmax(dim=1) == y_batch).sum().item()

  train_losses.append(epoch_train_loss / len(train_dataset))
  train_accuracies.append(correct_train / len(train_dataset))

# Inference runs once, after training: predicting and rewriting the CSV
# inside the epoch loop only repeated work that the next epoch overwrote.
model.eval()
preds = []

with torch.inference_mode():
  # The test dataset wraps a single tensor, so each batch is a 1-element
  # sequence holding the features.
  for (X_batch,) in test_dataloader:
    logits = model(X_batch.to(device))
    preds.append(logits.argmax(dim=1).cpu())

all_preds = torch.cat(preds).numpy()

# NOTE(review): ids are generated as 0..n-1 rather than taken from the test
# file's own "id" column - confirm this matches the expected submission ids.
submission = pd.DataFrame({
    "id": range(len(all_preds)),
    "price_range": all_preds
})
submission.to_csv('submission.csv', index=False)


  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Plot the recorded training history: one figure for loss, one for accuracy.

import matplotlib.pyplot as plt

# (metric name, train series, test series) for each figure, in display order.
curves = (
    ("Loss", train_losses, test_losses),
    ("Accuracy", train_accuracies, test_accuracies),
)

for metric, train_series, test_series in curves:
  plt.plot(train_series, label=f"Train {metric}")
  plt.plot(test_series, label=f"Test {metric}")
  plt.xlabel("Epoch")
  plt.ylabel(metric)
  plt.title(f"Train/Test {metric} Curve")
  plt.legend()
  # show() finishes the current figure, so the next metric starts a new one.
  plt.show()
