In [10]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, Dataset







df = pd.read_csv("train.csv")


df['date'] = pd.to_datetime(df['date'])

# Extract time-related features
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['weekday'] = df['date'].dt.weekday  # Day of the week (0=Monday, 6=Sunday)

# Drop the original 'date' column
df.drop('date', axis=1, inplace=True)

# Fill missing values
df['country'].fillna(df['country'].mode()[0], inplace=True)
df['store'].fillna(df['store'].mode()[0], inplace=True)
df['product'].fillna(df['product'].mode()[0], inplace=True)

# Encode categorical columns
labelencoder = LabelEncoder()
df['country'] = labelencoder.fit_transform(df['country'])
df['product'] = labelencoder.fit_transform(df['product'])
df['store'] = labelencoder.fit_transform(df['store'])

# Fill target NaN values
df['num_sold'].fillna(df['num_sold'].mean(), inplace=True)

# Prepare features and target
features = df.drop(['id', 'num_sold'], axis=1).values  # Drop 'id' and 'num_sold'
target = df['num_sold'].values

# Train-Test Split
X_train, X_val, y_train, y_val = train_test_split(features, target, random_state=42, test_size=0.2)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Create Sequences
def create_sequences(data, targets, seq_len):
    sequences = []
    labels = []
    for i in range(len(data) - seq_len):
        sequences.append(data[i:i+seq_len])
        labels.append(targets[i+seq_len])
    return torch.tensor(sequences, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32)

seq_len = 7
X_train_seq, y_train_seq = create_sequences(X_train, y_train, seq_len)
X_val_seq, y_val_seq = create_sequences(X_val, y_val, seq_len)

# Custom Dataset Class
class StickerSales(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# DataLoader
train_dataset = StickerSales(X_train_seq, y_train_seq)
val_dataset = StickerSales(X_val_seq, y_val_seq)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# GRU Model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0.2):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.gru.num_layers, x.size(0), self.gru.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])  # Many-to-one
        return out

# Hyperparameters
input_size = X_train.shape[1]  # After adding time features, this will be larger
hidden_size = 64
output_size = 1
num_layers = 2
dropout = 0.2
learning_rate = 0.001
num_epochs = 50

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize Model, Loss, Optimizer
model = GRUModel(input_size, hidden_size, output_size, num_layers, dropout).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    val_loss = 0
    model.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), targets)
            val_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")
    torch.save(model.state_dict(), 'gru_model.pth')





The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['country'].fillna(df['country'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['store'].fillna(df['store'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which

Epoch 1/50, Train Loss: 795228.6740, Val Loss: 626541.7564
Epoch 2/50, Train Loss: 531608.4794, Val Loss: 475728.1655
Epoch 3/50, Train Loss: 461436.6251, Val Loss: 459468.5132
Epoch 4/50, Train Loss: 457715.7375, Val Loss: 459345.9717
Epoch 5/50, Train Loss: 457770.6118, Val Loss: 459348.5185
Epoch 6/50, Train Loss: 457686.1675, Val Loss: 459346.1796
Epoch 7/50, Train Loss: 457834.6785, Val Loss: 459348.2433
Epoch 8/50, Train Loss: 457672.6536, Val Loss: 459352.3147
Epoch 9/50, Train Loss: 457705.9874, Val Loss: 459375.1579
Epoch 10/50, Train Loss: 457768.3848, Val Loss: 459348.2679
Epoch 11/50, Train Loss: 457686.7059, Val Loss: 459360.0307
Epoch 12/50, Train Loss: 457676.8343, Val Loss: 459346.9185
Epoch 13/50, Train Loss: 457672.7115, Val Loss: 459351.6116
Epoch 14/50, Train Loss: 457673.6702, Val Loss: 459346.1816
Epoch 15/50, Train Loss: 457716.7052, Val Loss: 459346.5808
Epoch 16/50, Train Loss: 457817.3099, Val Loss: 459346.6156
Epoch 17/50, Train Loss: 457766.2475, Val Loss: 4

In [11]:
# Load test data and sample submission
test_data = pd.read_csv("test.csv")
sample_submission = pd.read_csv("sample_submission.csv")

# Fill missing values
test_data['country'] = test_data['country'].fillna(test_data['country'].mode()[0])
test_data['store'] = test_data['store'].fillna(test_data['store'].mode()[0])
test_data['product'] = test_data['product'].fillna(test_data['product'].mode()[0])

# Label encode categorical features
labelencoder = LabelEncoder()
test_data['country'] = labelencoder.fit_transform(test_data['country'])
test_data['store'] = labelencoder.fit_transform(test_data['store'])
test_data['product'] = labelencoder.fit_transform(test_data['product'])

# Drop unnecessary columns and extract features
X_test = test_data.drop(['id', 'date'], axis=1).values

# Standardize the features
scaler = StandardScaler()
X_test = scaler.fit_transform(X_test)

# Define custom dataset for test data
class TestDataset(Dataset):
    def __init__(self, features):
        self.features = torch.tensor(features, dtype=torch.float32)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx]

# Create DataLoader
test_dataset = TestDataset(X_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the GRU model (replace with your model definition)
class GRUModel(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super(GRUModel, self).__init__()
        self.gru = torch.nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out[:, -1, :])  # Use the output of the last time step
        return out

# Initialize and load the model
input_size = X_test.shape[1]
hidden_size = 64  # Adjust based on your model
output_size = 1
num_layers = 2
dropout = 0.3

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GRUModel(input_size, hidden_size, output_size, num_layers, dropout).to(device)

model.eval()

# Generate predictions
predictions = []
with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs.to(device)
        inputs = inputs.unsqueeze(1)  # Add sequence length dimension (seq_len=1)
        outputs = model(inputs)
        predictions.extend(outputs.squeeze(1).cpu().numpy())  # Flatten outputs

# Validate the number of predictions
print("Number of predictions:", len(predictions))
print("Number of samples in sample_submission:", len(sample_submission))

# Check for length mismatch and update submission
if len(predictions) == len(sample_submission):
    sample_submission['num_sold'] = predictions
    sample_submission.to_csv("updated_submission.csv", index=False)
    print("Updated submission file saved as 'updated_submission.csv'")
else:
    print("Mismatch detected! Predictions do not match the number of rows in sample_submission.")


Number of predictions: 98550
Number of samples in sample_submission: 98550
Updated submission file saved as 'updated_submission.csv'
