In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
import xgboost as xgb

In [6]:
def get_data_loader(file, features, features_to_encode):
    """Read the training CSV and build model-ready feature and target arrays.

    Despite the name, no ``DataLoader`` is created: the function returns two
    NumPy arrays.

    Preprocessing, in order:

    1. Missing ``Episode_Length_minutes`` values are filled with the column
       median and missing ``Number_of_Ads`` values with the most frequent
       value (the smallest one on ties). Both statistics are computed from
       ``file`` itself.
    2. ``Listening_Time_minutes`` is split off as the target.
    3. Only the columns in ``features`` are kept, and every column named in
       ``features_to_encode`` is replaced by one-hot indicator columns that
       are appended on the right.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``. It must
            contain ``Episode_Length_minutes``, ``Number_of_Ads`` and
            ``Listening_Time_minutes``.
        features: Names of the input columns to keep.
        features_to_encode: Names from ``features`` to one-hot encode. The
            resulting indicator columns depend on the values present in
            ``file``.

    Returns:
        A tuple ``(x, y)`` of ``float32`` arrays. ``x`` has one row per CSV
        row; ``y`` has shape ``(n_rows, 1)``.

    Raises:
        KeyError: If a required or requested column is missing.
    """
    df = pd.read_csv(file)

    # Median imputation; an all-NaN column has a NaN median and stays as is.
    length = df["Episode_Length_minutes"]
    df["Episode_Length_minutes"] = length.fillna(length.median())

    # Most-frequent imputation. ``mode()`` ignores NaN and returns its values
    # sorted, so ``iloc[0]`` is the smallest of any tied modes.
    ads = df["Number_of_Ads"]
    ads_modes = ads.mode()
    if not ads_modes.empty:
        df["Number_of_Ads"] = ads.fillna(ads_modes.iloc[0])

    y = df[["Listening_Time_minutes"]].to_numpy().astype(np.float32)

    x_frame = df[features]
    for column in features_to_encode:
        # ``columns=`` forces encoding whatever the dtype. Letting get_dummies
        # infer what to encode skips numeric columns, which would then be
        # lost from the feature matrix instead of being expanded.
        x_frame = pd.get_dummies(x_frame, columns=[column])

    x = x_frame.to_numpy().astype(np.float32)
    return x, y

# Model inputs; the sentiment column is expanded into one-hot indicators.
features = [
    "Episode_Length_minutes",
    "Number_of_Ads",
    "Episode_Sentiment",
]
features_to_encode = ["Episode_Sentiment"]

# Build the training feature matrix and target array from train.csv.
x, y = get_data_loader("train.csv", features=features, features_to_encode=features_to_encode)

In [7]:
# Boosted-tree regressor trained on squared error: 100 trees of depth <= 6
# with a 0.1 learning rate. ``fit`` returns the estimator itself, so the
# fitted model is bound to ``model`` directly.
model = xgb.XGBRegressor(
    objective="reg:squarederror",
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100,
    verbosity=1,
).fit(x, y)

# Persist the fitted booster so it can be reloaded without retraining.
model.save_model("xgb_model.json")

In [9]:
def get_test_loader(file, features, features_to_encode):
    """Read the test CSV and build the model-ready feature array.

    Despite the name, no ``DataLoader`` is created: the function returns a
    NumPy array.

    Only the columns in ``features`` are kept. Missing
    ``Episode_Length_minutes`` values are filled with the column median and
    missing ``Number_of_Ads`` values with the most frequent value (the
    smallest one on ties), then every column named in ``features_to_encode``
    is replaced by one-hot indicator columns appended on the right.

    Note that both the fill values and the set of indicator columns are
    derived from ``file`` itself, so the output layout matches a matrix
    built from another file only if the encoded columns contain the same
    set of values in both.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.
        features: Names of the input columns to keep. Must include
            ``Episode_Length_minutes`` and ``Number_of_Ads``.
        features_to_encode: Names from ``features`` to one-hot encode.

    Returns:
        A ``float32`` array with one row per CSV row.

    Raises:
        KeyError: If a required or requested column is missing.
    """
    # Work on an explicit copy: assigning into a bare ``df[features]`` slice
    # triggers pandas' SettingWithCopyWarning.
    df = pd.read_csv(file)[features].copy()

    # Median imputation; an all-NaN column has a NaN median and stays as is.
    length = df["Episode_Length_minutes"]
    df["Episode_Length_minutes"] = length.fillna(length.median())

    # Most-frequent imputation. ``mode()`` ignores NaN and returns its values
    # sorted, so ``iloc[0]`` is the smallest of any tied modes.
    ads = df["Number_of_Ads"]
    ads_modes = ads.mode()
    if not ads_modes.empty:
        df["Number_of_Ads"] = ads.fillna(ads_modes.iloc[0])

    for column in features_to_encode:
        # ``columns=`` forces encoding whatever the dtype. Letting get_dummies
        # infer what to encode skips numeric columns, which would then be
        # lost from the feature matrix instead of being expanded.
        df = pd.get_dummies(df, columns=[column])

    return df.to_numpy().astype(np.float32)

import csv

features = ["Episode_Length_minutes", "Number_of_Ads", "Episode_Sentiment"]
features_to_encode = ["Episode_Sentiment"]

# Load and preprocess test.csv once; running the loader a second time on the
# same file with the same arguments only repeats the work.
X_test = get_test_loader(
    "test.csv",
    features=features,
    features_to_encode=features_to_encode,
)
# Alias kept so any code that refers to the older name keeps working.
test_dataloader = X_test

# Predict
preds = model.predict(X_test)

# Write to submission file
# Ids are generated rather than read from test.csv: this assumes the test rows
# are stored in id order with consecutive ids starting at 750000 — TODO confirm.
# NOTE(review): the value column is written as "prediction"; confirm that this
# is the header the consumer of the file expects.
with open("submission_xgb.csv", "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["id", "prediction"])
    writer.writerows(enumerate(preds, start=750000))
