## **1. load data & tools**

In [None]:
import os

# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(root, name) for name in names]
    for path in full_paths:
        print(path)

In [None]:
#!pip install -q xgboost tensorflow

In [None]:
%%time
import numpy as np, pandas as pd, logging
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

# Root logger setup: timestamp, level and message, showing INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s — %(levelname)s — %(message)s",
)

In [None]:
# Read the competition train/test splits, then preview the first two train rows.
train_csv = '/kaggle/input/playground-series-s5e4/train.csv'
test_csv = '/kaggle/input/playground-series-s5e4/test.csv'
data_train, data_test = (pd.read_csv(csv_path) for csv_path in (train_csv, test_csv))
data_train.head(n=2)

In [None]:
# Preview the first two rows of the raw test data.
data_test.head(n=2)

In [None]:
# Report (rows, columns) of the train set, then of the test set, one per line.
shape_report = f"{data_train.shape}\n{data_test.shape}"
print(shape_report)

## **2. preprocessing data**

### **2.1 preprocessing training data**

In [None]:
for col in data_train.columns.to_list():
    if data_train[col].dtype == 'object':
        data_train[col] = LabelEncoder().fit_transform(data_train[col])
logging.info('categorical features successful done.')

In [None]:
for col in data_train.columns.to_list():
    if data_train[col].isna().sum() != 0:
        data_train[col] = data_train[col].fillna(data_train[col].mean())
logging.info('numerical features successful done.')

In [None]:
for col in data_train.columns.to_list():
    if data_train[col].dtype == 'float64' and col != 'Listening_Time_minutes':
        data_train[[col]] = StandardScaler().fit_transform(data_train[[col]])
logging.info('scalered successful done.')

In [None]:
data_train.head(2)

### **2.2 preprocessing testing data**

In [None]:
for col in data_test.columns.to_list():
    if data_test[col].dtype == 'object':
        data_test[col] = LabelEncoder().fit_transform(data_test[col])
logging.info('categorical features successful done.')

In [None]:
for col in data_test.columns.to_list():
    if data_test[col].isna().sum() != 0:
        data_test[col] = data_test[col].fillna(data_test[col].mean())
logging.info('numerical features successful done.')

In [None]:
for col in data_test.columns.to_list():
    if data_test[col].dtype == 'float64':
        data_test[[col]] = StandardScaler().fit_transform(data_test[[col]])
logging.info('scalered successful done.')

In [None]:
# Preview the first two rows of the preprocessed test data.
data_test.head(n=2)

### **2.3 splitting data**

In [None]:
# Select model inputs by column name instead of by position, so a change in
# column order cannot silently feed the id or the target into the models.
id_column = 'id'
target_column = 'Listening_Time_minutes'

X_train = data_train.drop(columns=[id_column, target_column])
y_train = data_train[target_column]

# Use exactly the training feature columns, in the same order, for the test
# set; a missing column raises a KeyError here rather than at prediction time.
X_test = data_test[X_train.columns]
logging.info('spliting successful done')

## **3. creating & fitting model**

In [None]:
# Ordinary least-squares baseline; `fit` returns the estimator itself, and the
# trailing expression keeps the fitted model as the cell's displayed output.
reg = LinearRegression().fit(X_train, y_train)
reg

In [None]:
# Seed the tree so repeated runs give the same model and the same submission:
# scikit-learn permutes features at each split, so an unseeded tree can break
# ties between equally good splits differently from run to run.
tree = DecisionTreeRegressor(random_state=42)
tree.fit(X_train, y_train)

In [None]:
# Linear-regression predictions for the test features.
y_pred_1 = reg.predict(X=X_test)
logging.info(msg='prediction successful done')

In [None]:
# Decision-tree predictions for the test features (used for the submission).
y_pred_2 = tree.predict(X=X_test)
logging.info(msg='prediction successful done')

## **4. creating submission data**

In [None]:
# Pair every test id with its decision-tree prediction and write the result
# to disk without the DataFrame index.
submission = data_test[['id']].copy()
submission['Listening_Time_minutes'] = y_pred_2
submission.to_csv('submission.csv', index=False)
logging.info('submission successful created')