In [None]:
pip install catboost

In [None]:
import pandas as pd
import numpy as np

from catboost import CatBoostRegressor

from sklearn.metrics import mean_squared_error

In [None]:
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the narrowest dtype that fits them.

    Only columns whose dtype is int16/int32/int64 or float16/float32/float64
    are considered; every other column is left untouched. The target dtype is
    picked from the column's minimum and maximum:

    * integer columns -> first of int8, int16, int32, int64 whose range
      contains both values (bounds inclusive);
    * float columns -> first of float16, float32 whose range contains both
      values, otherwise float64.

    The decision is based on value *range* only. float16 keeps roughly three
    significant decimal digits, so a float column narrowed to float16 may be
    rounded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    verbose : bool, default True
        When true, print the memory footprint after the conversion and the
        percentage saved.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # If min/max are NaN (empty column) no candidate matches and the
            # column keeps its current dtype.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # Comparisons with NaN/inf are false, so all-NaN or infinite
            # columns fall through to float64.
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    if verbose:
        end_mem = df.memory_usage().sum() / 1024**2
        # Guard the ratio so a frame reporting zero bytes cannot divide by zero.
        saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
        print('Decreased by {:.1f}%'.format(saved_pct))

    return df

In [None]:
test = pd.read_csv('/content/drive/MyDrive/Kaggle/test.csv')

In [None]:
test = reduce_mem_usage(test)

In [None]:
x_train = pd.read_csv('/content/drive/MyDrive/Kaggle/x_train.csv')

In [None]:
x_train = reduce_mem_usage(x_train)

In [None]:
x_test = pd.read_csv('/content/drive/MyDrive/Kaggle/x_test.csv')

In [None]:
x_test = reduce_mem_usage(x_test)

In [None]:
y_train = pd.read_csv('/content/drive/MyDrive/Kaggle/y_train.csv')

In [None]:
y_train = reduce_mem_usage(y_train)

In [None]:
y_test = pd.read_csv('/content/drive/MyDrive/Kaggle/y_test.csv')

In [None]:
y_test = reduce_mem_usage(y_test)

In [None]:
# CatBoost regressor with library-default hyperparameters.
CB = CatBoostRegressor()
# Fit the estimator created above. The previous code called `DT.fit`, but no
# `DT` is defined anywhere in this notebook, so it failed on a fresh kernel.
# CB_fit is kept as a name because the prediction cells below use it.
CB_fit = CB.fit(x_train, y_train)

In [None]:
# Predict on the hold-out features; the result is compared with y_test in the
# RMSE cell that follows.
y_pred_CB = CB_fit.predict(x_test)

In [None]:
# Root mean squared error on the hold-out split (square root of the MSE).
# NOTE(review): predictions are passed through np.exp further down, which
# suggests the target is log-scaled; if so this error is in log units - confirm.
RMSE_CB = mean_squared_error(y_test, y_pred_CB) ** 0.5
RMSE_CB  # last expression of the cell, so the notebook displays the score

In [None]:
# Load the sample submission; its 'meter_reading' column is overwritten with
# the model's predictions in the cells below before the frame is saved.
submission3  = pd.read_csv('/content/drive/MyDrive/Kaggle/sample_submission/sample_submission.csv')

In [None]:
# Predict for the rows of `test`; these values become the submission column.
# presumably `test` has the same feature columns as x_train - verify.
Y_pred = CB_fit.predict(test)

In [None]:
# Build the submission column from Y_pred in one step. The earlier version
# rescaled submission3['meter_reading'] in its own cell, so re-running that
# cell multiplied the values by 3.4118 again; deriving everything from Y_pred
# makes this cell safe to re-run.
# NOTE(review): np.exp assumes the model was trained on log(target); if log1p
# was used, np.expm1 is the matching inverse - confirm.
# NOTE(review): 3.4118 is close to the kWh -> kBTU factor; confirm that this is
# the intent and that it should apply to every row.
meter_reading = 3.4118 * np.exp(Y_pred)
# Negative readings are not allowed in the output; clamp them to zero.
meter_reading[meter_reading < 0] = 0
submission3['meter_reading'] = meter_reading

In [None]:
# Write the finished submission as CSV; index=False keeps the DataFrame index
# out of the file.
submission3.to_csv('/content/drive/MyDrive/Kaggle/submission3.csv', index=False)