In [62]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore")

In [63]:
df = pd.read_csv("internship_train.csv")

### Splitting data into train and test for modelling and Scaling

In [64]:
X, y = df.drop("target", axis = 1), df.target

In [65]:
df.shape

(90000, 54)

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [67]:
cols = X_train.columns
transformer = StandardScaler()
X_train = transformer.fit_transform(X_train)
X_train = pd.DataFrame(X_train, columns = cols)
X_test = transformer.transform(X_test)


In [68]:
X_test

array([[-0.79130327,  0.19505345,  1.00014034, ..., -0.80921982,
         0.22577126, -0.87233901],
       [ 0.10817396, -0.90965354,  0.14708172, ...,  0.07967858,
         0.47393547,  1.45662395],
       [-0.07864054, -0.36772181,  1.37465388, ...,  0.00710551,
         0.17174455,  0.08874996],
       ...,
       [-0.30696938,  0.63971538,  0.74352921, ..., -1.01974596,
         0.39240039, -1.23596132],
       [ 1.65804241, -1.18061941,  0.75740008, ..., -0.77033141,
         1.04241795, -1.69292647],
       [-1.58699466, -0.97218413, -0.60194536, ..., -1.69794346,
         0.75576385, -1.11329136]])

### Modelling with Decision Tree Regressor

In [69]:
dt = DecisionTreeRegressor().fit(X_train, y_train)

In [70]:
y_predDT = dt.predict(X_test)

In [71]:
r2 = r2_score(y_test, y_predDT)

In [72]:
mean_absolute_error(y_test, y_predDT)

0.005674671430553154

In [73]:
np.sqrt(mean_squared_error(y_test, y_predDT))

0.007754786384688464

### Writing internship_hidden_test with new predicted values to new Hidden_test.csv

In [74]:
test = pd.read_csv("internship_hidden_test.csv")
hidden_test_predictions = dt.predict(test)
test["Predicted Target"] = hidden_test_predictions

In [75]:
test.to_csv("hidden_test.csv", index = False)