In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import mean_squared_error

In [None]:
# Load the training and test tables from the competition's input folder.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/30-days-of-ml/train.csv',
        '/kaggle/input/30-days-of-ml/test.csv',
    )
)

In [None]:
# Preview the first five rows of the training table.
train_data.head(n=5)

In [None]:
# Show (rows, columns) for the training table, then the test table.
tuple(frame.shape for frame in (train_data, test_data))

In [None]:
# Show the column labels of the training table, then the test table.
tuple(frame.columns for frame in (train_data, test_data))

In [None]:
# Work on copies so the frames loaded from disk stay unmodified.
train_full, test_full = train_data.copy(), test_data.copy()

In [None]:
# Separate the features from the target.
# NOTE(review): every column except 'target' stays in X, so an identifier
# column such as 'id' (present in the test set) is used as a feature —
# confirm that is intended.
X = train_full.drop(columns='target')
y = train_full['target']

In [None]:
# Report missing values: one flag for the target, and a per-column flag
# for the training features and for the test table.
tuple(data.isna().any() for data in (y, X, test_full))

In [None]:
# Hold out part of the training data for validation. No split size is
# passed, so scikit-learn's default proportion applies; the fixed seed
# keeps the split reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    random_state=0,
)

In [None]:
# Show the shape of each piece produced by the train/validation split.
tuple(part.shape for part in (X_train, X_valid, y_train, y_valid))

In [None]:
# Collect the names of the columns stored with the 'object' dtype.
category_cols = [
    name for name, dtype in X_train.dtypes.items() if dtype == 'object'
]
category_cols

In [None]:
# Print the distinct values found in each categorical column of the
# training split.
for column_name in category_cols:
    distinct_values = X_train[column_name].unique()
    print(distinct_values)

In [None]:
# Encoding Category Cols
# The encoder is fitted on the training split only and then reused for the
# validation split and the test table, so all three share one mapping.
# NOTE(review): the encoder is created with default settings — check how it
# treats categories that never appear in the training split.
enc = OrdinalEncoder()

cat_X_train = X_train.copy()
cat_X_valid = X_valid.copy()

cat_X_train[category_cols] = enc.fit_transform(X_train[category_cols])
cat_X_valid[category_cols] = enc.transform(X_valid[category_cols])
# Encode from the untouched test_data rather than from test_full itself.
# Reading and writing the same frame made this cell fail when re-run,
# because the encoder would then receive already-encoded numbers.
test_full[category_cols] = enc.transform(test_data[category_cols])

In [None]:
# Fit a random forest on the encoded training split (fixed seed for
# reproducibility), then score it on the held-out validation split.
model = RandomForestRegressor(n_estimators=100, random_state=0).fit(
    cat_X_train, y_train
)

y_predict = model.predict(cat_X_valid)
# The score is the mean squared error (not its square root).
score = mean_squared_error(y_true=y_valid, y_pred=y_predict)
score

In [None]:
# Predict on the test table. test_full must already hold the encoded
# categorical columns produced by the encoding cell.
test_predictions = model.predict(X=test_full)

In [None]:
# Build the submission table: one row per test id with its prediction.
# NOTE(review): the headers are 'Id'/'Target' while the input column is
# 'id' — confirm the casing against the competition's sample submission.
submission_columns = {
    'Id': test_full['id'],
    'Target': test_predictions,
}
output = pd.DataFrame(submission_columns)

In [None]:
# Write the submission to the working directory without the index column.
output.to_csv(path_or_buf='my_submission.csv', index=False)