In [None]:
import numpy as np
import pandas as pd
import scipy as sp

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor

In [None]:
# Source CSVs live under one folder of the Datamanim `datarepo` GitHub repository.
# The shared prefix is spelled once so the three URLs cannot drift apart.
DATA_ROOT = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/hyundai'

trainData = f'{DATA_ROOT}/train.csv'      # rows that include the `price` target
testData = f'{DATA_ROOT}/test.csv'        # rows to predict on
subData = f'{DATA_ROOT}/submission.csv'   # frame the predictions are written into

In [None]:
# Load the training frame, the frame to predict on, and the submission frame
# from their URLs (read in that order).
train, test, sub = (pd.read_csv(url) for url in (trainData, testData, subData))

In [None]:
# Integer-encode the categorical columns of `train` in place, one fresh encoder per column.
# 'model' is left unencoded: it is dropped when the feature matrix is built.
for column in ('transmission', 'fuelType'):
    train[column] = LabelEncoder().fit_transform(train[column])

In [None]:
# Target is the price column; features are every other column except 'model'.
y = train['price']
# Standardise the features; the result is a NumPy array, not a DataFrame.
# NOTE(review): the scaler is fit on all rows before the train/hold-out split,
# so hold-out statistics leak into the scaling.
X = StandardScaler().fit_transform(train.drop(columns=['model', 'price']))

In [None]:
# Hold out 20% of the training rows for scoring; the seed makes the split repeatable.
# X_test / y_test are this hold-out split, not the separate `test` frame.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# help(XGBRegressor())

In [None]:
%%time
# Grid-search an XGBoost regressor with cross-validation, then report R^2 on the hold-out split.
# X_train is already standardised; the scaler step is kept so the `model__` parameter
# prefix and the structure of the fitted object stay the same.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    # The seed is pinned rather than searched: it is not a hyperparameter, and picking
    # the "best" seed only fits cross-validation noise.
    ('model', XGBRegressor(random_state=42)),
])
params = {
    # Shallow to moderately deep trees around XGBoost's default depth of 6.
    'model__max_depth': [3, 6, 10],
    # learning_rate (eta) is a shrinkage factor documented in the range [0, 1];
    # values above 1 overshoot every boosting step and waste the fits.
    'model__learning_rate': [0.05, 0.1, 0.3],
}
# n_jobs=-1 uses every available core instead of a hard-coded worker count.
cv = GridSearchCV(pipe, params, n_jobs=-1).fit(X_train, y_train)
y_pred = cv.predict(X_test)
r2_score(y_test, y_pred)

In [None]:
# Random forest with a fixed seed, scored with R^2 on the hold-out split.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Ordinary least-squares baseline, scored with R^2 on the hold-out split.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# AdaBoost baseline, scored with R^2 on the hold-out split.
# The estimator is stochastic, so the seed is pinned (42, as for the split and the
# random forest) to make the reported score reproducible on a fresh run.
ad = AdaBoostRegressor(random_state=42).fit(X_train, y_train)
y_pred = ad.predict(X_test)
r2_score(y_test, y_pred)

In [None]:
# Preprocess the rows to predict on with transformers fitted on the TRAINING data, so the
# features reach the models in the same encoding and on the same scale they were trained with.
# `train` was label-encoded in place in an earlier cell, so the original category strings
# are recovered by re-reading the training CSV.
train_raw = pd.read_csv(trainData)

# Same feature set the models were fitted on: every training column except 'model'
# and the 'price' target, in training column order.
feature_cols = [col for col in train_raw.columns if col not in ('model', 'price')]
train_features = train_raw[feature_cols].copy()
test_features = test[feature_cols].copy()

for col in ('transmission', 'fuelType'):
    # One encoder per column, fitted on the training values and applied to both frames,
    # so a given category gets the same integer code in train and test.
    # transform() raises ValueError for a category that never appeared in training
    # rather than silently giving it an unrelated code.
    encoder = LabelEncoder().fit(train_raw[col])
    train_features[col] = encoder.transform(train_features[col])
    test_features[col] = encoder.transform(test_features[col])

# A StandardScaler fitted on all training rows reproduces the scaling applied to X before
# the models were trained; a scaler fitted on the test rows would put them on a different scale.
# `test` ends up as a NumPy array ready for `predict`.
test = StandardScaler().fit(train_features).transform(test_features)

In [None]:
# Predict with the random forest on the preprocessed `test` rows, store the
# predictions in the '0' column of the submission frame, and write it to disk
# without the index.
test_test = rf.predict(test)
sub = sub.assign(**{'0': test_test})
sub.to_csv('18652.csv', index=False)