In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor

# XGBoost

In [23]:
# Load the two summary tables. Only `offline` is used in this cell; `online`
# is loaded here for the evaluation cells that follow.
online = pd.read_csv("online_summary.csv")
offline = pd.read_csv("offline_summary.csv")

# Features are every column except 'Unnamed: 0' (presumably a saved row index
# -- confirm) and the position columns; targets are the two coordinates.
offline_features = offline.drop(columns=['Unnamed: 0', "pos_XY", 'posX', "posY"])
offline_targets = offline[['posX', "posY"]]

# Hold out 20% of the offline rows for testing; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    offline_features, offline_targets, test_size=0.2, random_state=42
)

# Hyperparameters were established with a grid search; the author notes that
# this configuration is prone to overfitting.
model = XGBRegressor(n_estimators=100, learning_rate=0.13, max_depth=8)
model.fit(X_train, y_train)

# Predict locations for the held-out rows and compare them with the real
# locations.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 1.0540432382155869


In [24]:
# Evaluate the fitted model on the online table, using the same
# feature/target column split that was applied to the offline table.
online_features = online.drop(columns=['Unnamed: 0', "pos_XY", 'posX', "posY"])
online_targets = online[['posX', "posY"]]

# Predicted locations for every online row.
y_online_pred = model.predict(online_features)

# mean_absolute_error's signature is (y_true, y_pred): pass the ground truth
# first. The value is unchanged because MAE is symmetric in its arguments.
mae = mean_absolute_error(online_targets, y_online_pred)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 1.7534240546921889


In [25]:
# Show floats with two decimals and thousands separators when frames are displayed.
pd.options.display.float_format = "{:,.2f}".format

# One table with the XGBoost predictions, the true online positions and the
# absolute per-axis errors, all rounded to two decimals.
pred_df = (
    pd.DataFrame(y_online_pred, columns=["XGB_pred_X", "XGB_pred_Y"])
    .assign(
        XGB_test_X=online_targets["posX"],
        XGB_test_Y=online_targets["posY"],
    )
    .assign(
        XGB_Error_X=lambda frame: (frame["XGB_test_X"] - frame["XGB_pred_X"]).abs(),
        XGB_Error_Y=lambda frame: (frame["XGB_test_Y"] - frame["XGB_pred_Y"]).abs(),
    )
    .round(2)
)

In [26]:
# Write the XGBoost prediction/error table to disk.
pred_df.to_csv(path_or_buf='XGB_predictions.csv')

# K-nearest neighbors (KNN)

In [27]:
# Reload both summary tables from disk for the KNN section.
online = pd.read_csv("online_summary.csv")
offline = pd.read_csv("offline_summary.csv")

# Convert features and targets to plain NumPy arrays. Features are every
# column except 'Unnamed: 0' and the position columns; targets are posX/posY.
offline_features = offline.drop(columns=['Unnamed: 0', "pos_XY", 'posX', "posY"]).to_numpy()
offline_targets = offline.loc[:, ['posX', "posY"]].to_numpy()
online_features = online.drop(columns=['Unnamed: 0', "pos_XY", 'posX', "posY"]).to_numpy()
online_targets = online.loc[:, ['posX', "posY"]].to_numpy()

# Peek at the first four online feature rows.
online_subset = online_features[:4]
online_subset

array([[130.5       , 135.        , -52.22727273, -62.94897959,
        -61.81395349, -40.06896552, -63.04301075, -55.23333333],
       [112.3       ,  90.        , -55.27522936, -73.96190476,
        -72.70103093, -47.81308411, -69.45454545, -46.88      ],
       [230.1       , 225.        , -51.70909091, -70.08247423,
        -70.0989011 , -54.08823529, -69.13157895, -53.88659794],
       [  5.8       ,   0.        , -49.5       , -64.25806452,
        -72.59770115, -45.65289256, -60.79746835, -49.58      ]])

In [28]:
# Fit a k-nearest-neighbours regressor (k = 5) on the offline data.
neigh_alg = KNeighborsRegressor(n_neighbors=5)
neigh_alg.fit(offline_features, offline_targets)

# Predict a position for every online row. With n_neighbors=5 the regressor
# combines the targets of the five nearest offline samples rather than
# returning a single closest neighbour.
neighbors = neigh_alg.predict(online_features)

# NOTE(review): predictions are rounded to whole units before scoring and
# export, while the online targets are fractional -- confirm this is intended,
# since the rounding itself contributes to the reported error.
neighbors = np.round(neighbors)
pred_X = neighbors[:, 0]
pred_Y = neighbors[:, 1]

# Wrap the targets in a DataFrame so they can be selected by column name.
online_targets = pd.DataFrame(online_targets, columns=["posX", "posY"])

# mean_absolute_error's signature is (y_true, y_pred): pass the ground truth
# first. The value is unchanged because MAE is symmetric in its arguments.
mae = mean_absolute_error(online_targets, neighbors)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 2.4952499999999995


In [29]:
# Show floats with two decimals and thousands separators when frames are displayed.
pd.options.display.float_format = "{:,.2f}".format

# One table with the KNN predictions, the true online positions and the
# absolute per-axis errors, all rounded to two decimals.
pred_df = (
    pd.DataFrame(neighbors, columns=["Knn_pred_X", "Knn_pred_Y"])
    .assign(
        Knn_test_X=online_targets["posX"],
        Knn_test_Y=online_targets["posY"],
    )
    .assign(
        Knn_Error_X=lambda frame: (frame["Knn_test_X"] - frame["Knn_pred_X"]).abs(),
        Knn_Error_Y=lambda frame: (frame["Knn_test_Y"] - frame["Knn_pred_Y"]).abs(),
    )
    .round(2)
)

In [30]:
# Write the KNN prediction/error table to disk.
pred_df.to_csv(path_or_buf='KNN_predictions.csv')

In [31]:
# Display the current prediction/error table as the cell's output.
pred_df

Unnamed: 0,Knn_pred_X,Knn_pred_Y,Knn_test_X,Knn_test_Y,Knn_Error_X,Knn_Error_Y
0,6.0,6.0,0.0,0.05,6.0,5.95
1,2.0,9.0,0.15,9.42,1.85,0.42
2,3.0,9.0,0.31,11.09,2.69,2.09
3,1.0,10.0,0.47,8.2,0.53,1.8
4,2.0,9.0,0.78,10.94,1.22,1.94
5,5.0,9.0,0.93,11.69,4.07,2.69
6,2.0,8.0,1.08,12.19,0.92,4.19
7,2.0,8.0,1.24,3.93,0.76,4.07
8,6.0,7.0,1.39,6.61,4.61,0.39
9,1.0,10.0,1.52,9.32,0.52,0.68
