## Note
Trying out different models and parameters to see which performs best.

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
# Loading the development data (the labelled set that still contains the x and y target columns)
df = pd.read_csv("../csv_files/development.csv")

In [6]:
# Build the regression targets: one [x, y] pair per row of df, in row order.
# Selecting both columns at once and converting in a single step avoids the
# per-row .iloc lookups of a Python loop.
pos = df[["x", "y"]].to_numpy().tolist()

# Remove the target columns so that only the feature columns remain in df
df.drop(columns=["x", "y"], inplace=True)

# Hold out 20% of the rows for validation; the fixed random_state keeps the split reproducible
X_train, X_val, pos_train, pos_val = train_test_split(df, pos, test_size=0.2, random_state=42)

In [7]:
# Multi-output regression on the [x, y] targets, using LinearRegression as the
# wrapped estimator (Ridge is imported as well but is not the one used here)
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge, LinearRegression

mult_regr = MultiOutputRegressor(LinearRegression())
mult_regr.fit(X_train, pos_train)

# Predicted positions for the held-out validation rows
pos_pred = mult_regr.predict(X_val)

In [8]:
# Metrics for evaluating the model
import sklearn.metrics as sm

def metrics_on_model(pos_val, pos_pred):
    """Print a set of regression scores for predicted versus true positions.

    Every score is computed with the corresponding ``sklearn.metrics``
    function (available here as ``sm``) and printed rounded to two decimals.

    Parameters
    ----------
    pos_val
        True target values.
    pos_pred
        Predicted target values, in the same order as ``pos_val``.

    Returns
    -------
    None
        The scores are only printed.
    """
    # (label, scoring function) pairs, listed in the order they are printed
    reports = (
        ("Mean absolute error =", sm.mean_absolute_error),
        ("Mean squared error =", sm.mean_squared_error),
        ("Median absolute error =", sm.median_absolute_error),
        ("Explain variance score =", sm.explained_variance_score),
        ("R2 score =", sm.r2_score),
    )
    for label, scorer in reports:
        print(label, round(scorer(pos_val, pos_pred), 2))
    # A mean Euclidean distance report is not enabled; the disabled attempt is kept below.
    #print("Mean eucledian distance =", round(sm.euclidean_distances(pos_val, pos_pred), 2))

# Report the scores of the validation-set predictions against the true validation positions
metrics_on_model(pos_val, pos_pred)

Mean absolute error = 10.87
Mean squared error = 215.86
Median absolute error = 8.33
Explain variance score = 0.98
R2 score = 0.98


KeyboardInterrupt: 

## Using the multi-output regression model (`mult_regr`) on the evaluation set

In [None]:
# Load the evaluation set that the submission predictions are made on
ev_data = pd.read_csv("../csv_files/evaluation.csv")

In [None]:
# Take the Id column out of ev_data in one step: eval_id keeps the identifiers
# (needed for the submission file) and ev_data is left without the Id column
eval_id = ev_data.pop("Id")

In [None]:
# Formatting the position array to a string to be used in the .csv file 
def pred_to_string(prediction_array):
    """Turn each prediction into a single ``"<first>|<second>"`` string.

    Parameters
    ----------
    prediction_array
        Indexable sequence of predictions; the first two entries of each
        prediction are used.

    Returns
    -------
    list of str
        One string per prediction, in input order, with the two values
        converted via ``str`` and joined by ``"|"``.
    """
    return [
        "|".join((str(prediction_array[row][0]), str(prediction_array[row][1])))
        for row in range(len(prediction_array))
    ]
        

In [None]:
# Predicting the positions for the evaluation set with the fitted model
mult_regr_eval = mult_regr.predict(ev_data)
# Formatting every prediction as a "first|second" string for the submission file.
# NOTE(review): this rebinds pos_pred, which earlier held the numeric validation
# predictions, so re-running the metrics cell after this one would receive strings.
pos_pred = pred_to_string(mult_regr_eval)

In [None]:
# Assemble the submission table: the evaluation Ids next to their formatted predictions
mult_reg_submission = pd.DataFrame(
    data={"Id": eval_id, "Predicted": pos_pred},
)
# Write the table to the submission_files folder, leaving out the DataFrame index
mult_reg_submission.to_csv(
    path_or_buf="../DataScienceLab_Project/submission_files/mult_reg_first_try.csv",
    index=False,
)