In [162]:
import warnings

import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

from sklearn.preprocessing import LabelEncoder
from haversine import haversine

warnings.filterwarnings("ignore")

# Load the dataset (path is relative to the notebook's working directory).
df = pd.read_csv("final_starbucks.csv")

# Integer-encode the categorical columns in place, one fresh encoder per column.
# LabelEncoder works on 1-D input, so the column is passed directly instead of
# being reshaped into an (n, 1) array.
# NOTE: "Shape" must stay the last entry of this list, because `encoder` is
# reused after the loop to transform a new "Shape" label.
for col in ["Station", "Shape"]:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])

In [163]:
# 1. Latitude -> latitude / longitude -> longitude
# The target latitude is predicted from the three reference latitudes only, and
# the target longitude from the three reference longitudes only.

X1 = df[["E_latitude", "D_latitude", "O_latitude"]]; y1 = df["S_latitude"]
X2 = df[["E_longitude", "D_longitude", "O_longitude"]]; y2 = df["S_longitude"]

# Single query row. The column names must be identical to the training columns:
# the D value in test1 is a latitude and the D value in test2 is a longitude.
test1 = pd.DataFrame({"E_latitude" : [37.5964661], "D_latitude" : [37.5963151], "O_latitude" : [37.5962689]})
test2 = pd.DataFrame({"E_longitude" : [127.087509], "D_longitude" : [127.084429], "O_longitude" : [127.086105]})
S = [37.5969712, 127.086649]  # reference [latitude, longitude] the predictions are scored against

models = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]
models2 = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]

model_name = ["LR", "SVR", "RF", "LGBM", "XGB"]  # labels for the positions in models / models2

# The two targets are modelled independently, so every estimator is fitted once
# and the 5 x 5 grid of (latitude model, longitude model) pairs is assembled
# afterwards. RandomForestRegressor is created without random_state, so its
# predictions vary between runs.
lat_pred = []
for m1 in models:
    m1.fit(X1, y1)
    # predict() returns a length-1 array for the single query row; take the scalar.
    lat_pred.append(float(m1.predict(test1)[0]))

lon_pred = []
for m2 in models2:
    m2.fit(X2, y2)
    lon_pred.append(float(m2.predict(test2)[0]))

predict = [
    [(_idx1, _idx2), lat, lon]
    for _idx1, lat in enumerate(lat_pred)
    for _idx2, lon in enumerate(lon_pred)
]

# Row layout: [method tag, (lat model index, lon model index),
#              squared error against S in degrees, (predicted lat, predicted lon)]
score1 = [
    ["A", pair, (lat - S[0]) ** 2 + (lon - S[1]) ** 2, (lat, lon)]
    for pair, lat, lon in predict
]

# Ordered by the model-index pair; ranking by distance is done on the combined table.
score1 = sorted(score1, key = lambda x : x[1])

In [164]:
# Coordinates of the three reference points, flattened as
# [E_lon, E_lat, D_lon, D_lat, O_lon, O_lat].
lst = [127.087509, 37.5964661, 127.084429, 37.5963151, 127.086105, 37.5962689]

# Side lengths of the triangle E-D-O. Points are handed to haversine() as
# (latitude, longitude), hence the swapped index order.
ED = haversine((lst[1], lst[0]), (lst[3], lst[2]))
DO = haversine((lst[3], lst[2]), (lst[5], lst[4]))
OE = haversine((lst[5], lst[4]), (lst[1], lst[0]))

# Classify the triangle by comparing the squared longest side with the sum of
# the squares of the other two.
# NOTE(review): the exact float comparison makes "Right" practically unreachable.
# It is left unchanged because the fitted encoder may not know that label -
# confirm against how "Shape" was produced in the training data.
temp = sorted([ED, DO, OE], reverse = True)
if temp[0]**2 > temp[1]**2 + temp[2]**2:
    Shape = "Obtuse"
elif temp[0]**2 == temp[1]**2 + temp[2]**2:
    Shape = "Right"
else: Shape = "Acute"

# Heron's formula. The radicand is clamped at zero so that rounding on a
# (nearly) degenerate triangle yields an area of 0 instead of NaN.
s = ED/2 + DO/2 + OE/2
Area = np.sqrt(max(s * (s - ED) * (s - DO) * (s - OE), 0.0))

tri = pd.DataFrame({
    "ED" : [ED],
    "DO" : [DO],
    "OE" : [OE],
    "Shape" : [Shape],
    "Area" : [Area]
})

# `encoder` is the LabelEncoder left over from the loading cell's loop, i.e. the
# one fitted on df["Shape"]. LabelEncoder works on 1-D input, so the column is
# passed as-is rather than as an (n, 1) array.
tri["Shape"] = encoder.transform(tri["Shape"])

In [165]:
# 2. Triangle features -> latitude, longitude
# The features are selected by name, in the same order as the columns of the
# query frame `tri`, instead of by position (the last five columns of df), so
# training and query columns are guaranteed to line up.
X = df[["ED", "DO", "OE", "Shape", "Area"]]; y1 = df.loc[:, "S_latitude"]; y2 = df.loc[:, "S_longitude"]
# Not used for prediction in this cell (the query row is `tri`); kept so the
# notebook-level name stays defined exactly as before.
test = df[["ED", "DO", "OE", "Shape", "Area"]]

models = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]
models2 = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]

model_name = ["LR", "SVR", "RF", "LGBM", "XGB"]  # labels for the positions in models / models2

# The two targets are modelled independently, so every estimator is fitted once
# and the 5 x 5 grid of (latitude model, longitude model) pairs is assembled
# afterwards. RandomForestRegressor is created without random_state, so its
# predictions vary between runs.
lat_pred = []
for m1 in models:
    m1.fit(X, y1)
    # predict() returns a length-1 array for the single query row; take the scalar.
    lat_pred.append(float(m1.predict(tri)[0]))

lon_pred = []
for m2 in models2:
    m2.fit(X, y2)
    lon_pred.append(float(m2.predict(tri)[0]))

predict = [
    [(_idx1, _idx2), lat, lon]
    for _idx1, lat in enumerate(lat_pred)
    for _idx2, lon in enumerate(lon_pred)
]

# Row layout: [method tag, (lat model index, lon model index),
#              squared error against S in degrees, (predicted lat, predicted lon)]
score2 = [
    ["B", pair, (lat - S[0]) ** 2 + (lon - S[1]) ** 2, (lat, lon)]
    for pair, lat, lon in predict
]

# Ordered by the model-index pair; ranking by distance is done on the combined table.
score2 = sorted(score2, key = lambda x : x[1])

In [166]:
# 3. Latitude + longitude -> latitude, longitude (two models)
# Both targets are predicted from all six reference coordinates.

X = df[["E_latitude", "E_longitude", "D_latitude", "D_longitude", "O_latitude", "O_longitude"]]
y1 = df["S_latitude"]; y2 = df["S_longitude"]

# Single query row, with the same column names and order as X.
test = pd.DataFrame({"E_latitude" : [37.5964661],
                     "E_longitude" : [127.087509],
                     "D_latitude" : [37.5963151],
                     "D_longitude" : [127.084429],
                     "O_latitude" : [37.5962689],
                     "O_longitude" : [127.086105]})

models = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]
models2 = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]
model_name = ["LR", "SVR", "RF", "LGBM", "XGB"]  # labels for the positions in models / models2

# The two targets are modelled independently, so every estimator is fitted once
# and the 5 x 5 grid of (latitude model, longitude model) pairs is assembled
# afterwards. RandomForestRegressor is created without random_state, so its
# predictions vary between runs.
lat_pred = []
for m1 in models:
    m1.fit(X, y1)
    # predict() returns a length-1 array for the single query row; take the scalar.
    lat_pred.append(float(m1.predict(test)[0]))

lon_pred = []
for m2 in models2:
    m2.fit(X, y2)
    lon_pred.append(float(m2.predict(test)[0]))

predict = [
    [(_idx1, _idx2), lat, lon]
    for _idx1, lat in enumerate(lat_pred)
    for _idx2, lon in enumerate(lon_pred)
]

# Row layout: [method tag, (lat model index, lon model index),
#              squared error against S in degrees, (predicted lat, predicted lon)]
score3 = [
    ["C", pair, (lat - S[0]) ** 2 + (lon - S[1]) ** 2, (lat, lon)]
    for pair, lat, lon in predict
]

# Ordered by the model-index pair; ranking by distance is done on the combined table.
score3 = sorted(score3, key = lambda x : x[1])

In [167]:
# 4. Latitude + triangle -> latitude / longitude + triangle -> longitude

X1 = df[["E_latitude", "D_latitude", "O_latitude", "ED", "DO", "OE", "Shape", "Area"]]; y1 = df["S_latitude"]
X2 = df[["E_longitude", "D_longitude", "O_longitude", "ED", "DO", "OE", "Shape", "Area"]]; y2 = df["S_longitude"]
test_a = pd.concat([test1, tri], axis = 1); test_b = pd.concat([test2, tri], axis = 1)
# The query values are laid out in the same positional order as the training
# columns (E, D, O coordinate, then ED, DO, OE, Shape, Area). Label them with the
# training feature names so the query frames match X1 / X2 exactly, whatever
# labels the concatenated pieces carried.
test_a.columns = list(X1.columns); test_b.columns = list(X2.columns)

models = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]
models2 = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]

model_name = ["LR", "SVR", "RF", "LGBM", "XGB"]  # labels for the positions in models / models2

# The two targets are modelled independently, so every estimator is fitted once
# and the 5 x 5 grid of (latitude model, longitude model) pairs is assembled
# afterwards. RandomForestRegressor is created without random_state, so its
# predictions vary between runs.
lat_pred = []
for m1 in models:
    m1.fit(X1, y1)
    # predict() returns a length-1 array for the single query row; take the scalar.
    lat_pred.append(float(m1.predict(test_a)[0]))

lon_pred = []
for m2 in models2:
    m2.fit(X2, y2)
    lon_pred.append(float(m2.predict(test_b)[0]))

predict = [
    [(_idx1, _idx2), lat, lon]
    for _idx1, lat in enumerate(lat_pred)
    for _idx2, lon in enumerate(lon_pred)
]

# Row layout: [method tag, (lat model index, lon model index),
#              squared error against S in degrees, (predicted lat, predicted lon)]
score4 = [
    ["D", pair, (lat - S[0]) ** 2 + (lon - S[1]) ** 2, (lat, lon)]
    for pair, lat, lon in predict
]

# Ordered by the model-index pair; ranking by distance is done on the combined table.
score4 = sorted(score4, key = lambda x : x[1])

In [168]:
# 5. Latitude + longitude + triangle -> latitude, longitude
# Both targets are predicted from all six reference coordinates plus the five
# triangle features.

X = df[["E_latitude", "E_longitude", "D_latitude", "D_longitude", "O_latitude", "O_longitude", "ED", "DO", "OE", "Shape", "Area"]]
y1 = df["S_latitude"]; y2 = df["S_longitude"]
# Query row: the six-coordinate frame `test` followed by the triangle frame `tri`.
Test = pd.concat([test, tri], axis = 1)

models = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]
models2 = [LinearRegression(), SVR(), RandomForestRegressor(), LGBMRegressor(), XGBRegressor()]

model_name = ["LR", "SVR", "RF", "LGBM", "XGB"]  # labels for the positions in models / models2

# The two targets are modelled independently, so every estimator is fitted once
# and the 5 x 5 grid of (latitude model, longitude model) pairs is assembled
# afterwards. RandomForestRegressor is created without random_state, so its
# predictions vary between runs.
lat_pred = []
for m1 in models:
    m1.fit(X, y1)
    # predict() returns a length-1 array for the single query row; take the scalar.
    lat_pred.append(float(m1.predict(Test)[0]))

lon_pred = []
for m2 in models2:
    m2.fit(X, y2)
    lon_pred.append(float(m2.predict(Test)[0]))

predict = [
    [(_idx1, _idx2), lat, lon]
    for _idx1, lat in enumerate(lat_pred)
    for _idx2, lon in enumerate(lon_pred)
]

# Row layout: [method tag, (lat model index, lon model index),
#              squared error against S in degrees, (predicted lat, predicted lon)]
score5 = [
    ["E", pair, (lat - S[0]) ** 2 + (lon - S[1]) ** 2, (lat, lon)]
    for pair, lat, lon in predict
]

# Ordered by the model-index pair; ranking by distance is done on the combined table.
score5 = sorted(score5, key = lambda x : x[1])

In [169]:
# Stack the result rows of all five methods into one table. The columns are
# still unnamed here: method tag, model-index pair, error, predicted coordinate.
ndf = pd.DataFrame(score1 + score2 + score3 + score4 + score5)
S = (37.5969712, 127.086649)  # reference point as (latitude, longitude)

# Replace the squared-degree error in the third column with the haversine
# distance between S and each predicted coordinate (last column). Assigning the
# whole column at once gives a numeric column instead of per-cell writes.
ndf[ndf.columns[2]] = [haversine(S, coord) for coord in ndf.iloc[:, -1]]

In [173]:
# Name the result columns, rank every (method, model pair) row by its distance
# to the reference point (smallest first), renumber the rows, and export.
ndf.columns = ["Method", "Models", "Distant", "Coordinate"]
final_df = ndf.sort_values("Distant").reset_index(drop = True)
final_df.to_csv("final_result.csv", index = False)