In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import BayesianRidge, LinearRegression, ElasticNet
from sklearn.model_selection import cross_val_score    # 交叉驗證
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score  
import matplotlib.pyplot as plt
import seaborn as sns

## 匯入資料及確認各欄位資料型態

In [None]:
# Load the dataset and index it by a 'YYYY-MM-DD' date string.
df = (
    pd.read_csv('corr_all_2.csv')
    # The file labels its date column '時間'; expose it under the name 'time'.
    .rename(columns={'時間': 'time'})
    # Parse to datetime, then format each value as a 'YYYY-MM-DD' string.
    .assign(time=lambda frame: frame['time'].astype('datetime64[ns]').dt.strftime('%Y-%m-%d'))
    .set_index('time')
)
df

## 資料正規化，使用Minmax

#### 考量原因：銷售總金額遠大於登山人次，且希望使走勢圖更為明顯

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-Max scaler instance; it is fitted on the numeric columns in the normalization cell below.
scaler = MinMaxScaler()

In [None]:
# Columns to rescale with the Min-Max scaler.
num_vars = [
    'Snow Peak', 'Fjallraven', 'Hilleberg', 'MAMMUT', '100mountain',
    'mont-bell', 'Zamberlan', 'MysteryRanch', '始祖鳥 Arcteryx',
    'Komperdell', 'north_easy', 'north_normal', 'north_hard',
    'south_easy', 'south_normal', 'south_hard', 'central_easy',
    'central_normal', 'central_hard', 'east_easy', 'east_normal',
    'east_hard', 'total',
]

# Work on a copy: a plain assignment (`df_normalized = df`) only creates a
# second name for the same object, so scaling would silently overwrite the
# original values held in `df` as well.
df_normalized = df.copy()
df_normalized[num_vars] = scaler.fit_transform(df_normalized[num_vars])
df_normalized.head()

### 開始跑線性回歸

### 所有特徵值的相關係數

In [None]:
# Pairwise correlation matrix across the columns of df (pandas uses Pearson by default).
corr = df.corr()
corr

## 篩選出相關係數較高者為分析目標

In [None]:
# Correlation of every column with the target column. The target's own entry
# (always 1.0) is dropped so it is not counted as a "correlated" feature.
# The result gets its own name so the full matrix stored in `corr` stays intact
# for the heatmap cell that follows.
target = 'east_hard'
target_corr = df.corr()[target].drop(labels=target)

# Keep columns whose absolute correlation exceeds 0.25, largest value first.
golden_features_list = target_corr[target_corr.abs() > 0.25].sort_values(ascending=False)
print("There is {} strongly correlated values with {}:\n{}".format(len(golden_features_list), target, golden_features_list))

### 熱圖

In [None]:
# Draw the full pairwise correlation matrix and save it to 'xx.png'.
# The matrix is computed here rather than read from `corr`, because other cells
# reassign `corr` to a single column (a 1-D Series), which is not the 2-D
# matrix a heatmap needs.
corr_matrix = df.corr()

plt.figure(figsize=(20, 25))  # large canvas so the per-cell annotations stay legible
sns.heatmap(corr_matrix, cmap='GnBu_r', square=True, annot=True)
plt.savefig('xx.png')

### 以不同的特徵值與各欄位比較結果，抓取相關性大於0.25

In [None]:
# Target columns whose correlations with the other columns are reported.
corr_targets = [
    'north_hard', 'Snow Peak', 'Fjallraven', 'MAMMUT', 'mont-bell',
    'Zamberlan', 'MysteryRanch', '始祖鳥 Arcteryx', 'Komperdell',
]
corr_threshold = 0.25  # minimum |correlation| for a column to be reported

# Compute the correlation matrix once instead of once per target.
full_corr = df.corr()

for target in corr_targets:
    # `corr` holds the target's complete correlation column; the value from the
    # last iteration stays available for the later cell that plots
    # `corr.to_frame()`.
    corr = full_corr[target]
    # Drop the target's own entry (always 1.0) so it is not counted as a
    # correlated feature.
    related = corr.drop(labels=target)
    golden_features_list = related[related.abs() > corr_threshold].sort_values(ascending=False)
    print("There is {} strongly correlated values with {}:\n{}".format(len(golden_features_list), target, golden_features_list))

### 錯誤測試：執行完上面的儲存格後，`corr` 只剩單一欄位（Series），因此需先以 `to_frame()` 轉成 DataFrame 才能繪製熱圖。

In [None]:
# `corr` is a single correlation column (a Series) at this point; expand it
# into a one-column DataFrame so seaborn can render it as a heatmap.
corr_column = corr.to_frame()
sns.heatmap(corr_column)

### 回歸分析圖

In [None]:
# (x, y) column pairs to draw as scatter plots with a fitted regression line.
regplot_pairs = [
    ('east_normal', 'Komperdell'),
    ('east_hard', 'Zamberlan'),
    ('east_hard', 'Fjallraven'),
    ('north_normal', 'Snow Peak'),
    ('Fjallraven', 'mont-bell'),
]

# The seaborn theme is global state, so it only needs to be set once.
sns.set_theme(color_codes=True)

for x_col, y_col in regplot_pairs:
    # One figure per pair, drawn on an explicit Axes so each plot can be titled.
    # All plots read from the Min-Max scaled frame so every axis is on the
    # same normalized scale.
    fig, ax = plt.subplots()
    sns.regplot(x=x_col, y=y_col, data=df_normalized, ax=ax)
    ax.set_title('{} vs {}'.format(y_col, x_col))
    plt.show()