## 1. Pearson Correlation Coefficient (PCC) / 2. Mutual Information Scores / 3. Linear Regression (LR) / 4. Random Forest (RF)
## 거리 적용은 머신러닝 파트 / 통계 분석에서 지역구별로 각각의 개수로 영향 평가

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

path = './data/'

# Load the dataset; every column except 'JS_Price' is treated as a feature.
data = pd.read_csv(path + 'Total_APT_for_Target_Features.csv', encoding='utf-8')

# Split into features (X) and the target column (y).
X = data.drop(columns=['JS_Price'])
y = data['JS_Price']

# Step 1: absolute Pearson correlation of each feature with the target.
pcc_ranking = X.corrwith(y).abs()

# Step 2: mutual information between each feature and the target.
# The estimator is randomized (it perturbs continuous features with small
# noise), so it is seeded with the same value as the random forest below to
# keep the ranking reproducible from run to run.
mi_scores = mutual_info_regression(X, y, random_state=100)
mi_ranking = pd.Series(mi_scores, index=X.columns)

# Step 3: absolute coefficients of a linear regression fitted on all features.
# NOTE(review): coefficient magnitudes are only comparable across features
# when the columns share a common scale -- confirm the CSV is standardized.
linear_model = LinearRegression()
linear_model.fit(X, y)
linear_regression_coefficients = pd.Series(linear_model.coef_, index=X.columns).abs()

# Step 4: feature importances of a seeded random forest.
rf_model = RandomForestRegressor(n_estimators=1000, random_state=100)
rf_model.fit(X, y)
rf_feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns)


def min_max_scaler(x):
    """Rescale the scores in *x* linearly so they span the range [0, 1].

    When every score is identical the range is zero; all zeros are returned
    in that case instead of dividing by zero and producing NaN everywhere.
    """
    low = x.min()
    span = x.max() - low
    if span == 0:
        return pd.Series(0.0, index=x.index)
    return (x - low) / span


# Bring the four score vectors onto a common [0, 1] scale before mixing them.
normalized_pcc = min_max_scaler(pcc_ranking)
normalized_mi = min_max_scaler(mi_ranking)
normalized_linear = min_max_scaler(linear_regression_coefficients)
normalized_rf = min_max_scaler(rf_feature_importances)

# Weight of each ranking method in the combined score (adjustable).
weight_pcc = 0.2
weight_mi = 0.2
weight_linear = 0.3
weight_rf = 0.3

# Weighted sum of the normalized scores, aligned on feature name.
combined_ranking = (
    weight_pcc * normalized_pcc +
    weight_mi * normalized_mi +
    weight_linear * normalized_linear +
    weight_rf * normalized_rf
)

# Tabulate the combined score per feature.
combined_ranking_df = pd.DataFrame({
    'Feature': combined_ranking.index,
    'Combined Ranking': combined_ranking.values
})

# Highest combined score first.
sorted_features = combined_ranking_df.sort_values(by='Combined Ranking', ascending=False)

Ranking_APT = sorted_features

Ranking_APT

Unnamed: 0,Feature,Combined Ranking
0,Sell_Price,0.700083
9,IR,0.424419
6,CA_index,0.286221
15,Subway_Counts,0.286177
3,CR,0.257451
10,Crime_Rates,0.238293
7,TC_index,0.208542
11,Total_Pop,0.173662
5,LC_index,0.173659
14,School_Counts,0.158321


In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Load the dataset; every column except 'JS_Price' is treated as a feature.
data = pd.read_csv('/content/Total_Officetel_for_Target_Features.csv', encoding='utf-8')

# Split into features (X) and the target column (y).
X = data.drop(columns=['JS_Price'])
y = data['JS_Price']

# Step 1: absolute Pearson correlation of each feature with the target.
pcc_ranking = X.corrwith(y).abs()

# Step 2: mutual information between each feature and the target.
# The estimator is randomized (it perturbs continuous features with small
# noise), so it is seeded with the same value as the random forest below to
# keep the ranking reproducible from run to run.
mi_scores = mutual_info_regression(X, y, random_state=100)
mi_ranking = pd.Series(mi_scores, index=X.columns)

# Step 3: absolute coefficients of a linear regression fitted on all features.
# NOTE(review): coefficient magnitudes are only comparable across features
# when the columns share a common scale -- confirm the CSV is standardized.
linear_model = LinearRegression()
linear_model.fit(X, y)
linear_regression_coefficients = pd.Series(linear_model.coef_, index=X.columns).abs()

# Step 4: feature importances of a seeded random forest.
rf_model = RandomForestRegressor(n_estimators=1000, random_state=100)
rf_model.fit(X, y)
rf_feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns)


def min_max_scaler(x):
    """Rescale the scores in *x* linearly so they span the range [0, 1].

    When every score is identical the range is zero; all zeros are returned
    in that case instead of dividing by zero and producing NaN everywhere.
    """
    low = x.min()
    span = x.max() - low
    if span == 0:
        return pd.Series(0.0, index=x.index)
    return (x - low) / span


# Bring the four score vectors onto a common [0, 1] scale before mixing them.
normalized_pcc = min_max_scaler(pcc_ranking)
normalized_mi = min_max_scaler(mi_ranking)
normalized_linear = min_max_scaler(linear_regression_coefficients)
normalized_rf = min_max_scaler(rf_feature_importances)

# Weight of each ranking method in the combined score (adjustable).
weight_pcc = 0.2
weight_mi = 0.2
weight_linear = 0.3
weight_rf = 0.3

# Weighted sum of the normalized scores, aligned on feature name.
combined_ranking = (
    weight_pcc * normalized_pcc +
    weight_mi * normalized_mi +
    weight_linear * normalized_linear +
    weight_rf * normalized_rf
)

# Tabulate the combined score per feature.
combined_ranking_df = pd.DataFrame({
    'Feature': combined_ranking.index,
    'Combined Ranking': combined_ranking.values
})

# Highest combined score first.
sorted_features = combined_ranking_df.sort_values(by='Combined Ranking', ascending=False)

Ranking_Officetel = sorted_features

Ranking_Officetel


Unnamed: 0,Feature,Combined Ranking
0,Sell_Price,0.675889
9,IR,0.449618
3,CR,0.443372
5,LC_index,0.272796
7,TC_index,0.262735
15,Subway_Counts,0.257872
6,CA_index,0.206478
11,Total_Pop,0.201256
14,School_Counts,0.195378
12,Univ_Counts,0.163809


In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Load the dataset; every column except 'JS_Price' is treated as a feature.
data = pd.read_csv('/content/Total_Single_Family_for_Target_Features.csv', encoding='utf-8')

# Split into features (X) and the target column (y).
X = data.drop(columns=['JS_Price'])
y = data['JS_Price']

# Step 1: absolute Pearson correlation of each feature with the target.
pcc_ranking = X.corrwith(y).abs()

# Step 2: mutual information between each feature and the target.
# The estimator is randomized (it perturbs continuous features with small
# noise), so it is seeded with the same value as the random forest below to
# keep the ranking reproducible from run to run.
mi_scores = mutual_info_regression(X, y, random_state=100)
mi_ranking = pd.Series(mi_scores, index=X.columns)

# Step 3: absolute coefficients of a linear regression fitted on all features.
# NOTE(review): coefficient magnitudes are only comparable across features
# when the columns share a common scale -- confirm the CSV is standardized.
linear_model = LinearRegression()
linear_model.fit(X, y)
linear_regression_coefficients = pd.Series(linear_model.coef_, index=X.columns).abs()

# Step 4: feature importances of a seeded random forest.
rf_model = RandomForestRegressor(n_estimators=1000, random_state=100)
rf_model.fit(X, y)
rf_feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns)


def min_max_scaler(x):
    """Rescale the scores in *x* linearly so they span the range [0, 1].

    When every score is identical the range is zero; all zeros are returned
    in that case instead of dividing by zero and producing NaN everywhere.
    """
    low = x.min()
    span = x.max() - low
    if span == 0:
        return pd.Series(0.0, index=x.index)
    return (x - low) / span


# Bring the four score vectors onto a common [0, 1] scale before mixing them.
normalized_pcc = min_max_scaler(pcc_ranking)
normalized_mi = min_max_scaler(mi_ranking)
normalized_linear = min_max_scaler(linear_regression_coefficients)
normalized_rf = min_max_scaler(rf_feature_importances)

# Weight of each ranking method in the combined score (adjustable).
weight_pcc = 0.2
weight_mi = 0.2
weight_linear = 0.3
weight_rf = 0.3

# Weighted sum of the normalized scores, aligned on feature name.
combined_ranking = (
    weight_pcc * normalized_pcc +
    weight_mi * normalized_mi +
    weight_linear * normalized_linear +
    weight_rf * normalized_rf
)

# Tabulate the combined score per feature.
combined_ranking_df = pd.DataFrame({
    'Feature': combined_ranking.index,
    'Combined Ranking': combined_ranking.values
})

# Highest combined score first.
sorted_features = combined_ranking_df.sort_values(by='Combined Ranking', ascending=False)

Ranking_Single_Family_House = sorted_features

Ranking_Single_Family_House

Unnamed: 0,Feature,Combined Ranking
0,Sell_Price,0.651561
10,Crime_Rates,0.435891
9,IR,0.376319
15,Subway_Counts,0.291987
6,CA_index,0.2878
3,CR,0.24298
5,LC_index,0.23886
11,Total_Pop,0.23155
7,TC_index,0.202894
13,Park_Counts,0.170662


In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Load the dataset; every column except 'JS_Price' is treated as a feature.
data = pd.read_csv('/content/Total_Townhouse_for_Target_Features.csv', encoding='utf-8')

# Split into features (X) and the target column (y).
X = data.drop(columns=['JS_Price'])
y = data['JS_Price']

# Step 1: absolute Pearson correlation of each feature with the target.
pcc_ranking = X.corrwith(y).abs()

# Step 2: mutual information between each feature and the target.
# The estimator is randomized (it perturbs continuous features with small
# noise), so it is seeded with the same value as the random forest below to
# keep the ranking reproducible from run to run.
mi_scores = mutual_info_regression(X, y, random_state=100)
mi_ranking = pd.Series(mi_scores, index=X.columns)

# Step 3: absolute coefficients of a linear regression fitted on all features.
# NOTE(review): coefficient magnitudes are only comparable across features
# when the columns share a common scale -- confirm the CSV is standardized.
linear_model = LinearRegression()
linear_model.fit(X, y)
linear_regression_coefficients = pd.Series(linear_model.coef_, index=X.columns).abs()

# Step 4: feature importances of a seeded random forest.
rf_model = RandomForestRegressor(n_estimators=1000, random_state=100)
rf_model.fit(X, y)
rf_feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns)


def min_max_scaler(x):
    """Rescale the scores in *x* linearly so they span the range [0, 1].

    When every score is identical the range is zero; all zeros are returned
    in that case instead of dividing by zero and producing NaN everywhere.
    """
    low = x.min()
    span = x.max() - low
    if span == 0:
        return pd.Series(0.0, index=x.index)
    return (x - low) / span


# Bring the four score vectors onto a common [0, 1] scale before mixing them.
normalized_pcc = min_max_scaler(pcc_ranking)
normalized_mi = min_max_scaler(mi_ranking)
normalized_linear = min_max_scaler(linear_regression_coefficients)
normalized_rf = min_max_scaler(rf_feature_importances)

# Weight of each ranking method in the combined score (adjustable).
weight_pcc = 0.2
weight_mi = 0.2
weight_linear = 0.3
weight_rf = 0.3

# Weighted sum of the normalized scores, aligned on feature name.
combined_ranking = (
    weight_pcc * normalized_pcc +
    weight_mi * normalized_mi +
    weight_linear * normalized_linear +
    weight_rf * normalized_rf
)

# Tabulate the combined score per feature.
combined_ranking_df = pd.DataFrame({
    'Feature': combined_ranking.index,
    'Combined Ranking': combined_ranking.values
})

# Highest combined score first.
sorted_features = combined_ranking_df.sort_values(by='Combined Ranking', ascending=False)

Ranking_Town_House = sorted_features

Ranking_Town_House

Unnamed: 0,Feature,Combined Ranking
0,Sell_Price,0.669521
10,Crime_Rates,0.465923
7,TC_index,0.365147
5,LC_index,0.362977
6,CA_index,0.311974
9,IR,0.223957
11,Total_Pop,0.211723
15,Subway_Counts,0.208961
13,Park_Counts,0.143712
14,School_Counts,0.111564
