In [None]:
import platform

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Choose the matplotlib font family from the running OS instead of editing a
# commented-out line by hand: AppleGothic on macOS, Malgun Gothic everywhere
# else (the previous hard-coded default).
# NOTE(review): presumably these fonts are needed so Korean labels render -
# confirm both are installed on the target machines.
family = "AppleGothic" if platform.system() == "Darwin" else "Malgun Gothic"

plt.rc('font', family=family)

In [None]:
# Load the apartment transaction table; the first CSV column is used as the
# row index.
df = pd.read_csv(
    'data/서울특별시/apartment_df.csv',
    index_col=0,
)
df

In [None]:
# Pairwise correlations between the numeric columns only.
# `df` also carries text columns ('dong' is concatenated as a string and
# 'transaction_date' is processed with `.str` further down). pandas >= 2.0 no
# longer drops such columns silently in DataFrame.corr(); it raises. Selecting
# the numeric/bool columns explicitly works on old and new pandas alike.
corr_df = df.select_dtypes(include=['number', 'bool']).corr()
corr_df

In [None]:
# Annotated heatmap of the correlation matrix on a fixed [-1, 1] colour scale.
sns.heatmap(
    data=corr_df,
    annot=True,
    cmap="RdYlBu_r",
    vmin=-1,
    vmax=1,
)

In [None]:
# Same matrix as a clustermap (rows/columns reordered by seaborn's clustering),
# annotated and drawn on a fixed [-1, 1] colour scale.
sns.clustermap(
    data=corr_df,
    annot=True,
    cmap="RdYlBu_r",
    vmin=-1,
    vmax=1,
)

In [None]:
# Peek at a single row to recall the available columns.
df.head(n=1)

In [None]:
# Box plot of transaction prices with outliers drawn as red '+' markers.
# The marker style is passed as `flierprops` rather than the legacy `sym`
# shorthand: `sym` is only understood by matplotlib's Axes.boxplot, whereas
# `flierprops` is accepted by both Axes.boxplot and Axes.bxp, so the call does
# not depend on which of the two seaborn uses internally.
sns.boxplot(
    data=df,
    y='transaction_real_price',
    flierprops={'marker': '+', 'markerfacecolor': 'r', 'markeredgecolor': 'r'},
)

In [None]:
# Highest-priced transactions first.
df.sort_values(by='transaction_real_price', ascending=False)

In [None]:
# Per-dong coordinate table; `search_location` reads its '동', 'loc' and 'lat'
# columns.
location = pd.read_csv(filepath_or_buffer='data/동별좌표.csv')
location

In [None]:
def search_location(dong, locations=None):
    """Look up the coordinate pair stored for a dong.

    Parameters
    ----------
    dong
        Value matched against the '동' column of the coordinate table.
    locations : pandas.DataFrame, optional
        Table with '동', 'loc' and 'lat' columns. When omitted, the
        notebook-level ``location`` frame is used.

    Returns
    -------
    tuple
        ``(loc, lat)`` taken from the first matching row, or
        ``(nan, nan)`` when no row matches; in that case the unmatched dong
        is printed so it can be inspected.

    Notes
    -----
    Only the "no matching row" case falls back to NaN. Any other problem
    (for example ``location`` having been rebound to something that is not
    the coordinate table, or a missing column) now raises instead of being
    swallowed by a bare ``except`` and silently turning every lookup into NaN.
    """
    if locations is None:
        locations = location

    matches = locations[locations['동'] == dong]
    if matches.empty:
        print(dong)
        return (np.nan, np.nan)

    dong_data = matches.iloc[0]
    return (dong_data['loc'], dong_data['lat'])

In [None]:
# Number of distinct dong values in the data.
np.unique(df['dong']).size

In [None]:
# Mean transaction price per dong.
# The price column is selected *before* aggregating: averaging every column
# first would also try to average the text columns in `df`, which raises on
# pandas >= 2.0 (and is wasted work on older versions).
price_for_dong = df.groupby('dong')[['transaction_real_price']].mean()
price_for_dong = price_for_dong.reset_index()
price_for_dong

In [None]:
# Number of (non-null) price records per dong, with 'dong' as a regular column.
count_for_dong = (
    df.groupby('dong')[['transaction_real_price']]
    .count()
    .reset_index()
)
count_for_dong

In [None]:
# One row per dong with its mean price and its transaction count. The value
# columns are renamed before joining, so the result's columns are
# ['dong', 'mean_price', 'count'].
merge_df = pd.merge(
    price_for_dong.rename(columns={'transaction_real_price': 'mean_price'}),
    count_for_dong.rename(columns={'transaction_real_price': 'count'}),
    on='dong',
)
merge_df

In [None]:
# Attach the coordinate pair of every dong (NaN where the lookup finds nothing).
dong_coords = [search_location(name) for name in merge_df['dong']]
merge_df[['loc', 'lat']] = pd.DataFrame(dong_coords, index=merge_df.index)
merge_df

In [None]:
# Dongs plotted at their coordinates; marker size encodes the mean price.
sns.scatterplot(
    data=merge_df,
    x='lat',
    y='loc',
    hue='dong',
    size='mean_price',
    sizes=(10, 200),
    legend=False,
)

In [None]:
import plotly.express as px

# Interactive version of the mean-price scatter.
fig = px.scatter(
    merge_df,
    x='lat',
    y='loc',
    color='dong',
    size='mean_price',
)
fig.show()

In [None]:
# Dongs plotted at their coordinates; marker size encodes the transaction count.
sns.scatterplot(
    data=merge_df,
    x='lat',
    y='loc',
    hue='dong',
    size='count',
    sizes=(10, 200),
    legend=False,
)

In [None]:
import plotly.express as px

# Interactive version of the transaction-count scatter.
fig = px.scatter(
    merge_df,
    x='lat',
    y='loc',
    color='dong',
    size='count',
)
fig.show()

In [None]:
# Run once if folium is missing from the kernel's environment: %pip install folium

In [None]:
import folium

# Empty base map centred on (37.5, 127); the marker cells below draw onto it.
m = folium.Map(
    location=[37.5, 127],
    zoom_start=11,
)
m

In [None]:
# Add one circle per dong to the map `m`, sized by its mean transaction price.
#
# * The coordinate pair lives in `marker_coords`, not `location`: `location`
#   is the coordinate DataFrame that `search_location` reads, and rebinding
#   it here would break any later lookup.
# * Rows whose coordinates are NaN (dongs that `search_location` could not
#   find) are skipped, because folium rejects NaN locations.
for _, row in merge_df.dropna(subset=['loc', 'lat']).iterrows():
    marker_coords = [row['loc'], row['lat']]
    folium.CircleMarker(
        location=marker_coords,
        popup=row['dong'] + str(int(row['mean_price'])),
        radius=row['mean_price']/10000,
        fill=True,
    ).add_to(m)
m

In [None]:
# Add one red circle per dong to the map `m`, sized by its transaction count.
#
# * The coordinate pair lives in `marker_coords`, not `location`: `location`
#   is the coordinate DataFrame that `search_location` reads, and rebinding
#   it here would break any later lookup.
# * Rows whose coordinates are NaN (dongs that `search_location` could not
#   find) are skipped, because folium rejects NaN locations.
for _, row in merge_df.dropna(subset=['loc', 'lat']).iterrows():
    marker_coords = [row['loc'], row['lat']]
    folium.CircleMarker(
        location=marker_coords,
        popup=row['dong'] + str(int(row['count'])),
        radius=row['count']/1000,
        color='red',
        fill=True,
    ).add_to(m)
m

#### 의미있는 컬럼을 더 만들어보자

In [None]:
# Split the combined year-month value: the part above the last two digits
# becomes the year, the last two digits the month.
year_month = df['transaction_year_month']
df['transaction_year'] = year_month.div(100).astype(int)
df['transaction_month'] = year_month.mod(100)
df

In [None]:
# Keep the part of 'transaction_date' before the first '~' as an integer.
first_part = df['transaction_date'].str.split('~').str.get(0)
df['transaction_date_first'] = first_part.astype(int)
df

In [None]:
# Price normalised by the exclusive-use area of the unit.
df['real_price_per_m2'] = df['transaction_real_price'].div(df['exclusive_use_area'])
df

In [None]:
# Per-dong mean of every numeric column.
# `numeric_only=True` is required because `df` still contains text columns
# (e.g. 'transaction_date'); without it pandas >= 2.0 raises instead of
# dropping them silently.
dong_groupby = df.groupby('dong').mean(numeric_only=True)
dong_groupby

In [None]:
# Correlations between the per-dong averages (Pearson, the pandas default).
corr_df = dong_groupby.corr(method='pearson')

In [None]:
# Clustermap of the current `corr_df`, annotated, on a fixed [-1, 1] scale.
sns.clustermap(
    data=corr_df,
    annot=True,
    cmap="RdYlBu_r",
    vmin=-1,
    vmax=1,
)

In [None]:
# Keep only the three per-dong averages that are merged back onto `df`, and
# turn the group key from the index into a regular column.
dong_feature_columns = ['exclusive_use_area', 'transaction_real_price', 'real_price_per_m2']
dong_groupby = dong_groupby.loc[:, dong_feature_columns].reset_index()
dong_groupby

In [None]:
# Prefix every column except the join key with "dong_" so the aggregates do
# not collide with the per-transaction columns of the same name.
dong_groupby.columns = [
    column if column == 'dong' else "dong_" + column
    for column in dong_groupby.columns
]
dong_groupby

In [None]:
# Attach the per-dong aggregates to every transaction row.
df = df.merge(dong_groupby, on='dong')

In [None]:
# Per-apartment mean of every numeric column.
# `numeric_only=True` is required because `df` still contains text columns
# (e.g. 'dong', 'transaction_date'); without it pandas >= 2.0 raises instead
# of dropping them silently.
apart_groupby = df.groupby('apartment_id').mean(numeric_only=True)

In [None]:
# Correlations between the per-apartment averages (Pearson, the pandas default).
corr_df = apart_groupby.corr(method='pearson')

In [None]:
# Clustermap of the current `corr_df`, annotated, on a fixed [-1, 1] scale.
sns.clustermap(
    data=corr_df,
    annot=True,
    cmap="RdYlBu_r",
    vmin=-1,
    vmax=1,
)

In [None]:
# Keep only the three per-apartment averages that are merged back onto `df`,
# and turn the group key from the index into a regular column.
apart_feature_columns = ['exclusive_use_area', 'transaction_real_price', 'real_price_per_m2']
apart_groupby = apart_groupby.loc[:, apart_feature_columns].reset_index()
apart_groupby

In [None]:
# Prefix every column except the join key with "apart_" so the aggregates do
# not collide with the per-transaction columns of the same name.
apart_groupby.columns = [
    column if column == 'apartment_id' else "apart_" + column
    for column in apart_groupby.columns
]
apart_groupby

In [None]:
# Attach the per-apartment aggregates to every transaction row.
# The join key is spelled out: without `on`, pandas joins on *every* column
# name the two frames share, so a skipped or repeated rename cell would
# silently turn this into a join on float columns as well.
df = pd.merge(df, apart_groupby, on='apartment_id')
df

In [None]:
# Correlations across all numeric columns of `df`.
# Text columns such as 'dong' and 'transaction_date' are excluded explicitly;
# pandas >= 2.0 raises on them in DataFrame.corr() instead of dropping them.
corr_df = df.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Annotated heatmap of `corr_df`: one-decimal labels, fixed [-1, 1] colour
# scale, colour bar turned off.
sns.heatmap(
    data=corr_df,
    annot=True,
    fmt='.1f',
    cmap='RdYlBu_r',
    vmin=-1,
    vmax=1,
    cbar=False,
)

In [None]:
# Clustermap of `corr_df`: one-decimal labels, fixed [-1, 1] colour scale,
# with `cbar=False` passed through to the call.
sns.clustermap(
    data=corr_df,
    annot=True,
    fmt='.1f',
    cmap='RdYlBu_r',
    vmin=-1,
    vmax=1,
    cbar=False,
)