In [1]:
import pandas as pd
import plotly.graph_objs as go
import dash
import dash_core_components as dcc                  # 交互式组件
import dash_html_components as html                 # 代码转html
from dash.dependencies import Input, Output         # 回调
from jupyter_plotly_dash import JupyterDash         
import numpy as np

In [2]:
# Load the listings. The CSV is read from the notebook's working directory.
records = pd.read_csv('data-utf8.csv')
# DataFrame.drop returns a new frame instead of modifying in place, so the
# result must be bound for the 'id' column to actually be removed.
df = records.drop(['id'], axis=1)

# Mean floor area per district.
groups_area_jzmj = df['area'].groupby(df['district'])
mean_area = groups_area_jzmj.mean()

# Mean unit price per district.
groups_area_unitprice = df['unit_price'].groupby(df['district'])
mean_unitprice = groups_area_unitprice.mean()

# Unit prices laid out for box plots: one column per district, NaN where a
# row belongs to another district. Building the frame from the groups lets
# the index follow the data, so no listing is dropped regardless of how many
# rows the CSV contains.
box_unitprice_district = df['unit_price'].groupby(df['district'])
box_data_u = pd.DataFrame(
    {district: price for district, price in box_unitprice_district}
)

# Total prices in the same one-column-per-district layout.
box_totalprice_district = df['total_price'].groupby(df['district'])
box_data_t = pd.DataFrame(
    {district: price for district, price in box_totalprice_district}
)

# Floor-area distribution: bucket every listing into fixed area ranges and
# count the listings in each bucket. pd.cut uses right-closed bins by default.
area_level = [0, 50, 100, 150, 200, 250, 300, 500, 1000]
label_level = ['0-50', '50-100', '100-150', '150-200', '200-250', '250-300', '300-500', '500-1000']
area_cut = pd.cut(df['area'], area_level, labels=label_level)
area_result = area_cut.value_counts()


In [174]:
app = JupyterDash('Hello Dash Style')
colors = ['#1f77b4','#ff7f0e','#2ca02c','#d62728','#9467bd','#8c574b']
# Random noise that used to drive marker colours. The layout below colours
# markers from the plotted data instead; the name stays defined because code
# outside this block may still reference it.
t=np.random.randn(20000)


def _dark_layout(title):
    """Return the dark-theme go.Layout shared by every chart, with the given title."""
    return go.Layout(
        title=title,
        paper_bgcolor='#111111',
        plot_bgcolor='#111111',
        font=dict(family="Times New Roman", size=20, color='#7fdbff'),
    )


def _graph(graph_id, traces, title):
    """Wrap a list of traces in a dcc.Graph that uses the shared dark layout."""
    return dcc.Graph(
        id=graph_id,
        figure=dict(data=traces, layout=_dark_layout(title)),
    )


def _bar_graph(graph_id, title, values, marker, show_text=False):
    """Bar chart of a Series: its index on the x axis, its values as bar heights.

    Reading x and y from the same Series keeps each label paired with its own
    value. With show_text=True the value is also printed on each bar.
    """
    bar = go.Bar(
        x=values.index,
        y=values.values,
        marker=marker,
        textposition='auto',
        opacity=0.7,
        text=values.values if show_text else None,
    )
    return _graph(graph_id, [bar], title)


def _box_graph(graph_id, title, box_data):
    """Box plot with one box per column of box_data (one column per district)."""
    boxes = [go.Box(y=box_data[name], name=name) for name in box_data.columns]
    return _graph(graph_id, boxes, title)


def _scatter_graph(graph_id, title, x, y):
    """Marker scatter of y against x, with markers coloured by the y value."""
    points = go.Scatter(
        x=x,
        y=y,
        mode='markers',
        marker=dict(color=y, colorscale='Viridis', showscale=True),
    )
    return _graph(graph_id, [points], title)


def _pie_graph(graph_id, title, column):
    """Pie chart of how often each distinct value of df[column] occurs."""
    counts = df[column].value_counts()
    pie = go.Pie(labels=counts.index, values=counts.values)
    return _graph(graph_id, [pie], title)


# Counts are computed once so labels and heights always come from one Series.
district_counts = df['district'].value_counts().sort_index()
house_use_counts = df['house_use'].value_counts()
floor_counts = df['overall_floor'].value_counts()

app.layout = html.Div(
    [
        dcc.Slider(id='slider1', min=0, max=10, value=0, step=1),
        html.Div(id='number'),
        html.H3(children='威海房产高频词',
                style=dict(textAlign='center', color='black')),
        html.Img(id='wordcloud_img', src='https://s1.ax1x.com/2020/04/17/JEKHde.md.jpg'),
        html.Div(
            [
                _bar_graph('district_housenum', '各区房源数量',
                           district_counts, dict(color=colors), show_text=True),
                # Component ids must be unique within a Dash layout, so the two
                # per-district mean charts get ids of their own.
                _bar_graph('district_mean_area', '各区在售房平均面积',
                           mean_area, dict(color=colors)),
                _bar_graph('district_mean_unitprice', '各区在售房平均单价',
                           mean_unitprice, dict(color=colors)),
                # box_data_u holds unit prices and box_data_t total prices; the
                # titles are matched to the data each chart actually plots.
                _box_graph('district_boxu', '各区在售房单价箱线图', box_data_u),
                _box_graph('district_boxt', '各区在售房总价箱线图', box_data_t),
                _bar_graph('district_houseuse', '威海市房屋用途',
                           house_use_counts,
                           dict(color=['#7fdbff', '#fd9900', '#fd00b2']),
                           show_text=True),
                _graph(
                    'area_distribute',
                    [go.Scatter(
                        x=label_level,
                        y=[area_result[label] for label in label_level],
                        mode='lines',
                        connectgaps=True,
                    )],
                    '威海市住房面积分布',
                ),
                _scatter_graph('area_price', '威海市住房总价与建筑面积散点图',
                               df['area'], df['total_price']),
                _scatter_graph('area_unit_price', '威海市住房单价与建筑面积散点图',
                               df['area'], df['unit_price']),
                _pie_graph('house_type_precentage', '威海市房型占比饼状图', 'house_type'),
                _pie_graph('fixture_precentage', '威海市装修占比饼状图', 'fixture'),
                _pie_graph('building_type_precentage', '威海市房源建筑类型占比饼状图',
                           'building_type'),
                _pie_graph('building_structure_precentage', '威海市房源建筑结构占比饼状图',
                           'building_structure'),
                _pie_graph('elevator_precentage', '威海市房源梯户占比饼状图', 'elevator_ratio'),
                _bar_graph('overall_floor', '各区房源楼层数柱状图',
                           floor_counts,
                           dict(colorscale='Viridis', color=floor_counts.values,
                                showscale=True)),
                # One dropdown entry per district present in the data.
                dcc.Dropdown(
                    id='district_choose',
                    options=[{'label': name, 'value': name}
                             for name in district_counts.index],
                    value='环翠区',
                ),
                dcc.Graph(id='top20house'),
                dcc.Slider(id='slider2', min=0, max=20, value=5, step=1),
                dcc.Graph(id='top20xiaoqu'),
            ]
        ),
    ]
)
@app.callback(
    Output(component_id='number', component_property='children'),
    [Input(component_id='slider1', component_property='value')],
)
def update_value(input_value):
    """Echo the current 'slider1' value as the text shown in the 'number' div."""
    message = f'你输入了"{input_value}"'
    return message
@app.callback(
    Output(component_id='top20house',component_property='figure'),
    [Input(component_id='district_choose',component_property='value')]
)
def district_update(district_name):
    """Build the bar chart of the highest unit-price listings in one district.

    Args:
        district_name: value of the 'district_choose' dropdown. May be None
            if the dropdown selection is cleared, in which case the chart is
            empty and the title carries no district name.

    Returns:
        A figure dict with ``data`` and ``layout`` for the 'top20house' graph.
        Bars are labelled by the listing's 'xiaoqu' value, ordered from the
        highest unit price down.
    """
    top_n = 10
    # Filter to the district first and rank afterwards, so the result is that
    # district's own top listings even when none of them rank highly city-wide.
    in_district = df[df['district'] == district_name]
    top_listings = in_district.nlargest(top_n, 'unit_price')
    x_value = top_listings['xiaoqu'].values
    y_value = top_listings['unit_price'].values
    text_name = (district_name or '') + '单价TOP房源'

    bar = go.Bar(
        x=x_value,
        y=y_value,
        textposition='auto',
        opacity=0.7,
        marker=dict(
            colorscale=[
                [0.0, "rgb(165,0,38)"],
                [0.1111111111111111, "rgb(215,48,39)"],
                [0.2222222222222222, "rgb(244,109,67)"],
                [0.3333333333333333, "rgb(253,174,97)"],
                [0.4444444444444444, "rgb(254,224,144)"],
                [0.5555555555555556, "rgb(224,243,248)"],
                [0.6666666666666666, "rgb(171,217,233)"],
                [0.7777777777777778, "rgb(116,173,209)"],
                [0.8888888888888888, "rgb(69,117,180)"],
                [1.0, "rgb(49,54,149)"],
            ],
            # Colour each bar by its own unit price so the colour bar is meaningful.
            color=y_value,
            showscale=True,
        ),
    )
    fig = dict(
        data=[bar],
        layout=go.Layout(
            title=text_name,
            paper_bgcolor="#111111",
            plot_bgcolor='#111111',
            font=dict(family="Times New Roman", size=20, color='#7fdbff'),
        ),
    )
    return fig

@app.callback(
    Output(component_id='top20xiaoqu',component_property='figure'),
    [Input(component_id='slider2',component_property='value')]
)
def xiaoqu_update(slider_value):
    """Build the bar chart of the xiaoqu with the highest mean unit price.

    Args:
        slider_value: value of the 'slider2' slider; the number of xiaoqu to
            show. At most 20 are ever shown.

    Returns:
        A figure dict with ``data`` and ``layout`` for the 'top20xiaoqu' graph.
    """
    mean_by_xiaoqu = df['unit_price'].groupby(df['xiaoqu']).mean()
    top_xiaoqu = mean_by_xiaoqu.sort_values(ascending=False)[:20]
    x_value = top_xiaoqu.index[:slider_value]
    y_value = top_xiaoqu.values[:slider_value]
    text_name = '单价TOP' + str(slider_value) + '小区'

    bar = go.Bar(
        x=x_value,
        y=y_value,
        textposition='auto',
        opacity=0.7,
        # Colour each bar by its own mean unit price so the colour bar
        # reflects the plotted values.
        marker=dict(colorscale='Viridis', color=y_value, showscale=True),
    )
    fig = dict(
        data=[bar],
        layout=go.Layout(
            title=text_name,
            paper_bgcolor="#111111",
            plot_bgcolor='#111111',
            font=dict(family="Times New Roman", size=20, color='#7fdbff'),
        ),
    )
    return fig
    

    

In [175]:
# Bare expression: the notebook shows the JupyterDash app object as this cell's output.
app

In [141]:
# Scratch cell: build a house-use bar Graph so the notebook echoes its repr.
# NOTE(review): x is in first-appearance order (unique) while y is in
# descending-count order (value_counts); the two only line up when those
# orders happen to coincide.
dcc.Graph(
    id='district_housenum',
    figure={
        'data': [
            go.Bar(
                x=df['house_use'].unique(),
                y=df['house_use'].sort_values().value_counts(),
            )
        ]
    },
)

Graph(id='district_housenum', figure={'data': [Bar({
    'x': array(['普通住宅', '商住两用', '别墅'], dtype=object), 'y': array([13883,   177,    96], dtype=int64)
})]})

In [114]:
# Scratch cell: unit prices grouped by xiaoqu (lazy groupby object, nothing aggregated yet).
df.groupby('xiaoqu')['unit_price']

<pandas.core.groupby.generic.SeriesGroupBy object at 0x00000157BBF3E978>

In [135]:
# Mean unit price per xiaoqu, highest first, capped at 2000 entries, as a plain array.
top_xiaoqu = (
    df['unit_price']
    .groupby(df['xiaoqu'])
    .mean()
    .sort_values(ascending=False)[:2000]
    .values
)

In [136]:
# Bare expression: echo the array assigned to top_xiaoqu as the cell output.
top_xiaoqu

array([40295.        , 32004.66666667, 27889.        , ...,
        2252.        ,  2188.        ,  2106.        ])