In [1]:
import requests
import re
import json
import pandas as pd
import datetime
import numpy as np

import geopandas as gpd
import folium
from folium import IFrame

import plotly
import plotly.express as px
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
from dash import dash_table
from dash.exceptions import PreventUpdate
from jupyter_dash import JupyterDash

pd.set_option('display.max_columns', None) 
pd.options.mode.chained_assignment = None

In [2]:
# Input files: the listings table (CSV) and the municipality boundaries (GeoJSON).
LISTINGS_CSV = 'full_dataset.csv'
MUNICIPALITIES_GEOJSON = 'output.geojson'

df_full = pd.read_csv(LISTINGS_CSV)
municipalities = gpd.read_file(MUNICIPALITIES_GEOJSON)

In [39]:
# Discretise numeric listing attributes into labelled bands.
# pd.cut builds right-closed intervals by default, so every band is (lower, upper].
# Each spec is: (new column, source column, bin edges, band labels).
_BAND_SPECS = [
    (
        'construction_era', 'construction_year',
        [0, 1970, 1980, 1990, 2000, 2010, 2020, 2030],
        ['< 1970', '1970-1980', '1980-1990', '1990-2000', '2000-2010', '2010-2020', '> 2020'],
    ),
    (
        'area_group', 'area',
        [0, 20, 40, 60, 10000],
        ['< 20', '21-40', '41-60', '61+'],
    ),
    (
        'bedrooms_group', 'bedrooms',
        [0, 1, 2, 3, 10000],
        ['1', '2', '3', '4+'],
    ),
    (
        'level_group', 'min_level',
        [-100, -1, -0.5, 0, 0.5, 1, 2, 10000],
        ['Υπόγειο', 'Ημιυπόγειο', 'Ισόγειο', 'Ημιώροφος', '1', '2', '3+'],
    ),
]

for _band_col, _source_col, _edges, _band_labels in _BAND_SPECS:
    df_full[_band_col] = pd.cut(df_full[_source_col], bins=_edges, labels=_band_labels)



In [3]:
# Daily ad turnover.
# For every extraction date collect the set of ad ids seen that day, then compare
# each day with the previous one:
#   'in'  = ids present today but not on the previous date (new ads)
#   'out' = ids present on the previous date but not today (removed ads)
#
# Iterating a groupby yields the dates in sorted order, so shift(1) below really
# refers to the previous extraction date even when the CSV rows are not stored
# chronologically. It also scans df_full once instead of once per date.
daily_ids = {
    date: set(ids)
    for date, ids in df_full.groupby('extraction_date')['id']
}

df_population = pd.DataFrame(list(daily_ids.items()), columns=['extraction_date', 'ids'])
df_population['ids_yesterday'] = df_population['ids'].shift(1)

# The first date has nothing to compare against, so it is dropped.
df_population = df_population.dropna()

df_population['in'] = [
    len(today - before)
    for today, before in zip(df_population['ids'], df_population['ids_yesterday'])
]
df_population['out'] = [
    len(before - today)
    for today, before in zip(df_population['ids'], df_population['ids_yesterday'])
]

Unnamed: 0,extraction_date,ids,ids_yesterday,in,out
1,2023-06-02,"{814874624, 815104013, 815267868, 812941342, 8...","{814874624, 815104013, 812941342, 815038502, 8...",296,300
2,2023-06-03,"{814874624, 801701890, 815104013, 815267868, 8...","{814874624, 815104013, 815267868, 812941342, 8...",229,200
3,2023-06-04,"{814874624, 801701890, 815104013, 815267868, 8...","{814874624, 801701890, 815104013, 815267868, 8...",107,114
4,2023-06-05,"{814874624, 801701890, 815104013, 33095702, 81...","{814874624, 801701890, 815104013, 815267868, 8...",189,203


In [9]:
# --- Figure 1: incoming vs. outgoing ads per extraction date -----------------
fig_bar_population_new_left = px.bar(
    df_population,
    x='extraction_date',
    y=['in', 'out'],
    orientation='v',
    barmode="group",
)

# One pass per trace: colour and value labels are set together.
for _trace_name, _bar_color in (('in', 'darkcyan'), ('out', 'darkturquoise')):
    fig_bar_population_new_left.update_traces(
        marker_color=_bar_color,
        text=df_population[_trace_name],
        textposition='auto',
        selector=dict(type='bar', name=_trace_name),
    )

fig_bar_population_new_left.update_layout(
    title="Incoming and outgoing ads",
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Date',
    yaxis_title='Ad Count',
)

# --- Figure 2: age of the ads on one snapshot date ---------------------------
# The snapshot date is kept in one place so the filter and the title cannot drift.
AD_AGE_SNAPSHOT_DATE = "2023-06-05"

df_ad_age = (
    df_full[df_full['extraction_date'] == AD_AGE_SNAPSHOT_DATE]
    .groupby('days_passed')
    .size()
    .reset_index(name='Count')
)
# include_lowest=True closes the first interval on the left, so ads with
# days_passed == 0 land in '< 1 week' instead of becoming NaN and vanishing
# from the chart. Ads older than 60 days are still outside every bucket.
df_ad_age['age'] = pd.cut(
    df_ad_age['days_passed'],
    bins=[0, 7, 14, 21, 28, 60],
    labels=['< 1 week', '< 2 weeks', '< 3 weeks', '< 1 month', '< 2 months'],
    include_lowest=True,
)
# observed=False keeps every age bucket on the axis, even when it is empty.
df_age_graph = df_ad_age.groupby('age', observed=False)['Count'].sum().reset_index(name='Count')

fig_ad_age = px.bar(
    df_age_graph,
    x='age',
    y='Count',
    orientation='v',
    title=f'Ad age ({AD_AGE_SNAPSHOT_DATE})',
)
fig_ad_age.update_traces(
    marker_color='darkcyan',
    text=df_age_graph['Count'],
    textposition='auto',
)
fig_ad_age.update_layout(
    xaxis_title='Age',
    yaxis_title='Count',
    title_x=0.5,
)

# A trailing None keeps the notebook from rendering the last figure here.
None

In [10]:
# Mean price per area (and listing count) for every max_level, using the
# 2023-06-05 snapshot and only rows with a positive level_length.
_levels_mask = (df_full['extraction_date'] == "2023-06-05") & (df_full['level_length'] > 0)
df = (
    df_full[_levels_mask]
    .groupby('max_level')['price_per_area']
    .agg(['mean', 'count'])
    .reset_index()
)
df.columns = ['max_level', 'price_per_area', 'count']

fig_levels = px.bar(
    df,
    x='price_per_area',
    y='max_level',
    orientation='h',
    hover_data='count',
)
fig_levels.update_traces(
    marker_color='darkcyan',
    text=df['price_per_area'],
    textposition='auto',
    texttemplate='%{text:.4s}',
)
fig_levels.update_layout(
    xaxis_title='Average price per area',
    yaxis_title='Level',
    title='Prices for different house levels',
    title_x=0.5,
)

fig_levels.show()

In [24]:
# Mean price per area (and listing count) for every level_length value, using
# the 2023-06-05 snapshot and only rows with a positive level_length.
_level_length_mask = (df_full['extraction_date'] == "2023-06-05") & (df_full['level_length'] > 0)
df = (
    df_full[_level_length_mask]
    .groupby('level_length')['price_per_area']
    .agg(['mean', 'count'])
    .reset_index()
)
df.columns = ['level_length', 'price_per_area', 'count']

fig_level_length = px.bar(
    df,
    x='price_per_area',
    y='level_length',
    orientation='h',
    hover_data='count',
)
fig_level_length.update_traces(
    marker_color='darkcyan',
    text=df['price_per_area'],
    textposition='auto',
    texttemplate='%{text:.4s}',
)
fig_level_length.update_layout(
    xaxis_title='Average price per area',
    yaxis_title='Levels',
    title='Do house levels affect price per area?',
    title_x=0.5,
)

fig_level_length.show()

In [226]:
# Mean price per area and listing count for every (bedrooms, bathrooms) pair on
# the 2023-06-05 snapshot.
x = (
    df_full[df_full['extraction_date'] == "2023-06-05"]
    .groupby(['bedrooms', 'bathrooms'])['price_per_area']
    .agg(['mean', 'count'])
    .reset_index()
)
x.columns = ['bedrooms', 'bathrooms', 'price', 'count']

# Show only combinations backed by more than 50 listings, most expensive first.
x.loc[x['count'].gt(50)].sort_values('price', ascending=False)

In [40]:
# The page content is swapped in by a URL-routing callback, so most callbacks
# reference component ids that are not part of the initial app.layout.
# suppress_callback_exceptions=True tells Dash not to report those ids as
# missing when the callbacks are validated.
app = JupyterDash(__name__, suppress_callback_exceptions=True)

# Columns shown in the page-1 listings table, in display order.
cols_to_keep = ['extraction_date', 'dimos', 'perioxi', 'type',  'price', 'area', 'bedrooms', 'bathrooms', 'min_level', 'construction_year']

def _dropdown_options(series):
    """Build dcc.Dropdown options from the distinct non-null values of ``series``.

    Missing values are skipped because NaN is neither a usable option label
    nor a usable option value. Labels are the string form of each value.
    """
    return [
        {'label': str(option), 'value': option}
        for option in series.dropna().unique()
    ]


def _range_filter_controls(label, slider_id, readout_id, column, minimum, step, initial):
    """Return the components for one range filter.

    The group is: caption, a RangeSlider running from ``minimum`` to the
    maximum of ``df_full[column]``, the Div that a callback fills with the
    selected range, and a trailing line break. Only the two end points of the
    slider get a mark.
    """
    maximum = df_full[column].max()
    return [
        html.Label(label),
        dcc.RangeSlider(
            id=slider_id,
            min=minimum,
            max=maximum,
            step=step,
            value=list(initial),
            marks={int(bound): str(int(bound)) for bound in (minimum, maximum)},
        ),
        html.Div(id=readout_id),
        html.Br(),
    ]


# Page 1: filter sidebar on the left, listings table and overview charts on the right.
page1_layout = html.Div(
    className="container",
    children=[
        html.H1("Data exploration"),
        html.Div(
            style={
                "display": "inline-block",
                "verticalAlign": "top",
                "width": "20%",
                "padding": "10px",
            },
            children=[
                html.Label("Ημερομηνία:"),
                dcc.Dropdown(
                    id='date-filter-1',
                    options=_dropdown_options(df_full['extraction_date']),
                    value=None
                ),
                html.Br(),

                html.Label("Δήμος:"),
                dcc.Dropdown(
                    id='dimos-filter-1',
                    options=_dropdown_options(df_full['dimos']),
                    value=None
                ),
                html.Br(),

                # A neighbourhood ('perioxi') dropdown is currently not offered.

                html.Label("Είδος Κατοικίας:"),
                dcc.Dropdown(
                    id='type-filter-1',
                    options=_dropdown_options(df_full['type']),
                    value=None
                ),
                html.Br(),

                *_range_filter_controls(
                    "Τιμή:", 'price-range-slider-1', 'selected-range-price-1',
                    column='price', minimum=100, step=50, initial=(300, 600),
                ),
                *_range_filter_controls(
                    "Εμβαδόν:", 'area-range-slider-1', 'selected-range-area-1',
                    column='area', minimum=0, step=50, initial=(40, 80),
                ),
                *_range_filter_controls(
                    "Υπνοδωμάτια:", 'bedrooms-range-slider-1', 'selected-range-bedrooms-1',
                    column='bedrooms', minimum=0, step=1, initial=(1, 2),
                ),
                *_range_filter_controls(
                    "Μπάνια:", 'bathrooms-range-slider-1', 'selected-range-bathrooms-1',
                    column='bathrooms', minimum=0, step=1, initial=(1, 2),
                ),
                *_range_filter_controls(
                    "Όροφος:", 'levels-range-slider-1', 'selected-range-levels-1',
                    column='min_level', minimum=0, step=0.5, initial=(1, 2),
                ),
            ],
        ),
        html.Div(
            style={
                "display": "inline-block",
                "width": "70%",
                "padding": "10px",
            },
            children=[
                dash_table.DataTable(
                    id='table',
                    # Greek column headers, paired positionally with cols_to_keep.
                    columns=[{'name': greek_col, 'id': col} for greek_col, col in zip(['Ημερομηνία', 'Δήμος', 'Περιοχή', 'Είδος Κατοικίας', "Τιμή", 'Εμβαδόν', 'Υπνοδωμάτια', 'Μπάνια', 'Όροφος', 'Έτος Κατασκευής'], cols_to_keep)],
                    style_table={'overflowX': 'auto'},
                    style_cell={
                        'minWidth': '0px', 'maxWidth': '180px',
                        'whiteSpace': 'normal',
                        'textAlign': 'left',
                        'userSelect': 'text',
                    },
                ),
                html.Br(),
                dcc.Graph(id='fig-population', figure=fig_bar_population_new_left),
                html.Br(),
                dcc.Graph(id='fig-ad-age', figure=fig_ad_age),
                html.Br(),
                dcc.Link('Go to Page 2', href='/page2'),
            ],
        ),
    ],
)

# Read the initial map document once, closing the file handle straight away
# (the bare open(...).read() used before left the handle to the garbage collector).
with open('map.html', 'r') as _initial_map_file:
    _initial_map_html = _initial_map_file.read()

# Page 2: map controls on the left, choropleth map and price charts on the right.
# Dropdown options skip missing values, since NaN is not a usable label or value.
page2_layout = html.Div(
    className="container",
    children=[
        html.H1("Price driving factors"),
        html.Div(
            style={
                "display": "inline-block",
                "verticalAlign": "top",
                "width": "20%",
                "padding": "10px",
            },
            children=[
                html.Label("Μορφή χάρτη:"),
                dcc.Dropdown(
                    id='map-type-filter-2',
                    options=[
                        {'label': map_type, 'value': map_type}
                        for map_type in ['Τιμή', 'Προσφορά']
                    ],
                    value='Τιμή'
                ),
                html.Label("Ημερομηνία:"),
                dcc.Dropdown(
                    id='date-filter-2',
                    options=[
                        {'label': str(date), 'value': date}
                        for date in df_full['extraction_date'].dropna().unique()
                    ],
                    value='2023-06-05'
                ),
                html.Label("Δήμος:"),
                dcc.Dropdown(
                    id='dimos-filter-2',
                    options=[
                        {'label': dimos, 'value': dimos}
                        for dimos in df_full['dimos'].dropna().unique()
                    ],
                    value=None
                ),
            ],
        ),
        html.Div(
            style={
                "display": "inline-block",
                "width": "70%",
                "padding": "10px",
            },
            children=[
                html.Div(id='map-title-label-2'),
                html.Iframe(id='map-price', srcDoc=_initial_map_html, width='100%', height='450'),
                html.Br(),
                dcc.Graph(id='fig-levels'),
                html.Br(),
                dcc.Graph(id='fig-level-length'),
                html.Br(),
                dcc.Link('Go to Page 1', href='/page1'),
            ],
        ),
    ],
)

# Root layout: a URL tracker plus an empty container that the routing
# callback fills with the layout of the requested page.
app.layout = html.Div(
    children=[
        dcc.Location(id='url', refresh=False),
        html.Div(id='page-content'),
    ]
)

@app.callback(
    Output('selected-range-price-1', 'children'),
    Input('price-range-slider-1', 'value'),
)
def update_selected_range_price(value):
    """Return the caption showing the price range picked on the slider.

    ``value`` is the slider's current value; its first two items are used as
    the lower and upper bound.
    """
    lower, upper = value[0], value[1]
    return f'Επιλεγμένο εύρος τιμής: {lower}€ - {upper}€'

@app.callback(
    Output('selected-range-area-1', 'children'),
    Input('area-range-slider-1', 'value'),
)
def update_selected_range_area(value):
    """Return the caption showing the area range picked on the slider.

    ``value`` is the slider's current value; its first two items are used as
    the lower and upper bound.
    """
    lower, upper = value[0], value[1]
    return f'Επιλεγμένο εύρος εμβαδόν: {lower}m² - {upper}m²'

@app.callback(
    Output('selected-range-bedrooms-1', 'children'),
    Input('bedrooms-range-slider-1', 'value'),
)
def update_selected_range_bedrooms(value):
    """Return the caption showing the bedroom-count range picked on the slider.

    ``value`` is the slider's current value; its first two items are used as
    the lower and upper bound.
    """
    lower, upper = value[0], value[1]
    return f'Επιλεγμένο εύρος υπνοδωματίων: {lower} - {upper}'

@app.callback(
    Output('selected-range-bathrooms-1', 'children'),
    Input('bathrooms-range-slider-1', 'value'),
)
def update_selected_range_area_bathrooms(value):
    """Return the caption showing the bathroom-count range picked on the slider.

    ``value`` is the slider's current value; its first two items are used as
    the lower and upper bound.
    """
    lower, upper = value[0], value[1]
    return f'Επιλεγμένο εύρος μπάνιων: {lower} - {upper}'

@app.callback(
    Output('selected-range-levels-1', 'children'),
    Input('levels-range-slider-1', 'value'),
)
def update_selected_range_levels(value):
    """Return the caption showing the floor-level range picked on the slider.

    ``value`` is the slider's current value; its first two items are used as
    the lower and upper bound.
    """
    lower, upper = value[0], value[1]
    return f'Επιλεγμένο εύρος ορόφων: {lower} - {upper}'

@app.callback(
    Output('table', 'data'),
    [
        Input('date-filter-1', 'value'),
        Input('dimos-filter-1', 'value'),
        Input('type-filter-1', 'value'),
        Input('price-range-slider-1', 'value'),
        Input('area-range-slider-1', 'value'),
        Input('bedrooms-range-slider-1', 'value'),
        Input('bathrooms-range-slider-1', 'value'),
        Input('levels-range-slider-1', 'value'),
    ]
)
def update_df_1(date_filter_value, dimos_filter_value, type_filter_value, price_filter_tuple, area_filter_tuple, bedrooms_filter_tuple, bathrooms_filter_tuple, level_filter_tuple):
    """Return the first ten listings matching every active page-1 filter.

    The five range arguments are (lower, upper) pairs applied inclusively to
    their column. The three dropdown values are applied as equality filters
    only when they are truthy (an empty dropdown means "no restriction").
    The result is a list of row dicts restricted to ``cols_to_keep``.
    """
    table_df = df_full[cols_to_keep]

    # Series.between is inclusive on both ends, i.e. lower <= x <= upper.
    range_filters = (
        ('price', price_filter_tuple),
        ('area', area_filter_tuple),
        ('bedrooms', bedrooms_filter_tuple),
        ('bathrooms', bathrooms_filter_tuple),
        ('min_level', level_filter_tuple),
    )
    keep = pd.Series(True, index=table_df.index)
    for column, bounds in range_filters:
        keep = keep & table_df[column].between(bounds[0], bounds[1])

    exact_filters = (
        ('extraction_date', date_filter_value),
        ('dimos', dimos_filter_value),
        ('type', type_filter_value),
    )
    for column, wanted in exact_filters:
        if wanted:
            keep = keep & (table_df[column] == wanted)

    return table_df[keep].head(10).to_dict('records')

@app.callback(
    # html.Div has no 'value' property; its visible content is 'children'.
    [Output('map-price', 'srcDoc'), Output('map-title-label-2', 'children')],
    [Input('date-filter-2', 'value'), Input('map-type-filter-2', 'value')]
)
def update_page_2_map(date_filter_value, map_type_value):
    """Render the municipality choropleth for the chosen date and metric.

    Parameters
    ----------
    date_filter_value : value of the page-2 date dropdown.
    map_type_value : 'Τιμή' colours by mean price per area; any other value
        colours by the number of ads.

    Returns
    -------
    (html, title) : the rendered folium map document for the iframe's srcDoc
        and the Greek title shown above the map.

    Raises
    ------
    PreventUpdate : when no date is selected, so the current map is kept
        instead of drawing an all-blank one.
    """
    if date_filter_value is None:
        raise PreventUpdate

    df_filtered = df_full[df_full['extraction_date'] == date_filter_value]

    # One row per municipality: mean price per area, its code and the ad count.
    dimos_df = df_filtered.groupby('dimos').agg({'price_per_area': 'mean', 'kwd_ypes': 'first', 'id': 'count'}).reset_index()
    dimos_df.columns = ['dimos', 'price_per_area', 'kwd_ypes', 'count']
    # Cast the code to str before joining on it.
    # NOTE(review): presumably the GeoJSON stores kwd_ypes as text; confirm.
    dimos_df['kwd_ypes'] = dimos_df['kwd_ypes'].astype(str)
    map_df = municipalities.merge(dimos_df, on="kwd_ypes", how="outer")

    if map_type_value == "Τιμή":
        important_cols = ['kwd_ypes', 'price_per_area']
        label_title = 'Τιμή ανά τετραγωνικό μέτρο'
        legend_title = 'Price per unit area'
    else:
        important_cols = ['kwd_ypes', 'count']
        label_title = 'Πλήθος αγγελιών'
        # The legend used to read 'Price per unit area' in this mode as well.
        legend_title = 'Ad count'

    map_athens = folium.Map(location=[37.9838, 23.7275], zoom_start=11, tiles='openstreetmap')
    folium.Choropleth(
        geo_data='output.geojson',
        data=map_df,
        columns=important_cols,  # [join key, metric to colour by]
        key_on='feature.properties.kwd_ypes',  # GeoJSON property matched against the join key
        fill_color='YlOrRd',
        nan_fill_color="White",  # municipalities without data stay white
        fill_opacity=0.8,
        line_opacity=0.2,
        legend_name=legend_title,
        highlight=True,
        line_color='black',
    ).add_to(map_athens)

    return map_athens.get_root().render(), label_title



def _mean_price_bar(listings, group_col, axis_col, y_title, chart_title):
    """Build a horizontal bar chart of mean price per area for each group.

    ``listings`` is grouped by ``group_col``; the grouping column is renamed
    to ``axis_col`` for the y axis. The listing count of every group is added
    to the hover box, and each bar is labelled with its mean value.
    """
    # observed=False keeps every category of a categorical grouping column on
    # the axis, including ones that have no rows after filtering.
    summary = (
        listings.groupby(group_col, observed=False)['price_per_area']
        .agg(['mean', 'count'])
        .reset_index()
    )
    summary.columns = [axis_col, 'price_per_area', 'count']

    # hover_data is passed as a list, the form plotly express documents for
    # naming extra columns.
    fig = px.bar(summary, x='price_per_area', y=axis_col, orientation='h', hover_data=['count'])
    fig.update_traces(
        marker_color='darkcyan',
        text=summary['price_per_area'],
        textposition='auto',
        texttemplate='%{text:.4s}',
    )
    fig.update_layout(
        xaxis_title='Average price per area',
        yaxis_title=y_title,
        title=chart_title,
        title_x=0.5,
    )
    return fig


@app.callback(
    [Output('fig-levels', 'figure'),
    Output('fig-level-length', 'figure')],
    [Input('date-filter-2', 'value'), 
    Input('dimos-filter-2', 'value')]
)
def update_page_2_graphs(date_filter_value, dimos_filter_value):
    """Rebuild both page-2 price charts for the selected date and municipality.

    Returns the "price by level group" figure and the "price by level_length"
    figure, in that order. Only rows with a positive ``level_length`` are used.
    The municipality filter applies only when a value is selected.

    Raises PreventUpdate when no date is selected.
    """
    if date_filter_value is None:
        raise PreventUpdate

    df_filtered = df_full[(df_full['extraction_date'] == date_filter_value) & (df_full['level_length'] > 0)]
    if dimos_filter_value:
        df_filtered = df_filtered[df_filtered['dimos'] == dimos_filter_value]

    fig_levels = _mean_price_bar(
        df_filtered,
        group_col='level_group',
        axis_col='level',
        y_title='Level',
        chart_title='Prices for different house levels',
    )
    fig_level_length = _mean_price_bar(
        df_filtered,
        group_col='level_length',
        axis_col='level_length',
        y_title='Levels',
        chart_title='Do house levels affect price per area?',
    )

    return fig_levels, fig_level_length

@app.callback(
    dash.dependencies.Output('page-content', 'children'),
    [dash.dependencies.Input('url', 'pathname')]
)
def display_page(pathname):
    """Return the layout for the requested URL path.

    '/' and '/page1' map to page 1, '/page2' to page 2. Any other path,
    including ``None``, falls back to page 1 instead of raising KeyError
    inside the callback.
    """
    href_dict = {'/': page1_layout,
                '/page1': page1_layout,
                '/page2': page2_layout}
    return href_dict.get(pathname, page1_layout)

# Start the Dash server for this app. mode='inline' is presumably JupyterDash's
# option for embedding the app in the cell output; confirm against the installed
# jupyter_dash version.
app.run_server(mode='inline')

Dash is running on http://127.0.0.1:8050/



In [69]:
# Mean asking price for every construction year, over all extraction dates.
df_full[['construction_year', 'price']].groupby('construction_year').mean()

Unnamed: 0_level_0,price
construction_year,Unnamed: 1_level_1
1900.0,1796.296296
1901.0,2760.000000
1910.0,2200.000000
1920.0,1300.000000
1921.0,1448.000000
...,...
2019.0,555.750000
2020.0,692.258065
2021.0,1040.000000
2022.0,825.555556


In [74]:
# Mean price per area and number of rows for every construction era.
# Named aggregation yields the final column names directly.
df_price_by_year = (
    df_full
    .groupby('construction_era')
    .agg(price_per_area=('price_per_area', 'mean'), count=('id', 'count'))
    .reset_index()
)
df_price_by_year

Unnamed: 0,construction_era,price_per_area,count
0,< 1970,10.488437,8791
1,1970-1980,9.945812,8484
2,1980-1990,9.781677,1679
3,1990-2000,10.478575,951
4,2000-2010,11.09153,1334
5,2010-2020,11.545015,356
6,> 2020,13.496472,318


In [77]:
# Bar chart: mean price per area for each construction era (count shown on hover).
fig_age = px.bar(
    df_price_by_year,
    x='construction_era',
    y='price_per_area',
    orientation='v',
    hover_data='count',
)
fig_age.update_traces(
    marker_color='darkcyan',
    text=df_price_by_year['price_per_area'],
    textposition='auto',
    texttemplate='%{text:.4s}',
)
fig_age.update_layout(
    xaxis_title='Construction Period',
    yaxis_title='Average price per area',
    title='Does construction period affect price?',
    title_x=0.5,
)

fig_age.show()

In [30]:
# Mean price per area and number of ads for every area band, 2023-06-01 snapshot.
df_area_group = (
    df_full[df_full['extraction_date'] == '2023-06-01']
    .groupby('area_group')
    .agg(price_per_area=('price_per_area', 'mean'), count=('id', 'count'))
    .reset_index()
)
df_area_group

Unnamed: 0,area_group,price_per_area,count
0,< 20,17.00835,23
1,21-40,12.553245,641
2,41-60,10.529414,1374
3,61+,9.573423,3376


In [31]:
# Bar chart: number of ads in each area band (mean price per area shown on hover).
# Swap y and hover_data to plot the mean price per area instead of the count.
fig_area_group = px.bar(
    df_area_group,
    x='area_group',
    y='count',
    orientation='v',
    hover_data='price_per_area',
)
fig_area_group.update_traces(
    marker_color='darkcyan',
    text=df_area_group['count'],
    textposition='auto',
)
fig_area_group.update_layout(
    xaxis_title='Area Group',
    yaxis_title='Ad Count',
    title='How are ads distributed area-wise?',
    title_x=0.5,
)

fig_area_group.show()

In [27]:
# Mean price per area and number of ads for every bedroom band, 2023-06-01 snapshot.
# The grouping column is renamed from 'bedrooms_group' to 'bedrooms'.
df_bedrooms = (
    df_full[df_full['extraction_date'] == '2023-06-01']
    .groupby('bedrooms_group')
    .agg(price_per_area=('price_per_area', 'mean'), count=('id', 'count'))
    .reset_index()
    .rename(columns={'bedrooms_group': 'bedrooms'})
)
df_bedrooms

Unnamed: 0,bedrooms,price_per_area,count
0,1,10.874374,2238
1,2,9.484031,2154
2,3,9.554705,655
3,3+,12.035485,144


In [28]:
# Bar chart: mean price per area for each bedroom band (count shown on hover).
fig_bedrooms = px.bar(
    df_bedrooms,
    x='bedrooms',
    y='price_per_area',
    orientation='v',
    hover_data='count',
)
fig_bedrooms.update_traces(
    marker_color='darkcyan',
    text=df_bedrooms['price_per_area'],
    textposition='auto',
    texttemplate='%{text:.4s}',
)
fig_bedrooms.update_layout(
    xaxis_title='Number of Bedrooms',
    yaxis_title='Average price per area',
    title='How do bedrooms affect price?',
    title_x=0.5,
)

fig_bedrooms.show()

In [98]:
# Highest value in the 'price' column across the whole dataset.
df_full.loc[:, 'price'].max()

20000.0

In [99]:
# Inspect every row whose price equals 20000.
df_full.loc[df_full['price'].eq(20000)]

Unnamed: 0,id,internal_id,title_abbreviation,address,price,price_per_unit_area,size_with_square_meter,construction_year,levels,bedrooms,bathrooms,date,extraction_date,geo_lat,geo_lng,company_title,account_id,ad_group_id,url,dimos,perioxi,type,area,price_per_area,level_list,min_level,level_length,max_level,a,temp_no,temp_date_unit,days_passed,kwd_ypes,enotites,construction_era,area_group
5189,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-01,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
10600,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-02,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
16171,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-03,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
21541,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-04,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
26960,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-05,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140


In [103]:
# Manual check of the price / date / municipality filters used by the dashboard.
price_filter_tuple = (18000, 20000)
date_filter_value = None
dimos_filter_value = None

# Series.between is inclusive on both ends: lower <= price <= upper.
a = df_full[df_full['price'].between(*price_filter_tuple)]

# The equality filters apply only when a value has been chosen.
if date_filter_value:
    a = a.loc[a['extraction_date'].eq(date_filter_value)]

if dimos_filter_value:
    a = a.loc[a['dimos'].eq(dimos_filter_value)]
a

Unnamed: 0,id,internal_id,title_abbreviation,address,price,price_per_unit_area,size_with_square_meter,construction_year,levels,bedrooms,bathrooms,date,extraction_date,geo_lat,geo_lng,company_title,account_id,ad_group_id,url,dimos,perioxi,type,area,price_per_area,level_list,min_level,level_length,max_level,a,temp_no,temp_date_unit,days_passed,kwd_ypes,enotites,construction_era,area_group
5189,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-01,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
10600,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-02,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
16171,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-03,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
21541,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-04,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140
26960,587671416,39968136,Μονοκατοικία 800 τ.μ.,Ψυχικό,20000.0,25,800 τ.μ.,1982.0,['0'],8.0,6.0,πριν από 2 μήνες,2023-06-05,38.017519,23.768951,Golden Home,616129.0,,https://www.xe.gr/property/d/enoikiaseis-katoi...,Φιλοθέης - Ψυχικού,,Μονοκατοικία,800.0,25.0,[0.0],0.0,1,0.0,0.0,2,μή,60,9177,ΒΟΡΕΙΟΥ ΤΟΜΕΑ ΑΘΗΝΩΝ,1980-1990,> 140


In [115]:
# Filter funnel: apply the dashboard filters one at a time and print how many
# rows survive each step, to see which filter removes the most listings.
price_filter_tuple = (100, 20000)
area_filter_tuple = (50, 700)
bedrooms_filter_tuple = (1, 2)
bathrooms_filter_tuple = (1, 2)
level_filter_tuple = (1, 5)
date_filter_value = None
dimos_filter_value = None
type_filter_value = None

filtered_df = df_full

# Inclusive range filters, in the same order as the dashboard callback.
for column, (lower, upper) in [
    ('price', price_filter_tuple),
    ('area', area_filter_tuple),
    ('bedrooms', bedrooms_filter_tuple),
    ('bathrooms', bathrooms_filter_tuple),
    ('min_level', level_filter_tuple),
]:
    filtered_df = filtered_df[filtered_df[column].between(lower, upper)]
    print(len(filtered_df))

# Equality filters, applied only when a value is set. Each column is compared
# against its own filter value; the 'type' filter used to be compared against
# dimos_filter_value by mistake.
for column, wanted in [
    ('extraction_date', date_filter_value),
    ('dimos', dimos_filter_value),
    ('type', type_filter_value),
]:
    if wanted:
        filtered_df = filtered_df[filtered_df[column] == wanted]
        print(len(filtered_df))

27113
22111
17566
16362
12525


In [18]:
# Distinct values of the 'perioxi' column, in order of first appearance.
pd.unique(df_full['perioxi'])

array(['Λυκαβηττός', 'Άνω Πατήσια', 'Κουντουριώτικα', 'Πολύγωνο',
       'Πανόρμου', 'Λόφος Σκουζέ', 'Κάτω Πατήσια', 'Μετς', 'Ιπποκράτειο',
       'Γκύζη', 'Ιπποκράτους', 'Ελαιώνας', 'Άγιος Σώστης', 'Εξάρχεια',
       'Πεδίον Άρεως', 'Πλατεία Αττικής', 'Άνω Κυψέλη', 'Κεραμεικός',
       'Άγιος Λουκάς', 'Κέντρο', 'Ηπείρου', 'Ακαδημία Πλάτωνος',
       'Κουκάκι', 'Κυψέλη', 'Προφήτης Ηλίας', 'Κάτω Πετράλωνα',
       'Κολωνάκι', 'Παγκράτι', 'Άγιος Θωμάς', 'Πλατεία Αμερικής',
       'Παναθηναϊκό Στάδιο', 'Ιλίσια', 'Χίλτον', 'Κολιάτσου',
       'Άγιος Ιωάννης', 'Λόφος Λαμπράκη', 'Άγιος Ελευθέριος', 'Βαρνάβα',
       'Τρεις Γέφυρες', 'Αλεπότρυπα', 'Νεάπολη', 'Σταθμός Λαρίσης',
       'Νιρβάνα', 'Κολωνός', 'Νέα Κυψέλη', 'Ελληνορώσων', 'Μοναστηράκι',
       'Πλατεία Κάνιγγος', 'Άγιος Αρτέμιος', 'Άγιος Παντελεήμονας',
       'Λόφος Στρέφη', 'Μεταξουργείο', 'Αμπελόκηποι', 'Άγιος Νικόλαος',
       'Σεπόλια', 'Κυνοσάργους', 'Άνω Πετράλωνα', 'Φιλοπάππου',
       'Προμπονά', 'Μπακνανά', 'Ρηγίλλης', '