In [29]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils import functions as f

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook; the analysis below adds columns to a frame obtained by slicing.
pd.options.mode.chained_assignment = None

# 1. Loading

In [12]:
def load_processed(path='data/processed_data/process_data.csv'):
    """Load the processed listings CSV into a DataFrame.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. The default is the path that used to be
        hard-coded, so existing ``load_processed()`` calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, or an empty DataFrame when ``path`` is not an existing
        file (a notice is printed in that case instead of raising).
    """
    if not os.path.isfile(path):
        print('No processed data at : \n{}'.format(path))
        return pd.DataFrame()
    return pd.read_csv(path)

def load_dvf():
    """Placeholder for loading the "dvf" dataset.

    No loading logic is implemented yet.

    Returns
    -------
    None
        Always.
    """
    return None

# Quick exploration

In [14]:
#df[df[['prix', 'surface', 'code_postal']].duplicated(keep=False)].sort_values('prix').head()

# 2. Analysis

In [30]:
def is_first_time(df):
    """Report whether ``df`` contains no rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect; only its row count (``shape[0]``) is read.

    Returns
    -------
    bool
        ``True`` for a frame with zero rows, ``False`` otherwise.
    """
    row_count = df.shape[0]
    return row_count == 0

def separate_old_new(df, ts, ratio=0.8):
    """Split ``df`` positionally into an "old" head and a "new" tail.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; row order is preserved.
    ts : any
        Currently unused: the split is purely positional and does not look at
        any timestamp. Kept so existing callers keep working.
    ratio : float, optional
        Fraction of rows (rounded down) that goes into the "old" part.
        Defaults to 0.8, the value that used to be hard-coded.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_old, df_new)``: the first ``int(len(df) * ratio)`` rows and the
        remaining rows.
    """
    # Builtin int() replaces np.int, an alias that recent NumPy releases no
    # longer provide (accessing it raises AttributeError).
    split_at = int(df.shape[0] * ratio)
    return df.iloc[:split_at], df.iloc[split_at:]


def delete_viager_offers(df):
    """Drop the rows flagged as "viager" and remove the flag column.

    Parameters
    ----------
    df : pandas.DataFrame
        Listings, optionally carrying a ``viager`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows whose ``viager`` value equals
        ``False``, without the ``viager`` column. When the column is absent a
        notice is printed and ``df`` itself is returned unchanged.
    """
    if 'viager' not in df.columns:
        print('No column named viager')
        return df

    # Keep only rows explicitly flagged False, then discard the flag itself so
    # the rest of the analysis does not see it.
    without_viager = df.loc[df['viager'].eq(False)]
    return without_viager.drop(columns='viager')

In [31]:
def compute_villes_in_dfs(df_old, df_new, verbose=False):
    """List the cities that appear in both the old and the new listings.

    Parameters
    ----------
    df_old, df_new : pandas.DataFrame
        Frames with a ``ville`` column.
    verbose : bool, optional
        When ``True``, print the sorted cities of each frame, their
        intersection, and the cities only present in ``df_new``. Defaults to
        ``False`` (these prints used to sit behind a dead ``if False:``).

    Returns
    -------
    list
        Sorted distinct ``ville`` values of ``df_new`` that also occur in
        ``df_old``.
    """
    villes_in_new = df_new['ville'].unique().tolist()
    villes_in_old = df_old['ville'].unique().tolist()

    # Set membership keeps the intersection linear in the number of cities.
    known_villes = set(villes_in_old)
    ville_inter = sorted(v for v in villes_in_new if v in known_villes)

    if verbose:
        ville_new = sorted(v for v in villes_in_new if v not in known_villes)
        print('Ville in new : {}'.format(sorted(villes_in_new)))
        print('Ville in old : {}'.format(sorted(villes_in_old)))
        print('Ville in both : {}'.format(ville_inter))
        print('Ville new : {}'.format(ville_new))
    return ville_inter


def compute_mean_d1(df_old, df_new, ville_inter):
    """Annotate new listings with per-city reference prices from old listings.

    Parameters
    ----------
    df_old : pandas.DataFrame
        Reference listings; needs ``ville``, ``prix``, ``surface`` and
        ``prix_m2`` columns. Other columns (including non-numeric ones) are
        ignored.
    df_new : pandas.DataFrame
        Listings to annotate; needs ``ville`` and ``prix_m2``. It is modified
        in place (columns are added) and also returned.
    ville_inter : iterable
        Cities for which a reference from ``df_old`` should be used.

    Returns
    -------
    pandas.DataFrame
        ``df_new`` with these extra columns:

        - ``moy_ville``: mean ``prix_m2`` of the city in ``df_old`` (rounded to
          2 decimals), or ``-1`` for a city outside ``ville_inter``;
        - ``inf_moy_ville``: ``prix_m2 < moy_ville``;
        - ``pct_mieux_m2``: rounded percentage gap between ``prix_m2`` and
          ``moy_ville``;
        - ``d1_ville``: first decile (q=0.1) of ``prix_m2`` in ``df_old``;
        - ``inf_d1_ville``: ``prix_m2 < d1_ville``.

        Rows with a negative ``moy_ville`` (no reference) get ``False`` for
        both flags and NaN for ``d1_ville`` and ``pct_mieux_m2``.
    """
    # Select the numeric columns *before* aggregating: aggregating every
    # column fails on text columns with recent pandas versions.
    stat_cols = ['prix', 'surface', 'prix_m2']
    by_ville = df_old.groupby('ville')[stat_cols]
    old_mean = by_ville.mean().round(2)
    old_d1 = by_ville.quantile(q=0.1).round(2)

    villes = df_new['ville']
    has_reference = villes.isin(ville_inter)

    # Comparison against the city mean; -1 is the sentinel for "no reference".
    df_new['moy_ville'] = villes.map(old_mean['prix_m2']).where(has_reference, -1)
    df_new['inf_moy_ville'] = df_new['prix_m2'] < df_new['moy_ville']
    gap = (df_new['prix_m2'] - df_new['moy_ville']).div(df_new['moy_ville']) * 100
    df_new['pct_mieux_m2'] = gap.round()

    # Comparison against the city first decile.
    df_new['d1_ville'] = villes.map(old_d1['prix_m2']).where(has_reference, -1)
    df_new['inf_d1_ville'] = df_new['prix_m2'] < df_new['d1_ville']

    # Neutralise every derived value on rows that carry the sentinel.
    no_reference = df_new['moy_ville'] < 0
    df_new.loc[no_reference, ['inf_moy_ville', 'inf_d1_ville']] = False
    df_new.loc[no_reference, ['d1_ville', 'pct_mieux_m2']] = np.nan
    return df_new


def create_alert_d1(df_new):
    """Build the alert text for listings priced under their city's first decile.

    Parameters
    ----------
    df_new : pandas.DataFrame
        Annotated listings with a boolean ``inf_d1_ville`` column plus
        ``ville``, ``prix``, ``surface``, ``moy_ville``, ``prix_m2`` and
        ``pct_mieux_m2``.

    Returns
    -------
    str
        One summary line per city (number of flagged listings) followed by one
        entry per flagged listing, ordered by city. Empty string, with a
        printed notice, when no listing is flagged.
    """
    flagged = df_new[df_new['inf_d1_ville']]
    if len(flagged) == 0:
        print('> No new line : nothing to update.')
        return ''

    # Summary header: number of flagged listings per city, cities in order.
    counts_by_ville = flagged['ville'].value_counts().sort_index().to_dict()
    chunks = [
        '*{}*:{} annonces décile 1.\n'.format(ville, count)
        for ville, count in counts_by_ville.items()
    ]

    # Every entry links to the same fixed address; the listing's own url is
    # not read here.
    url = 'www.google.fr'
    for _, row in flagged.sort_values(by='ville').iterrows():
        ville_url = '\n<{}|{}>'.format(url, row['ville'])
        chunks.append(
            '\n{} : {} m², {} €, \n[€/m² *{}* vs {} : {}%]'.format(
                ville_url,
                row['surface'],
                row['prix'],
                row['prix_m2'],
                row['moy_ville'],
                row['pct_mieux_m2'],
            )
        )
    return ''.join(chunks)

In [35]:
# Load the timestamp of the previous analysis run (shown for information).
last_analyse = f.load_ts_analyse()
#last_analyse = pd.Timestamp('20190425')
print('Last analyse realised at {}'.format(last_analyse))

# Load the data
df = load_processed()
df_dvf = load_dvf()
print('Size : ', df.shape)

if is_first_time(df):
    # load_processed() returns an empty frame when no processed file exists;
    # without this guard the analysis below fails on the missing 'ville'
    # column. There is nothing to compare, so the alert message stays empty.
    print('> No processed data : nothing to analyse.')
    df_old, df_new = df, df
    ville_inter = []
    message = ''
else:
    # Split the listings into reference ("old") and candidate ("new") rows
    df_old, df_new = separate_old_new(df, last_analyse)

    # Annotate the new rows with per-city reference prices
    ville_inter = compute_villes_in_dfs(df_old, df_new)
    df_new = compute_mean_d1(df_old, df_new, ville_inter)

    # Build the alert text
    message = create_alert_d1(df_new)

Last analyse realised at 2019-05-30 18:48:34
Size :  (1584, 8)


# 3. Saving

In [147]:
def save_alert(message, channel="test_channel", folder='data/alert_files/'):
    """Write an alert as a JSON file named after the current time.

    Parameters
    ----------
    message : str
        Alert text stored under the ``"message"`` key.
    channel : str, optional
        Value stored under the ``"channel"`` key.
    folder : str, optional
        Directory receiving the file. Defaults to the location that used to be
        hard-coded; it is created when missing.

    Notes
    -----
    The file is ``alert_<now>.json`` inside ``folder``, where ``<now>`` is
    whatever ``f.get_now()`` returns. Nothing is returned.
    """
    alert = {
        "channel": channel,
        "message": message,
        "emoji": ":female-firefighter:",
    }

    # Create the target directory up front so the very first run does not fail
    # with FileNotFoundError.
    os.makedirs(folder, exist_ok=True)

    now = f.get_now()
    # os.path.join avoids the doubled separator that plain string formatting
    # produced with a folder ending in '/'.
    path = os.path.join(folder, 'alert_{}.json'.format(now))
    with open(path, 'w') as outfile:
        json.dump(alert, outfile)


In [None]:
# Persist the alert text built by the analysis cell, using save_alert's
# default channel.
save_alert(message)