In [25]:
import pandas as pd
import plotly as plt
import numpy as np
import plotly.express as px

In [26]:
# Load the wide tick table, then index it by Istanbul wall-clock time.
data = pd.read_csv('all_ticks_wide.csv')
data = (
    data
    # parse the timestamps and express them in the Europe/Istanbul zone
    .assign(timestamp=pd.to_datetime(data['timestamp']).dt.tz_convert('Europe/Istanbul'))
    .set_index('timestamp')
)
# drop the timezone so the index holds naive local timestamps
data.index = data.index.tz_localize(None)

# symbols analysed in the rest of the notebook
stocks = [
    'ASELS',
    'THYAO',
    'PGSUS',
    'BANVT',
    'MGROS',
    'FROTO',
]

In [27]:
#function returns the chosen stocks' rows inside the analysis window (default: 2015 and 2016)

def data_preparation(data, stocks, first_day='2014-12-31', last_day='2017-01-01'):
    """Slice ``data`` to an exclusive date window and keep only the ``stocks`` columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index can be compared with date strings (the notebook
        passes a timezone-naive DatetimeIndex).
    stocks : list of str
        Column labels to keep, in this order.
    first_day, last_day : str, optional
        Exclusive lower and upper bounds of the window. The defaults reproduce
        the previously hard-coded values. A date-only string is compared as
        midnight of that day, so rows stamped later on ``first_day`` itself
        are kept, while every row on or after ``last_day`` is dropped.

    Returns
    -------
    pandas.DataFrame
        Rows with ``first_day < index < last_day``, restricted to ``stocks``.

    Raises
    ------
    KeyError
        If a label in ``stocks`` is not a column of ``data``.
    """
    in_window = (data.index > first_day) & (data.index < last_day)
    return data.loc[in_window, stocks]

In [28]:
#function gives the daily close and its day-over-day percent change for one stock

def stocks_data(symbol, data):
    """Build a per-day table of closing price and percent change for ``symbol``.

    Parameters
    ----------
    symbol : str
        Column of ``data`` holding the prices of the stock.
    data : pandas.DataFrame
        Tick-level prices indexed by a DatetimeIndex (``data.index.date`` is used).

    Returns
    -------
    pandas.DataFrame
        One row per calendar date present in ``data``, indexed by
        ``datetime.date`` objects, with the columns:

        * ``Close``  - last non-null value of the day (pandas ``last`` aggregation)
        * ``Month``, ``Year`` - calendar month and year of the row
        * ``Symbol`` - ``symbol`` repeated on every row
        * ``Change`` - percent change of ``Close`` versus the previous row;
          NaN on the first row and wherever either close is missing

    Raises
    ------
    KeyError
        If ``symbol`` is not a column of ``data``.
    """
    # the close of a day is the last value recorded on that calendar date
    daily_close = data[symbol].groupby(data.index.date).last()

    # datetime view of the dates, used only to read month / year;
    # the returned frame keeps the plain date objects as its index
    trading_days = pd.to_datetime(daily_close.index)

    df = pd.DataFrame({'Close': daily_close})
    df['Month'] = trading_days.month
    df['Year'] = trading_days.year
    df['Symbol'] = symbol  # scalar is broadcast to every row

    previous_close = df['Close'].shift(1)
    df['Change'] = (df['Close'] - previous_close) / previous_close * 100
    return df


In [29]:
#outlier detection with the IQR rule; each month's data is also plotted

def outliers(outliers_df, data, i, j, symbol_name, end_year=2017):
    """Collect the IQR outliers of ``Change`` month by month and plot each month.

    Starting at month ``i`` of year ``j``, every calendar month before
    ``end_year`` is examined on its own. Rows whose ``Change`` lies above
    ``Q3 + 1.5 * IQR`` or below ``Q1 - 1.5 * IQR`` of that month are flagged.

    Parameters
    ----------
    outliers_df : pandas.DataFrame
        Outliers gathered so far (may be empty); it is not modified.
    data : pandas.DataFrame
        Daily table with at least ``Month``, ``Year`` and ``Change`` columns.
    i : int
        Month (1-12) to start from.
    j : int
        Year to start from.
    symbol_name : str
        Passed to ``plot`` as the figure title.
    end_year : int, optional
        First year that is NOT processed. The default keeps the previous
        hard-coded stop at 2017.

    Returns
    -------
    pandas.DataFrame
        ``outliers_df`` followed by the rows flagged in each processed month.
    """
    # gather the pieces and concatenate once; DataFrame.append no longer
    # exists in pandas 2.x
    collected = [outliers_df]
    month, year = i, j

    while year < end_year:
        monthly_df = data[(data['Month'] == month) & (data['Year'] == year)]

        #interquartile range of this month's daily changes
        q1 = monthly_df['Change'].quantile(0.25)
        q3 = monthly_df['Change'].quantile(0.75)
        iqr = q3 - q1

        upper = q3 + 1.5 * iqr
        lower = q1 - 1.5 * iqr

        flagged = monthly_df[(monthly_df['Change'] > upper) | (monthly_df['Change'] < lower)]
        collected.append(flagged)

        #because data is split by month in this function, plot has to be called here;
        #a month without rows is skipped because plot reads the first and last index label
        if not monthly_df.empty:
            plot(monthly_df, symbol_name)

        # advance to the next calendar month
        if month >= 12:
            month, year = 1, year + 1
        else:
            month += 1

    # leave empty pieces out so they cannot influence the resulting dtypes;
    # fall back to all pieces when nothing was flagged at all
    non_empty = [piece for piece in collected if not piece.empty]
    return pd.concat(non_empty or collected)

In [30]:
#plot function: line chart of the daily percent change for one period

def plot(data, symbol_name):
    """Show a plotly line chart of ``data['Change']`` against the index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Change`` column; its first and last index labels are
        written into the x-axis title.
    symbol_name : str
        Used as the figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``. Nothing is drawn when
        ``data`` has no rows.
    """
    if len(data.index) == 0:
        # nothing to draw, and reading data.index[0] would raise IndexError
        return

    first_label = str(data.index[0])
    last_label = str(data.index[-1])

    fig = px.line(x=data.index, y=data['Change'], title=symbol_name, markers=True)
    fig.update_layout(
        xaxis_title="Time Horizon" + ' (' + first_label + '/' + last_label + ')',
        yaxis_title="Daily Percent Change of Closes",
    )
    fig.show()


In [31]:
# keep only the columns listed in `stocks` and the rows inside the analysis window
six_stocks_data = data_preparation(data, stocks)

#each outlier is stored in that dataframe
outliers_df = pd.DataFrame()

# one stock at a time: build its daily close/change table, then add its
# monthly IQR outliers to outliers_df, starting from January 2015
# (outliers() also draws the chart of every month it processes)
for k in stocks:
    last_data = stocks_data(k, six_stocks_data)
    outliers_df = outliers(outliers_df, last_data, 1, 2015, k)


In [32]:
# display the outlier rows collected for all stocks
outliers_df

Unnamed: 0,Close,Month,Year,Symbol,Change
2015-03-16,5.9430,3,2015,ASELS,3.405077
2015-06-08,6.2186,6,2015,ASELS,-8.361332
2015-06-24,7.1810,6,2015,ASELS,5.054495
2015-09-15,6.8355,9,2015,ASELS,4.922638
2015-10-16,7.0576,10,2015,ASELS,-4.026544
...,...,...,...,...,...
2016-09-16,25.3739,9,2016,FROTO,-4.224135
2016-09-22,26.8713,9,2016,FROTO,3.353936
2016-09-26,25.9334,9,2016,FROTO,-3.312592
2016-12-01,23.5737,12,2016,FROTO,-3.742706
