# In[1]:
# Libraries
import pandas as pd
import time
import datetime as dt

import ipynb.fs.full.market_ingestion_coinmarketcap
from ipynb.fs.full.market_ingestion_coinmarketcap import MarketListener
import ipynb.fs.full.market_register_coinmarketcap
from ipynb.fs.full.market_register_coinmarketcap import MarketRegister
import ipynb.fs.full.market_register_coinmarketcap_s
from ipynb.fs.full.market_register_coinmarketcap_s import SummaryMarketRegister

# In[2]:
class MarketProcessing():
    """
    The MarketProcessing class is used to create the market data by adding features.

    It holds two DataFrames: ``data_market`` (one row per currency snapshot) and
    ``data_summary_market`` (one row per global market snapshot). ``featurize``
    reduces both to one row per hour, adds descriptive statistics and the share
    of each currency in the total market cap, and returns the merged frame.

    The small DataFrame helpers are static methods, so they can be called either
    as ``MarketProcessing.helper(df, ...)`` or through an instance.
    """

    # Columns produced by add_datetime, in output order.
    _DATETIME_COLUMNS = ['datetime', 'year', 'month', 'day', 'hour', 'minute', 'second']
    # Columns that identify one hourly bucket.
    _HOUR_KEYS = ['year', 'month', 'day', 'hour']
    # Statistics computed by add_statistics, in output order.
    _STAT_NAMES = ['last', 'max', 'mean', 'median', 'std', 'min', 'first']
    # Per-currency columns that build_data_market replaces by hourly statistics.
    _MARKET_FEATURES = ['price_btc', 'price_usd', 'percent_change_1h', 'percent_change_7d',
                        'volume_24h_usd', 'percent_change_24h', 'max_supply', 'total_supply']

    def __init__(self, params_market=None):
        """
        Store the market and the summary market records as DataFrames.

        params_market -- sequence whose item 0 holds the market records and whose
        item 1 holds the summary market records (anything ``pd.DataFrame``
        accepts). When omitted or empty, both frames start empty.
        """
        # None replaces the former mutable default ``[]``, which also crashed
        # with IndexError as soon as it was actually used.
        if params_market is None or len(params_market) == 0:
            market_records, summary_records = None, None
        else:
            market_records, summary_records = params_market[0], params_market[1]
        self.data_market = pd.DataFrame(market_records)
        self.data_summary_market = pd.DataFrame(summary_records)

    @staticmethod
    def drop_duplicates_rows(data, colnames):
        """
        Drop the rows duplicated on the columns colnames (the first one is kept)
        """
        return data.drop_duplicates(subset=colnames)

    @staticmethod
    def reset_index(data):
        """
        Reset the index

        The previous index is kept as a column: 'index' on the first call and
        'level_0' when an 'index' column already exists. The build_* methods
        rely on these names when they drop the helper columns.
        """
        return data.reset_index()

    @staticmethod
    def drop_columns(data, colnames):
        """
        Drop the columns
        """
        return data.drop(colnames, axis=1)

    @staticmethod
    def rename_columns(data, colnames):
        """
        Rename the columns by the colnames (mapping old name -> new name)

        The index labels are converted to strings as well (``index=str``).
        """
        return data.rename(index=str, columns=colnames)

    @staticmethod
    def add_datetime(data):
        """
        Add datetime features

        Reads data['last_updated'] as POSIX timestamps in seconds and returns a
        new DataFrame (fresh 0..n-1 index) with the naive UTC 'datetime' and its
        'year', 'month', 'day', 'hour', 'minute' and 'second' components.
        """
        # Walk the column once instead of materialising a row Series per value;
        # fromtimestamp(..., tz=UTC) replaces the deprecated utcfromtimestamp
        # and yields the same naive UTC datetime once tzinfo is stripped.
        moments = [
            dt.datetime.fromtimestamp(stamp, tz=dt.timezone.utc).replace(tzinfo=None)
            for stamp in data["last_updated"]
        ]
        return pd.DataFrame(
            {
                'datetime': moments,
                'year': [moment.year for moment in moments],
                'month': [moment.month for moment in moments],
                'day': [moment.day for moment in moments],
                'hour': [moment.hour for moment in moments],
                'minute': [moment.minute for moment in moments],
                'second': [moment.second for moment in moments],
            },
            columns=MarketProcessing._DATETIME_COLUMNS,
        )

    @staticmethod
    def _attach_datetime(data):
        """
        Add the add_datetime columns to data (in place) and return it
        """
        stamps = MarketProcessing.add_datetime(data)
        for column in MarketProcessing._DATETIME_COLUMNS:
            # Positional assignment: add_datetime returns rows in data's order.
            data[column] = stamps[column].values
        return data

    @staticmethod
    def add_date(data):
        """
        Add the date

        Returns a list with one datetime per row, built from the 'year',
        'month', 'day' and 'hour' columns (minutes and seconds are zero).
        """
        # Column-wise iteration avoids four row lookups per row, and int()
        # keeps this working when the values are not plain Python ints.
        return [
            dt.datetime(int(year), int(month), int(day), int(hour))
            for year, month, day, hour in zip(
                data['year'], data['month'], data['day'], data['hour']
            )
        ]

    @staticmethod
    def add_statistics(data, vargrouped, varnames):
        """
        Add the descriptive statistics features

        Groups data by vargrouped and returns one row per group with the
        grouping columns followed by '<stat>_<var>' columns for every var in
        varnames and every stat in last, max, mean, median, std, min, first.
        """
        # Only the requested variables are aggregated, so unrelated columns
        # cannot break or slow down the aggregation.
        stats = data.groupby(vargrouped)[list(varnames)].agg(MarketProcessing._STAT_NAMES)
        # Flatten the (var, stat) column pairs into '<stat>_<var>' names.
        stats.columns = [stat + '_' + var for var, stat in stats.columns]
        return stats.reset_index()

    def add_percent_currency_marketcap(self):
        """
        Add the percent of the currency on the marketcap

        Requires 'market_cap_usd' and 'total_market_cap_usd' in data_market.
        """
        return 100 * self.data_market["market_cap_usd"] / self.data_market["total_market_cap_usd"]

    @staticmethod
    def add_indicators():
        """
        Add the trading indicators
        <TO DO>
        """
        return None

    @staticmethod
    def df_aggregate_by_id(data):
        """
        Create a list of the dataframe aggregated by id

        One sub-DataFrame per distinct value of data['id'], in order of first
        appearance (a set was used before, which made the order arbitrary).
        """
        return [data[data['id'] == currency] for currency in dict.fromkeys(data['id'])]

    def add_duplicated_rows(self):
        """
        Add the duplicated rows

        Repeats every row of data_summary_market once per distinct id of
        data_market, sorts the result by 'date' and fills an 'id' column so
        that each summary row is paired with each id.
        """
        # dict.fromkeys de-duplicates like set() but keeps a reproducible order.
        currency_ids = list(dict.fromkeys(self.data_market['id']))
        summary = self.data_summary_market
        repeated_ids = currency_ids * len(summary)

        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        # max(..., 1) mirrors the old behaviour of keeping one copy when there
        # is no id at all.
        repeated = pd.concat([summary] * max(len(currency_ids), 1), ignore_index=True)
        # Stable sort so that rows sharing a date keep a reproducible order.
        repeated = repeated.sort_values(['date'], kind='mergesort')
        repeated['id'] = repeated_ids

        self.data_summary_market = repeated
        return self.data_summary_market

    def build_data_market(self):
        """
        Build the dataframe with the market

        Keeps one row per (id, hour), replaces the raw feature columns by their
        hourly statistics and adds a 'date' column truncated to the hour.
        """
        hourly_keys = ['id'] + self._HOUR_KEYS
        features = self._MARKET_FEATURES

        # drop duplicates rows
        market = self.drop_duplicates_rows(self.data_market, ["id", "last_updated"])
        market = self.reset_index(market)  # adds the 'index' column

        # add datetime to the market data
        market = self._attach_datetime(market)

        # add statistics to the market data (computed before the hourly
        # de-duplication so that every snapshot of the hour is counted)
        statistics = self.add_statistics(
            market[hourly_keys + features], vargrouped=hourly_keys, varnames=features
        )

        # keep the first snapshot of each hour
        market = self.drop_duplicates_rows(market, hourly_keys)
        market = self.reset_index(market)  # 'index' exists, so this adds 'level_0'
        market = market.merge(statistics, on=hourly_keys, how="left")

        # add date to the market data
        market['date'] = self.add_date(market)

        # remove the helper columns and the raw features replaced by statistics
        leftovers = ['level_0', 'index', 'last_updated', 'datetime', 'minute', 'second'] + features
        self.data_market = self.drop_columns(market, leftovers)

        return self.data_market

    def build_data_summary_market(self):
        """
        Build the dataframe with the market summary

        Keeps one row per hour and drops the helper columns together with
        'bitcoin_percentage_of_market_cap'.
        """
        # drop duplicates rows
        summary = self.drop_duplicates_rows(self.data_summary_market, ["last_updated"])
        summary = self.reset_index(summary)  # adds the 'index' column

        # add datetime to the summary market data
        summary = self._attach_datetime(summary)

        # keep the first snapshot of each hour
        summary = self.drop_duplicates_rows(summary, self._HOUR_KEYS)
        summary = self.reset_index(summary)  # 'index' exists, so this adds 'level_0'

        leftovers = ['level_0', 'index', 'last_updated', 'datetime', 'minute', 'second',
                     'bitcoin_percentage_of_market_cap']
        self.data_summary_market = self.drop_columns(summary, leftovers)

        return self.data_summary_market

    def featurize(self):
        """
        Create the features

        Builds both hourly frames, merges the summary onto the market on the
        hour keys, adds 'percent_currency_market_cap_usd' and drops the hour
        keys. Both inputs are expected to carry a 'cached' column: the merge
        suffixes them to 'cached_x' / 'cached_y', which are dropped here.
        """
        # build the data_market
        self.data_market = self.build_data_market()

        # build the data_summary_market
        self.data_summary_market = self.build_data_summary_market()

        # merge the data_market with data_summary_market
        self.data_market = self.data_market.merge(
            self.data_summary_market, on=self._HOUR_KEYS, how="left"
        )

        # add percent_currency_market_cap_usd to the features
        self.data_market['percent_currency_market_cap_usd'] = self.add_percent_currency_marketcap()

        # a list (not a set) keeps the labels passed to drop in a fixed order
        colnames = ['cached_x', 'cached_y'] + self._HOUR_KEYS
        self.data_market = self.drop_columns(self.data_market, colnames)

        return self.data_market

    def put_csv(self, data, filename, type_mode="w"):
        """
        Export the data to a csv file

        type_mode is the file mode handed to ``to_csv`` ("w" overwrites,
        "a" appends).
        """
        data.to_csv(filename, encoding="utf-8", mode=type_mode)
        return
        
        
    