# BBVA data

## Parameters

In [1]:
BASE_DIR = '/Users/efraflores/Desktop/EF/Contests/Datathon_202109/data/datos'
CP_FILE = 'Canarias_CP.csv'
FOLDER_BBVA = 'CM01_3'

## Class

In [11]:
from pathlib import Path
from geopandas import GeoDataFrame, points_from_xy
from pandas import read_csv, DataFrame, to_datetime

class BbvaData:
    def __init__(self, base_dir:str, cp_file:str, folder_name:str) -> None:
        self.base_dir = Path(base_dir)
        self.cp_filepath = self.base_dir.joinpath(cp_file)
        self.folder_path = self.base_dir.joinpath(folder_name)
        self.avg_am_files = self.folder_path.glob('avg_am*.csv')
        self.cards_files = self.folder_path.glob('cards*.csv')

    def __len__(self) -> tuple:
        return (len(list(self.avg_am_files)),len(list(self.cards_files)))

    def __str__(self) -> str:
        len_files = self.__len__()
        return f'Folder path:\t{self.folder_path}\nWith {len_files[0]} files about average amount and {len_files[-1]} about cards'

    def read_country(self, path_list, country_col='country', country='ES') -> DataFrame:
        df = DataFrame()
        for file_path in path_list:
            df_sub = read_csv(file_path)
            df_sub = df_sub[df_sub[country_col]==country].copy()
            df = df.append(df_sub, ignore_index=True)
        return df

    def merge_bbva(self) -> DataFrame:
        avg_am = self.read_country(self.avg_am_files)
        cards = self.read_country(self.cards_files)
        print(avg_am.columns, '\n', cards.columns)
        df = avg_am.merge(cards)
        return df

    def date_vars(self, df, date_col='day') -> DataFrame:
        df[date_col] = to_datetime(df[date_col])
        df[f'{date_col}_year'] = df[date_col].dt.year.map(str)
        df[f'{date_col}_month'] = df[date_col].dt.month.map(lambda x: str(x).zfill(2))
        df[f'{date_col}_yearmonth'] = df[f'{date_col}_year']+' - '+df[f'{date_col}_month']
        return df

    def make_pivot(self, date_col='day', omit_zero=['avg_amount','cards'], **pivot_kwargs) -> DataFrame:
        df = self.merge_bbva()
        df = self.date_vars(df, date_col)
        df = df.pivot_table(**pivot_kwargs)
        df.columns = [str(y)+'_'+str(x) for x,y in df.columns]
        df = df[sorted(df.columns)].copy()
        for col in omit_zero: df = df[df[col]!=0].copy()
        df.reset_index(inplace=True)
        return df

    def create_polygon(self, crs_code="EPSG:3395", lat_col='lat', lng_col='lng', zipcode_col='zipcode') -> DataFrame:
        df = read_csv(self.cp_filepath)
        gdf = GeoDataFrame(df, crs=crs_code, geometry=points_from_xy(df[lat_col], df[lng_col]))
        df = gdf.dissolve(by=zipcode_col)
        df['geometry'] = df['geometry'].buffer(0.05)
        df.reset_index(inplace=True)
        return df

    def full_pipeline(self, make_pivot_kwargs={}, polygon_kwargs={}):
        zipcode = self.create_polygon(**polygon_kwargs)
        df = self.make_pivot(**make_pivot_kwargs)
        result = df.merge(zipcode)
        return result

    def export_bbva(self, df, export_name='Finished_BBVA.csv'):
        df.to_csv(self.base_dir.joinpath(export_name), index=False)
        print(f'Exported succesfully!\nFile:\t{export_name}\nPath:\t{self.base_dir}')
        

## Result

In [12]:
bbva = BbvaData(BASE_DIR, CP_FILE, FOLDER_BBVA)
print(bbva)

Folder path:	/Users/efraflores/Desktop/EF/Contests/Datathon_202109/data/datos/CM01_3
With 102 files about average amount and 103 about cards


In [17]:
for x in bbva.cards_files: print(x)

In [15]:
bbva.read_country()

In [14]:
df = bbva.full_pipeline(make_pivot_kwargs={'index':['zipcode'], 'values':['avg_amount','cards'], 'aggfunc':['sum','count','min','max','mean','median']})
df.head()

Index([], dtype='object') 
 Index([], dtype='object')


MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

## Export

In [None]:
bbva.export_bbva(df)