# EDA - Transactions

In [None]:
# Standard library first, third-party second (PEP 8 import grouping).
from pathlib import Path

import pandas as pd

# Raw transactions extract, resolved relative to the current working directory:
# <parent of cwd>/data/transactions.csv.
# Path.cwd() is a classmethod, so call it on the class instead of a throwaway Path() instance.
transactions_csv_file = Path.cwd().parent / 'data' / 'transactions.csv'

# Render floats with two decimal places in dataframe output.
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [None]:
# Load the transactions CSV into a dataframe using pandas' default parsing.
transactions = pd.read_csv(filepath_or_buffer=transactions_csv_file)

In [None]:
# Preview the first five rows to sanity-check the load.
transactions.head(n=5)

In [None]:
# List the distinct values found in the trade_type column.
trade_types = transactions["trade_type"].unique()
print(f'Unique trade types: {trade_types}')

In [None]:
# Print a concise per-column summary: non-null count and dtype for each column.
transactions.info()

In [None]:
# Report the dataframe's (rows, columns) shape, then summarise every column
# (include='all' covers non-numeric columns as well as numeric ones).
frame_shape = transactions.shape
print(f'Shape of transactions dataframe: {frame_shape}')
transactions.describe(include='all')

In [None]:
# Normalise text: strip surrounding whitespace and lowercase every string cell
# and every column name, then re-run the summary to recheck counts, unique
# values and frequencies.
def _clean_cell(value):
    """Return *value* stripped and lowercased if it is a str, else unchanged."""
    if not isinstance(value, str):
        return value
    return value.strip().lower()


# NOTE: element-wise DataFrame.map requires pandas >= 2.1 (older releases name it applymap).
transactions = transactions.map(_clean_cell)
transactions.columns = transactions.columns.str.strip().str.lower()
transactions.describe(include='all')

In [None]:
# Count missing (NaN/None) values per column.
# The label names the transactions dataframe; it previously said "price history",
# which does not match the data being inspected here.
print(f'Missing values in transactions:\n{transactions.isna().sum()}')

## Notes
- The date column should be converted to datetime and extracted into a date dimension.
- Typos in trade_type should be fixed (valid values are buy and sell).
- trade_type should be converted to a single character (b: buy, s: sell) with dtype('category').
- account_id should be converted to int
- transaction_id should be created after dropping rows