In [3]:
import sqlalchemy
import uvicorn
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

import re
import time
import datetime

import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [27]:
def to_lowercase(df):
	df = df.copy()
	df.rename({i:i.lower() for i in df.columns.values}, axis=1, inplace=True)
	return df

def to_snakecase(df):
	snakecase = {i: re.sub(r"[,.;@#?!&$]+\ *", "", i).replace(" ", "_") for i in df}
	df.rename(columns=snakecase, inplace=True)
	return df

# def to_datetime(df, data_format):
	

def get_Users(path="../../../data/users.csv", date_format = "%Y-%m"):
	users = to_snakecase(to_lowercase(pd.read_csv(path)))
	users['user'] = users['user'].astype(str).str.pad(width=10, side='left', fillchar='0')
	users['birth_year'] = users['birth_year'].astype(str) + '-' + users['birth_month'].astype(str)
	users['birth_year'] = pd.to_datetime(users['birth_year'], format=date_format)
	users = users.drop(columns= ['birth_month'])
	users = users.rename(columns={'birth_year': 'birth_year_month'})
	return users


def get_CreditCards(path="../../../data/users.csv", date_format = "%m/%Y"):
	credit_cards = to_snakecase(to_lowercase(pd.read_csv("../../../data/credit_cards.csv")))
	credit_cards['user'] = credit_cards['user'].astype(str).str.pad(width=10, side='left', fillchar='0')
	credit_cards['expires'] = pd.to_datetime(credit_cards['expires'], format=date_format)
	credit_cards['acct_open_date'] = pd.to_datetime(credit_cards['expires'], format=date_format)
	credit_cards['year_pin_last_changed'] = pd.to_datetime(credit_cards['year_pin_last_changed'], format="%Y")
	return credit_cards

def get_Transactions(path="../../../data/transactions.csv", date_format = "%m/%Y"):
	transactions = to_snakecase(to_lowercase(pd.read_csv(path)))
	transactions.insert(0, 'identifier', transactions.index + 1) 
	transactions['user'] = transactions['user'].astype(str).str.pad(width=10, side='left', fillchar='0')
	transactions = transactions.rename(columns={'card':'card_index'})
	hour_min = transactions['time'].str.split(":", expand=True).rename(columns={0:'hour', 1:'minute'})
	transactions = pd.concat([transactions, hour_min], axis=1)

	date_cols = ['year', 'month', 'day', 'hour', 'minute']
	transactions['date'] = pd.to_datetime(transactions[date_cols])

	cc_no = get_CreditCards()[['user', 'card_index', 'card_number']]
	card_no = transactions.merge(cc_no, how='inner', on=['user', 'card_index'])['card_number']
	transactions.insert(1, 'card_number', card_no) 
	transactions = transactions.drop(columns= ['card_index', 'time'] + date_cols)
	return transactions

def get_Products(path="../../../data/santander_train_small.csv", 
				column_mapping = {
					"fecha_dato": "report_date",
					"ncodpers": "customer_id",
					"ind_empleado": "employee_index",
					"pais_residencia": "country_residence",
					"sexo": "gender",
					"age": "age",
					"fecha_alta": "contract_start_date",
					"ind_nuevo": "new_customer_index",
					"antiguedad": "seniority_months",
					"indrel": "primary_customer_status",
					"ult_fec_cli_1t": "last_primary_customer_date",
					"indrel_1mes": "customer_type_start_month",
					"tiprel_1mes": "customer_relation_type",
					"indresi": "residence_index",
					"indext": "foreigner_index",
					"conyuemp": "spouse_employee_index",
					"canal_entrada": "join_channel",
					"indfall": "deceased_index",
					"tipodom": "address_type",
					"cod_prov": "province_code",
					"nomprov": "province_name",
					"ind_actividad_cliente": "activity_index",
					"renta": "gross_income",
					"segmento": "customer_segment",
					"ind_ahor_fin_ult1": "saving_account",
					"ind_aval_fin_ult1": "guarantee",
					"ind_cco_fin_ult1": "current_account",
					"ind_cder_fin_ult1": "derivada_account",
					"ind_cno_fin_ult1": "payroll_account",
					"ind_ctju_fin_ult1": "junior_account",
					"ind_ctma_fin_ult1": "more_particular_account",
					"ind_ctop_fin_ult1": "particular_account",
					"ind_ctpp_fin_ult1": "particular_plus_account",
					"ind_deco_fin_ult1": "short_term_deposits",
					"ind_deme_fin_ult1": "medium_term_deposits",
					"ind_dela_fin_ult1": "long_term_deposits",
					"ind_ecue_fin_ult1": "e_account",
					"ind_fond_fin_ult1": "funds",
					"ind_hip_fin_ult1": "mortgage",
					"ind_plan_fin_ult1": "pensions",
					"ind_pres_fin_ult1": "loans",
					"ind_reca_fin_ult1": "taxes",
					"ind_tjcr_fin_ult1": "credit_card",
					"ind_valo_fin_ult1": "securities",
					"ind_viv_fin_ult1": "home_account",
					"ind_nomina_ult1": "payroll",
					"ind_nom_pens_ult1": "pensions_payments",
					"ind_recibo_ult1": "direct_debit"
				}
	):
	santender = pd.read_csv(path)
	santender = santender.rename(columns=column_mapping)
	return santender

users = get_Users()
credit_cards = get_CreditCards()
transactions = get_Transactions()
santender = get_Products()

santender

Unnamed: 0,report_date,customer_id,employee_index,country_residence,gender,age,contract_start_date,new_customer_index,seniority_months,primary_customer_status,...,mortgage,pensions,loans,taxes,credit_card,securities,home_account,payroll,pensions_payments,direct_debit
0,2015-01-28,1375586,N,ES,H,35,2015-01-12,0.0,6,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
1,2015-01-28,1050611,N,ES,V,23,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
2,2015-01-28,1050612,N,ES,V,23,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
3,2015-01-28,1050613,N,ES,H,22,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
4,2015-01-28,1050614,N,ES,V,23,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9994,2015-01-28,1065714,N,ES,H,23,2012-09-24,0.0,34,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
9995,2015-01-28,1065716,N,ES,V,27,2012-09-24,0.0,34,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
9996,2015-01-28,1065717,N,ES,V,24,2012-09-24,0.0,34,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
9997,2015-01-28,1065719,N,ES,H,23,2012-09-24,0.0,34,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0


In [5]:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship, Mapped, mapped_column
from sqlalchemy import create_engine, Column, Integer, String, Double, DateTime, ForeignKey
from sqlalchemy.sql.sqltypes import TIMESTAMP
from sqlalchemy.sql.expression import text


def create_db(user="root", password="msql1234", server="localhost", database="transact"):
    SQLALCHEMY_DATABASE_URL = "mysql+pymysql://{}:{}@{}/{}".format(
        user, password, server, database
    )
    engine = create_engine(SQLALCHEMY_DATABASE_URL)

    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base = declarative_base()

    return engine, SessionLocal, Base

engine, SessionLocal, Base = create_db()


db = engine.connect()


class Users(Base):
	__tablename__ = 'users'
	user = Column(String(10), primary_key=True, nullable=False)
	person = Column(String(32))
	current_age = Column(Integer)
	retirement_age = Column(Integer)
	birth_year_month = Column(DateTime)
	gender = Column(String(32))
	address = Column(String(64))
	apartment = Column(Integer)
	city = Column(String(32))
	state = Column(String(32))
	zipcode = Column(String(32))
	latitude = Column(Double)
	longitude = Column(Double)
	per_capita_income = Column(Double)
	yearly_income = Column(Double)
	total_debt = Column(Double)
	fico_score = Column(Double)
	num_credit_cards = Column(Integer)


class Cards(Base):
	__tablename__ = 'credit_cards'
	user = Column(String(10), ForeignKey("users.user", ondelete="CASCADE"), nullable=False)
	card_index = Column(Integer)
	card_brand = Column(String(32))
	card_type = Column(String(32))
	card_number = Column(String(16), primary_key=True, nullable=False)
	expires  = Column(DateTime)
	cvv = Column(String(3))
	has_chip = Column(String(3)) 
	cards_issued = Column(Integer)
	credit_limit = Column(Double)
	acct_open_date = Column(DateTime)
	year_pin_last_changed = Column(DateTime)
	card_on_dark_web = Column(String(3))

class Transactions(Base):
	__tablename__ = "transactions"
	identifier = Column(Integer, primary_key=True, autoincrement=True)
	user = Column(String(10), ForeignKey("users.user", ondelete="CASCADE"), nullable=False)
	card_number = Column(String(16), ForeignKey("credit_cards.card_number", ondelete="CASCADE"), nullable=False)
	date = Column(DateTime)
	amount = Column(Double)
	use_chip = Column(String(32))
	merchant_name = Column(String(32))
	merchant_city = Column(String(32))
	merchant_state = Column(String(32))
	zip = Column(String(16))
	mcc = Column(Integer)
	errors = Column(String(32))
	is_fraud = Column(String(3))


Base.metadata.create_all(engine)

db.commit()
db.close()

In [6]:
with engine.connect() as db:
	try: 
		users.to_sql('users', con=engine, if_exists='append', index=False)
		credit_cards.to_sql('credit_cards', con=engine, if_exists='append', index=False)
		transactions.to_sql('transactions', con=engine, if_exists='append', index=False)
		db.commit()
	except:
		db.rollback()
	db.close()


In [None]:
def getEntries(table):
	db = engine.connect()
	query_string = sqlalchemy.text(
		"""SELECT * FROM {} LIMIT 100""".format(table)
	)
	fetched = db.execute(query_string).fetchall()
	db.close()
	return fetched


In [22]:
getEntries('users')[:5]

[('0000000000', 'Hazel Robinson', 53, 66, datetime.datetime(1966, 11, 1, 0, 0), 'Female', '462 Rose Lane', None, 'La Verne', 'CA', '91750', 34.15, -117.76, 29278.0, 59696.0, 127613.0, 787.0, 5),
 ('0000000001', 'Sasha Sadr', 53, 68, datetime.datetime(1966, 12, 1, 0, 0), 'Female', '3606 Federal Boulevard', None, 'Little Neck', 'NY', '11363', 40.76, -73.74, 37891.0, 77254.0, 191349.0, 701.0, 5),
 ('0000000002', 'Saanvi Lee', 81, 67, datetime.datetime(1938, 11, 1, 0, 0), 'Female', '766 Third Drive', None, 'West Covina', 'CA', '91792', 34.02, -117.89, 22681.0, 33483.0, 196.0, 698.0, 5),
 ('0000000003', 'Everlee Clark', 63, 63, datetime.datetime(1957, 1, 1, 0, 0), 'Female', '3 Madison Street', None, 'New York', 'NY', '10069', 40.71, -73.99, 163145.0, 249925.0, 202328.0, 722.0, 4),
 ('0000000004', 'Kyle Peterson', 43, 70, datetime.datetime(1976, 9, 1, 0, 0), 'Male', '9620 Valley Stream Drive', None, 'San Francisco', 'CA', '94117', 37.76, -122.44, 53797.0, 109687.0, 183855.0, 675.0, 1)]

In [17]:
getEntries('credit_cards')[:5]

[('0000000487', 0, 'Amex', 'Credit', '300105541992311', datetime.datetime(2010, 7, 1, 0, 0), '939', 'NO', 1, 14400.0, datetime.datetime(2010, 7, 1, 0, 0), datetime.datetime(2010, 1, 1, 0, 0), 'No'),
 ('0000001997', 1, 'Amex', 'Credit', '300609782832003', datetime.datetime(2024, 1, 1, 0, 0), '663', 'YES', 1, 6900.0, datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2013, 1, 1, 0, 0), 'No'),
 ('0000000272', 5, 'Amex', 'Credit', '300717031458937', datetime.datetime(2009, 9, 1, 0, 0), '194', 'YES', 1, 5800.0, datetime.datetime(2009, 9, 1, 0, 0), datetime.datetime(2008, 1, 1, 0, 0), 'No'),
 ('0000000613', 2, 'Amex', 'Credit', '302014253634948', datetime.datetime(2022, 3, 1, 0, 0), '649', 'YES', 2, 8900.0, datetime.datetime(2022, 3, 1, 0, 0), datetime.datetime(2015, 1, 1, 0, 0), 'No'),
 ('0000001335', 1, 'Amex', 'Credit', '302031764623099', datetime.datetime(2020, 11, 1, 0, 0), '654', 'YES', 2, 9900.0, datetime.datetime(2020, 11, 1, 0, 0), datetime.datetime(2020, 1, 1, 0, 0), 'No')]

In [18]:
getEntries('transactions')[:5]

[(1, '0000000000', '4344676511950444', datetime.datetime(2002, 9, 1, 6, 21), 134.09, 'Swipe Transaction', '3527213246127876953', 'La Verne', 'CA', '91750', 5300, None, 'No'),
 (2, '0000000000', '4344676511950444', datetime.datetime(2002, 9, 1, 6, 42), 38.48, 'Swipe Transaction', '-727612092139916043', 'Monterey Park', 'CA', '91754', 5411, None, 'No'),
 (3, '0000000000', '4344676511950444', datetime.datetime(2002, 9, 2, 6, 22), 120.34, 'Swipe Transaction', '-727612092139916043', 'Monterey Park', 'CA', '91754', 5411, None, 'No'),
 (4, '0000000000', '4344676511950444', datetime.datetime(2002, 9, 2, 17, 45), 128.95, 'Swipe Transaction', '3414527459579106770', 'Monterey Park', 'CA', '91754', 5651, None, 'No'),
 (5, '0000000000', '4344676511950444', datetime.datetime(2002, 9, 3, 6, 23), 104.71, 'Swipe Transaction', '5817218446178736267', 'La Verne', 'CA', '91750', 5912, None, 'No')]